cscript confa adofile confa
* this test script was designed for version 2.0.2

set more off
use hs-cfa

* the initial model might or might not work
rcof "confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9)" == 430

* better starting values
confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(ones)
* assert some bookkeeping results
local p = 9
local pstar = `p'*(`p'+1)/2
assert (e(df_u) == `pstar' + `p' - (3*`p'+3))
assert (e(df_m) == 2*`p' + 3)
assert (e(N)==_N)
scalar T_ones = e(chi2)

* will assert that estimates are comparable later
estimates store confa_ones
mat b_ones = e(b)
scalar T_ones = e(chi2)

* different starting value strategy
confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv)
estimates store confa_iv
mat b_iv = e(b)
scalar T_iv = e(chi2)

confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(smart)
estimates store confa_smart
mat b_smart = e(b)
scalar T_smart = e(chi2)

* difficult option
confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) difficult
estimates store confa_iv_d
mat b_iv_d = e(b)
scalar T_iv_d = e(chi2)

confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(smart) difficult
estimates store confa_smart_d
mat b_smart_d = e(b)
scalar T_smart_d = e(chi2)

* are the differences in converged values reasonably small?
assert mreldif( b_ones, b_iv ) < 2e-5
assert mreldif( b_ones, b_smart ) < 2e-5
assert mreldif( b_ones, b_smart_d ) < 2e-5
assert mreldif( b_ones, b_iv_d ) < 2e-5
assert reldif(T_ones, T_iv) < 1e-6
assert reldif(T_ones, T_smart ) < 1e-6
assert reldif(T_ones, T_iv_d) < 1e-6
assert reldif(T_ones, T_smart_d ) < 1e-6

* work with correlations
estat corr

* accept correlated errors option
confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) corr(x7:x8)
assert abs( e(chi2) - T_iv ) > 0.1

* accept the different normalization option
confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(ones) unitvar(_all)
estimates store confa_unitvar

* check correlations produced by -estat corr- from earlier results
assert reldif( _b[phi_1_2:_cons], 0.4585086 ) < 1e-5
assert reldif( _b[phi_1_3:_cons], 0.4705321 ) < 1e-3
assert reldif( _b[phi_2_3:_cons], 0.2829826 ) < 1e-3
assert reldif( _se[phi_1_2:_cons], 0.0634637 ) < 1e-4
assert reldif( _se[phi_1_3:_cons], 0.0862308 ) < 1e-3
assert reldif( _se[phi_2_3:_cons], 0.0714708 ) < 1e-4
assert reldif( e(chi2), T_ones) < 1e-7

* accept Satorra-Bentler option
confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) vce(sbentler)
estimates store confa_sbentler
mat b_sb = e(b)

assert mreldif( b_ones, b_sb ) < 1e-5
assert reldif( e(chi2), T_iv ) < 1e-6

* accept if conditions
confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9) if school==2, from(ones) unitvar(_all) vce(sbentler)
estimates store confa_yb
mat b_yb = e(b)
mat se_yb = diag( vecdiag( e(V) ) )
mata : st_matrix( "se_yb", matpowersym(st_matrix("se_yb"), 0.5))
mat se_yb = vecdiag( se_yb )
estat fit
scalar RMSEA_yb = r(RMSEA)
scalar CFI_yb = r(CFI)
scalar TMLE_yb = e(chi2)
scalar Tsc_yb = e(Tsc)

* check against published results:
* Yuan & Bentler (2007) Structural Equation Modeling, Ch. 10 in
* Handbook of Statistics, Vol. 26: Psychometrics, North Holland
* Section 26.7 and Table 1 p. 347, first column
mean x1-x9 if school==2

mat hs_confa_yb07_b = ( _b[x1], _b[x2], _b[x3], _b[x4], _b[x5], _b[x6], _b[x7], _b[x8], _b[x9])
mat hs_confa_yb07_b = (hs_confa_yb07_b, 0.779, 0.575, 0.722, 0.974, 0.964, 0.938, 0.683, 0.838, 0.718)
mat hs_confa_yb07_b = (hs_confa_yb07_b, 1, 1, 0.541, 1, 0.334, 0.520)
mat hs_confa_yb07_b = (hs_confa_yb07_b, 0.721,0.905,0.560,0.317,0.422,0.409,0.6020,0.400,0.542)

mat hs_confa_yb07_se = ( _se[x1], _se[x2], _se[x3], _se[x4], _se[x5], _se[x6], _se[x7], _se[x8], _se[x9])
mat hs_confa_yb07_se = (hs_confa_yb07_se, 0.116, 0.094, 0.091, 0.084, 0.083, 0.082, 0.080, 0.089, 0.086)
mat hs_confa_yb07_se = (hs_confa_yb07_se, 0, 0, 0.094, 0, 0.115, 0.100)
mat hs_confa_yb07_se = (hs_confa_yb07_se, 0.168, 0.140, 0.108, 0.066, 0.072, 0.076, 0.080, 0.112, 0.085)

scalar hs_confa_yb07_TMLE = 51.189
scalar hs_confa_yb07_Tsc = 49.732
scalar hs_confa_yb07_NFI = 0.898
scalar hs_confa_yb07_CFI = 0.942
scalar hs_confa_yb07_RMSEA = 0.089

* compare estimation results with reported results
assert mreldif( hs_confa_yb07_b, b_yb) < 1e-3*sqrt( colsof(b_yb) )
assert mreldif( hs_confa_yb07_se, se_yb) < 3e-3*sqrt( colsof(b_yb) )
assert reldif( TMLE_yb*(e(N)-1)/e(N),hs_confa_yb07_TMLE) < 1e-3
assert reldif( Tsc_yb*(e(N)-1)/e(N),hs_confa_yb07_Tsc) < 0.02
assert reldif( CFI_yb,hs_confa_yb07_CFI) < 4e-3
assert reldif( RMSEA_yb,hs_confa_yb07_RMSEA) < 1e-3

* compare subsetting results
keep if school==2
confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(ones) unitvar(_all) vce(sbentler)
est store confa_yb_sub
mat b_yb_sub = e(b)
mat se_yb_sub = diag( vecdiag( e(V) ) )
mata : st_matrix( "se_yb_sub", matpowersym(st_matrix("se_yb_sub"), 0.5))
mat se_yb_sub = vecdiag( se_yb_sub )
assert mreldif( b_yb, b_yb_sub ) < 2e-6
assert mreldif( se_yb, se_yb_sub ) < 1e-6

* produce some specified errors
rcof "confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) unitvar(_all)" == 198
rcof "confa vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9)" == 198
rcof "confa (vis: x1 x2 x3 (text: x4 x5 x6) (math: x7 x8 x9)" == 198
rcof "confa (vis: x1 x2 x3 text: x4 x5 x6) (math: x7 x8 x9)" == 198
rcof "confa vis: x1 x2 x3" == 198
rcof "confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) corr(x7:id)" == 198
rcof "confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) vce(bootstrap)" == 198
rcof "confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) abcdef" == 198

* some non-sensical data
* independent data
clear
set obs 1000
set seed 2093
forvalues k=1/5 {
   gen x`k' = rnormal()
}
rcof "confa (f: x1 - x5)" == 430
* a poorly identified model, loadings should be insignificant

* perfectly correlated data
forvalues k=6/9 {
   gen x`k' = x1
}
confa (f: x1 x6-x9), from(ones) iter(20) difficult
* the data is singular multivariate normal, the likelihood must explode
assert e(ll) > 1000

clear
set obs 1000
set seed 9823
gen f=rnormal()
forvalues k=1/6 {
  gen x`k' = f+rnormal()
}

* for multivariate normal data, the standard errors
* should be about the same
confa (f: x*), from(ones) vce(oim)
est store mv_oim
mat mv_oim = e(V)

confa (f: x*), from(ones) vce(robust)
est store mv_rob
mat mv_rob = e(V)

confa (f: x*), from(ones) vce(sbentler)
est store mv_sb
mat mv_sb = e(V)

assert mreldif( mv_oim, mv_rob ) < 1e-3
assert mreldif( mv_sb, mv_rob ) < 1e-3

* test predicted values
predict feb, ebayes
predict fr,  regress
* the two must be the same
assert feb == fr

predict fm,  mle
predict fba, bart
* the two must be the same
assert fm == fba

* factor predictions must be strongly correlated
corr fm fr
assert r(rho)>0.99

corr f fm
assert r(rho)>0.92

* non-normal properly specified model
clear
set obs 1000
set seed 9823
gen f= sqrt(2)*((uniform()<0.5) + (uniform()<0.5)-1)
forvalues k=1/6 {
  gen x`k' = f+rnormal()
}
* for non-normal data, the robust and Satorra-Bentler standard errors
* should be about the same
confa (f: x*), from(ones) vce(oim) search(off)
est store nn_moim
mat nn_oim = e(V)

confa (f: x*), from(ones) vce(robust) search(off)
est store nn_rob
mat nn_rob = e(V)

confa (f: x*), from(ones) vce(sbentler) search(off)
est store nn_sb
mat nn_sb = e(V)

assert mreldif( nn_oim, nn_rob ) > 1e-3
assert mreldif( nn_sb, nn_rob ) < 1e-3

* structurally misspecified model
clear
set obs 1000
set seed 6732
mata : l1 = rgamma(1,6,5,0.5)
mata : l2 = (rgamma(1,3,2,0.3),J(1,3,0))
mata : s = rgamma(1,6,6,0.4)
mata : A = l1'*l1 + l2'*l2 + diag(s)
mata : st_matrix("A", A)
drawnorm x1-x6 , cov( A )

confa (f: x*), from(iv)
mat ms_oim = e(V)
mat bb = e(b)
* must reject good fit
assert e(p) < 0.05

confa (f: x*), from(bb) robust search(off)
mat ms_rob = e(V)
mat ilog = e(ilog)
assert ilog[1,3] == 0
* asserts matrix staring values and quick convergence

confa (f: x*), from(bb) vce(sbentler) search(off)
mat ms_sb = e(V)

* all standard errors must be different
assert mreldif( ms_oim, ms_rob ) > 1e-3
assert mreldif( ms_sb, ms_rob ) > 1e-3
assert mreldif( ms_oim, ms_sb ) > 1e-3

* since the data are normal, S-B corrections do not change much
assert reldif( e(chi2), e(Tsc) )<0.01
assert reldif( e(Tsc), e(Tadj) )<0.02
assert reldif( e(T2), e(chi2) ) <0.05
assert reldif( e(SBd), e(df_u) )<0.02

* Bollen-Stine rotation
bollenstine, reps(500)
* expect to reject
assert e(p_u_BS) < 0.05
* expect the test statistic to have a chi^2 distribution
assert reldif( e(T_BS_05), invchi2( e(df_u),0.05) )<0.1
assert reldif( e(T_BS_95), invchi2( e(df_u),0.95) )<0.1

* produce a saturated model
clear
set obs 1000
gen f = rnormal()
forvalues k=1/4 {
  gen x`k' = f+rnormal()
}
confa (f: x*), from(ones) corr( x1:x4 x2:x3 )
assert e(lr_u) == 0
assert e(df_u) == 0

exit