*! Date : 17 Feb 2000 (STB-55: sbe34) *! Version : 1.27 *! Author : Adrian Mander *! Email : adrian.mander@mrc-bsu.cam.ac.uk *! Iterative proportional fitting in contingency tables *! *! SYNTAX ipf a b c d, fit(a*b + a*c) * *The latest version 1.25 tries to sort out the . values in the constrained file. *14/2/00 stops algorithm if the likelihood repeats due to rounding errors. program define ipf, rclass version 6.0 preserve di "Deleting all matrices......" matrix drop _all tempfile master qui save `master' *********************************************************************** * parse the syntax fit(x*y + x*z +y*z) * * NB the local `vlist' contains all the variables from the fit syntax *********************************************************************** syntax [varlist (default=none)] [fweight/] , FIT(string) [ CONstr(string) CONFILE(string) CONVARS(varlist) SAVE(string) EXPect NOLOG ACC(real 0.000001)] tokenize "`varlist'" di di "Expansion of the various marginal models" di "----------------------------------------" local i 1 local vlist " " gettoken mmodel fit:fit, parse("+") while "`mmodel'"~="" { while "`mmodel'"=="+" { gettoken mmodel fit:fit, parse("+") } * now split mmodel into the variables in the interaction terms gettoken left mmodel:mmodel, parse("*") while "`left'"~="" { while "`left'"=="*" { gettoken left mmodel:mmodel, parse("*") } cap confirm variable `left' if _rc==111 { di in red "Make sure `left' exists in your dataset" } confirm variable `left' local marg`i' "`marg`i'' `left'" gettoken left mmodel:mmodel, parse("*") } local vlist "`vlist' `marg`i''" local i =`i'+1 gettoken mmodel fit:fit, parse("+") } **************************************************************************** * The next bit of the code will be able to remove the * multiples in the varlist constructed from the model statement * this is held in `vlist' * * NB: There could be a problem here if a variable in the varlist has the * same name as ANY local macro used in the program **************************************************************************** tokenize "`vlist'" while "`1'"~="" { /* if "``1''"=="`freq'" { di in red "Change the name of the variable freq" exit(198) } */ if "``1''"=="" { local ulist "`ulist' `1'" local `1'="seen"} mac shift 1 } **************************************************************************** * Take individual data as the default or else use the frequency variable * from the option **************************************************************************** tempvar freqwt if "`weight'"=="" { qui gen `freqwt'=1} else { qui gen `freqwt'=`exp' } * Display the marginal models varlists This is part check part information local nomargs = `i'-1 local i 1 while `i'<=`nomargs' { di "marginal model `i' varlist : `marg`i'' " local i = `i'+1 } ******************************************************************************** * Now I need to expand to dataset to cover all possibilities and give * them all frequency 0 before doing a contract statement ******************************************************************************** if "`varlist'"=="" { local ind 1 tokenize "`ulist'" while "`1'"~="" { _unique `1' mat u`ind'=resp local `1' = r(unique) local ind = `ind'+1 mac shift 1 } tempfile temp qui save `temp',replace drop _all tokenize "`ulist'" while "`1'"~="" { gen `1'=. mac shift 1 } tokenize "`ulist'" qui set obs ``1'' local last = "" sort `1' local ind 1 while "`1'"~="" { local last "`last' `1'" sort `last' qui by `last': replace `1'=u`ind'[_n,1] cap expand ``2'' sort `1' local last "`last' `1'" mac shift 1 local ind=`ind'+1 } qui gen `freqwt'=0 sort `ulist' } else { local ind 1 tokenize "`varlist'" while "`1'"~="" { _unique `1' mat u`ind'=resp local `1' = r(unique) local ind = `ind'+1 mac shift 1 } tempfile temp qui save `temp',replace drop _all tokenize "`varlist'" while "`1'"~="" { gen `1'=. mac shift 1 } tokenize "`varlist'" qui set obs ``1'' local last = "" sort `1' local ind 1 while "`1'"~="" { local last "`last' `1'" sort `last' qui by `last': replace `1'=u`ind'[_n,1] cap expand ``2'' sort `1' local last "`last' `1'" mac shift 1 local ind=`ind'+1 } qui gen `freqwt'=0 sort `varlist' } ***************************************************************** * Calculate the model degrees of freedom. Obviously due to * structural zeroes and sampling zeroes the degrees of freedom * may be lower. ***************************************************************** local tind 1 tokenize "`ulist'" while "`1'"~="" { local tok`tind' = "`1'" mac shift 1 local tind=`tind'+1 } * di "NOW to make an ordered set of margi's" local term "" local i 1 while `i'<=`nomargs' { local uind 1 while `uind' <= `tind' { tokenize "`marg`i''" while "`tok`uind''"~="`1'" & "`1'"~="" { mac shift 1 } if "`1'"~="" { local term`i' = "`term`i'' `1'" } * di "m `marg`i'' term `term`i''" local uind = `uind'+1 } local i =`i'+1 } local i 1 while `i'<=`nomargs' { local marg`i' = "`term`i''" local i =`i'+1 } local i 1 while `i'<=`nomargs' { * di "Ordered marginal model `i' varlist : `marg`i'' " local i = `i'+1 } di "unique varlist `ulist'" local term 1 local i 1 while `i'<=`nomargs' { tokenize "`marg`i''" *Compare `marg`i'' to `ulist' ..... *di "`1' `2' `3' `4' `5'" local mind 1 while "``mind''"~="" { local uind 1 while "``mind''"~="`tok`uind''" { local uind=`uind'+1 } local u`mind' = `uind' local mind = `mind'+1 } tokenize "`marg`i''" local nterms 1 while "``nterms''"~="" { local nterms=`nterms'+1 } local nterms = `nterms'-1 local j 0 while `j'< 2^`nterms' { binary `j' `nterms' local it 1 local terms "" local uterms "" while `it'<=`nterms' { local terms "`terms'`r(s`it')'" local uterms "`uterms'`u`r(s`it')''" local it = `it'+1 } local mylist "`mylist' `uterms'" * di "`terms'" local j=`j'+1 } local i = `i'+1 } *di "mylist = `mylist'" tokenize "`mylist'" local mlind 1 while "``mlind''"~="" { local ind 1 while "``mlind''" ~= "``ind''" { local ind=`ind'+1 } if (`mlind' == `ind') { local blist "`blist' ``ind''" } local mlind=`mlind'+1 } *di "blist `blist'" *di local nparms 0 tokenize "`blist'" while "`1'"~="" { local len=length("`1'") * di _continue "length `len' | " local prod = 1 if `len'>1 { local i 1 while `i'<=`len' { local ade=substr("`1'",`i',1) local prod = `prod' * (``tok`ade'''-1) local i=`i'+1 * di _continue " `ade' `tok`ade'' ``tok`ade''' df=`prod' " } } else { local ade="`1'" local prod = `prod' * (``tok`ade'''-1) * di _continue " `ade' `tok`ade'' ``tok`ade''' nparms=`prod' " } * di _continue "`1' df=`prod'" local nparms = `nparms'+`prod' * di * di "DF = `df'" mac shift 1 } local ncells 1 local i 1 tokenize "`ulist'" while "``i''"~="" { local ncells = `ncells'* ``tok`i''' local i = `i'+1 } *add one for the constant term :) local nparms =`nparms'+1 local df = `ncells'-`nparms' di di "-------------------------------------------------------------------" di in blue "N.B. structural/sampling zeroes may lead to an incorrect df" di "Residual degrees of freedom = `df' " di *list qui append using `temp' *list ********************************************************* * Do a contracting of the dataset on the varlist since * the likelihood will be calculated over this space as * the model may be on a smaller space * * NB: the ulist should be a SUBSET of the varlist ********************************************************* local LHOOD 0 if "`varlist'"~="" { local LHOOD 1 di in red "The likelihood is calculated for the cells spanned by `varlist'" global tablist "`varlist'" sort `varlist' qui by `varlist': replace `freqwt'=sum(`freqwt') qui by `varlist': keep if _n==_N save lhood,replace *l `varlist' } ********************************************************* * Do a contracting of the dataset! * 1) This has to be done on the unique list in the model ********************************************************* sort `ulist' qui by `ulist': replace `freqwt'=sum(`freqwt') qui by `ulist': keep if _n==_N *list `ulist' *********************************************************************** *Initialise the Expected Frequencies to either be contained in * a file or by using the syntax I have devised. * * [D==1.T==1]=2 means * when D is 1 AND T is 1 the expected starting value * is 2 the values of the D and T must be specified otherwise what will * the baseline category be? *********************************************************************** cap confirm new Efreq qui gen double Efreq=. cap confirm new Efreqold qui gen double Efreqold = 1 if "`constr'"~="" & "`confile'"=="" { * get the first variable * di " `part' after `constr'" while "`constr'"~="" { gettoken start constr:constr,parse(",") gettoken part start:start,parse("[ ") gettoken part start:start,parse("]") local conif " `part'" gettoken part start:start,parse("] ") di in red "replacing all values with the condition `conif' with `start'" qui replace Efreqold = `start' if `conif' gettoken start constr:constr,parse(",") } } if "`confile'"~="" { if "`convars'"=="" { di in red "You must specify convars option when using CONFILE" exit(198) } drop Efreqold sort `convars' cap drop _merge merge `convars' using `confile' cap drop _merge *Efreqold should be in `confile' cap confirm variable Efreqold if _rc==111 { di "the constraints file must include Efreqold" exit(111) } tempvar constr constrm gen `constr'=(Efreqold ~=.) gen `constrm'=(Efreqold ==.) replace Efreqold=1 if Efreqold==. *l D E `constr' } gen Ofreq = `freqwt' *sort `ulist' *l `ulist' Ofreq Efreqold /********************************************************* * Do a contracting of the dataset! * Because of the merging ********************************************************* sort `ulist' Efreqold qui by `ulist': replace `freqwt'=sum(`freqwt') qui by `ulist': keep if _n==_N */ *l `ulist' Ofreq Efreqold *qui compress ******************************************************* * The algorithm loop that stops when the loglikelihood *doesnt change much ******************************************************* local llh=1000 local oldllh=0 local iter=1 cap gen marg=. cap gen nmarg=. while (abs(`llh'-`oldllh')>`acc') { *Sum over the marginal models local i 1 while `i'<=`nomargs' { sort `marg`i'' qui by `marg`i'' : replace marg=sum(Efreqold) qui by `marg`i'' : replace marg=marg[_N] qui by `marg`i'' : replace nmarg=sum(Ofreq) qui by `marg`i'' : replace nmarg=nmarg[_N] qui by `marg`i'' : replace Efreq=Efreqold*(nmarg[_N]/marg[_N]) local i = `i'+1 * Make a copy of the new estimates qui replace Efreqold=Efreq } * Sum over the constrained section of the continguency table. if "`convars'"~="" { sort `convars' qui by `convars' : replace marg=sum(Efreqold) if `constr'==0 qui by `convars' : replace marg=marg[_N] if `constr'==0 qui by `convars' : replace nmarg=sum(Ofreq) if `constr'==0 qui by `convars' : replace nmarg=nmarg[_N] if `constr'==0 qui by `convars' : replace Efreq=Efreqold*(nmarg[_N]/marg[_N]) if `constr'==0 * Make a copy of the new estimates qui replace Efreqold=Efreq if `constr'==0 } ************************************************************************ * Calculate the Multinomial/Poisson Likelihood if a varlist else the poisson * sampling likelihood ************************************************************************ if "`varlist'"~="" { sort `varlist' * l `varlist' Efreq Ofreq qui save temp,replace qui use lhood,replace cap drop _merge merge `varlist' using temp cap drop _merge sort `ulist' qui by `ulist': replace Efreq=Efreq[_N]/2 drop Ofreq rename `freqwt' Ofreq sort `varlist' } /* To Calculate the multinomial loglikelihood. tempvar addn addall addall2 pi gen `addn'=sum(Ofreq) gen `addm'=sum(Efreq) gen `pi'= Efreq/`addm'[_N] gen `addnf'=Efreq gen `adnlogm' = sum( Ofreq * log(Efreq) ) gen double `addall2' = sum( Ofreq * log(`pi') - lnfact(Ofreq) ) local llh2= lnfact(`addn'[_N]) + `addall2'[_N] if "`nolog'"~="" { di "Loglikelihood = `llh' or `llh2'" } drop `addm' `adnlogm' `addnf' `addn' `addall' `addall2' `pi' */ *This is kernel of likelihood only tempvar addall gen double `addall' = sum( Ofreq * log(Efreq) - Efreq ) * gen double `addall' = sum( Ofreq * log(Efreq) - Efreq - lnfact(Ofreq) ) local oldllh = `llh' *l Ofreq Efreq eofreq lofreq `addall' local llh= `addall'[_N] if "`nolog'"=="" { di "Loglikelihood = `llh' " } drop `addall' local iter=`iter'+1 } ********************************************************* * Pearson's and likelihood ratio tests of goodness of fit ********************************************************* tempvar addmn addm gen `addmn'=sum(((Efreq-Ofreq)^2)/Efreq) gen `addm'=sum(Ofreq*log(Ofreq/Efreq)) local g2 = 2*`addm'[_N] local x2 = `addmn'[_N] local pv_g = chiprob(`df',`g2') local pv_x = chiprob(`df',`x2') di di "Goodness of Fit Tests" di "---------------------" di "df = `df'" di "Likelihood Ratio Statistic G^2 = " %8.4f `g2', "p-value = " %5.3f `pv_g' di "Pearson Statistic X^2 = " %8.4f `x2', "p-value = " %5.3f `pv_x' drop `addm' `addmn' ********************************************************* * Calculating the probabilities of each category with * groups defined by the varlist ********************************************************* if "`varlist'"~="" { sort `varlist' save temp,replace use lhood,replace merge `varlist' using temp sort `ulist' qui by `ulist': replace Efreq=Efreq[_N] * l `varlist' Efreq Ofreq sort `varlist' qui gen prob=sum(Efreq) qui replace prob=Efreq/prob[_N] sort `varlist' qui compress if "`expect'"~="" { l `varlist' Efreq Ofreq prob, noobs nodisplay } if "`save'"~="" { keep `varlist' Efreq Ofreq prob cap confirm new `save'.dta if _rc~=0 { save `save', replace } else { save `save' } } } else { qui sort `ulist' qui gen prob=sum(Efreq) qui replace prob=Efreq/prob[_N] qui compress if "`expect'"~="" { l `ulist' Efreq Ofreq prob, noobs nodisplay } if "`save'"~="" { keep `ulist' Efreq Ofreq prob cap confirm new `save'.dta if _rc~=0 { save `save', replace } else { save `save' } } } use `master',replace return local vlist "`ulist'" return scalar df=`df' return scalar parms=`nparms' return scalar ncells=`ncells' return scalar llh = `llh' return scalar g2 = `g2' return scalar x2 = `x2' return scalar pvg = `pv_g' return scalar pvx = `pv_x' restore end ********************************************** * Calculate the unique values in ********************************************** program define _unique, rclass syntax [varlist] preserve sort `varlist' qui by `varlist': keep if _n==1 qui drop if `varlist'==. return scalar unique=_N cap mkmat `varlist', matrix(resp) if _rc~=0 { di in red "Too many unique values to form resp! in _unique" di in red "there are "_N noi list `varlist' } restore end ********************************************** * Work out binary ********************************************** program define binary, rclass args dec nterms local two=2^(`nterms'-1) local ind=`nterms' local rem=`dec' local str`nterms' bin = "" while `two'>1 { if `rem'>=`two' { return local s`ind'=`ind' local bin = "`bin'1" } else { return local s`ind'="" local bin = "`bin'0" } local rem=mod(`rem',`two') local ind=`ind'-1 local two=`two'/2 } local bin = "`bin'`rem'" return local bin="`bin'" if `rem'==0 { return local s1="" } else { return local s1=`rem' } end