*! version 1.3 November 17, 2006 @ 08:51:52
*! Construct -sqom- sample to allow faster cluster specification
* kohler@wz-berlin.de

*1.0 Initial version
*1.1 Option "keep(varlist) added"
*1.2 Bug fix. Wrong sorting in sqclusterdat return -> also needed a fix in sqom.ado
*1.3 Bug fix. Reshape wide error (also see sqom.ado 1.6)

// sqclusterdat [, return keep(varlist)]
//
// Without -return-: replaces the data in memory by one observation per
// distinct sequence (see -_sqclusterdata-) and flags this state in the
// characteristic _dta[Sqclusterdat].
// With -return-: merges the cluster results back to the saved original
// data (see -_sqclusterreturn-) and clears the flag. keep() is passed on
// to -_sqclusterreturn- and is only used in this mode.
//
// Exits with r(198) if the requested direction does not fit the current
// state of _dta[Sqclusterdat].
program sqclusterdat
version 9.1
	syntax [, return keep(varlist) ]

	// Shrink the data to the dimensions of SQdist
	if "`return'" == "" {
		if "`_dta[Sqclusterdat]'" != "" {
			di as error "Data already groupdat"
			exit 198
		}
		_sqclusterdata
	}

	// Add results to the original data
	else {
		if "`_dta[Sqclusterdat]'" == "" {
			di as error "Data already returned"
			exit 198
		}
		capture _sqclusterreturn `keep'
		if _rc {
			// Fallback: reload the untouched copy written by -_sqclusterdata-.
			// -use- only accepts -clear- (not -replace-); with -replace- the
			// fallback itself aborted and the original data were never restored.
			use _Sqclusterorig, clear
			di as text "Group results could not be merged to sequence data"
			di as text "Returned to original sequence data"
		}
		// Remove the flag so that -sqclusterdat- can be run again
		char _dta[Sqclusterdat]
	}
end

// Reduces the data in memory to one observation per distinct sequence.
//
// Reads the characteristics _dta[SQis], _dta[SQiis], _dta[SQtis] and
// _dta[SQomsample]. Writes two files to the working directory:
//   _Sqclusterorig.dta  the data as they were on entry
//   _Sqclusterdata.dta  one row per remaining sequence with its key _SQid
// Leaves _SQid (sequence-type key) and _SQn (frequency of that type) in
// memory, sets _dta[Sqclusterdat] and drops all cluster definitions.
program _sqclusterdata
	quietly {
		save _Sqclusterorig, replace

		// Errors are ignored on purpose (e.g. if the characteristic is empty).
		// NOTE(review): presumably restricts to the sample used by -sqom- - confirm
		capture keep `_dta[SQomsample]'

		// Drop sequences with gaps
		tempvar lcensor rcensor gap

		// Running count of non-missing elements: 0 while still inside a
		// leading run of missings
		by `_dta[SQiis]' (`_dta[SQtis]'), sort: gen `lcensor' = sum(!mi(`_dta[SQis]'))

		// Running count of missing elements ...
		by `_dta[SQiis]' (`_dta[SQtis]'): gen `rcensor' = sum(mi(`_dta[SQis]'))

		// ... turned into an indicator: 1 if this element and all elements
		// after it are missing (trailing run of missings)
		by `_dta[SQiis]' (`_dta[SQtis]'): ///
			replace `rcensor' = ((_N-_n) == (`rcensor'[_N]-`rcensor'[_n])) & mi(`_dta[SQis]')

		// Count missings that are neither leading nor trailing
		by `_dta[SQiis]' (`_dta[SQtis]'): ///
			gen `gap' = sum(mi(`_dta[SQis]') & `lcensor' & !`rcensor')

		// Remove every sequence containing at least one such inner missing
		by `_dta[SQiis]' (`_dta[SQtis]'): ///
			drop if `gap'[_N]>0
		drop `lcensor' `rcensor' `gap'

		// Reshape wide
		keep `_dta[SQis]' `_dta[SQiis]' `_dta[SQtis]'
		reshape wide `_dta[SQis]', i(`_dta[SQiis]') j(`_dta[SQtis]')
		unab varlist: `_dta[SQis]'*

		// Store a copy (for -_sqclusterreturn-)
		// Mark the first row of each distinct sequence pattern; the running
		// sum below turns these marks into a consecutive key per pattern.
		by `varlist', sort: gen _SQid = 1 if _n==1
		drop `varlist'
		replace _SQid = sum(_SQid)
		label var _SQid "key for -sqclusterdata, return-"
		sort _SQid
		save _Sqclusterdata, replace

		// Keep one sequence of each type
		by _SQid: gen _SQn = _N
		label var _SQn "Sequence frequency"
		by _SQid: keep if _n==1
		char _dta[Sqclusterdat] 1
		cluster drop _all
	}
end

// Merges the cluster results in memory back to the original data.
//
// Argument: optional varlist of additional variables to carry over. If no
// variables are passed, -syntax [varlist]- fills `varlist' with all
// variables currently in memory.
// Reads _dta[_cl] (names of the cluster analyses) and _dta[SQiis]; uses
// and finally erases _Sqclusterdata.dta and _Sqclusterorig.dta.
// Any error (including a failed -assert-) is left to the caller, which
// runs this program under -capture-.
program define _sqclusterreturn
	syntax [varlist]
	tempfile clusterdat

	// Wildcard pattern for the variables of every cluster analysis
	foreach name in `_dta[_cl]' {
		local namelist "`namelist' `name'*"
	}

	quietly {
		// (Re)build the key from the id variable of the first cluster analysis
		capture confirm new variable _SQid
		if !_rc {
			gen _SQid = `=word("`_dta[_cl]'",1)'_id
		}
		else {
			replace _SQid = `=word("`_dta[_cl]'",1)'_id
		}
		sort _SQid
		save `clusterdat'

		// Step 1: sequence types -> individual sequences (key _SQid)
		use _Sqclusterdata, clear
		merge _SQid using `clusterdat'
		assert _merge ==3
		erase _Sqclusterdata.dta
		keep `_dta[SQiis]' `namelist' `varlist'
		sort `_dta[SQiis]'
		save `clusterdat', replace

		// Step 2: individual sequences -> original data (key _dta[SQiis])
		use _Sqclusterorig, clear
		sort `_dta[SQiis]'
		merge `_dta[SQiis]' using `clusterdat'
		// Sequences dropped by -_sqclusterdata- may stay unmatched (_merge==1),
		// but no result may refer to an unknown sequence
		assert _merge!=2
		drop _merge
		erase _Sqclusterorig.dta
	}
end