* generate processed datasets of events
* create mortality measures
* create patient counts

* ---------------------------------------------------------------------------
* run configuration: logging, sample choice, stage switches, seeds, and the
* risk-adjuster variable lists used by the stages further down
* ---------------------------------------------------------------------------

* close any log that is already open (capture: no error if none is open)
capture log close
log using "logs/makedata.log", replace

set matsize 1000

* set to 0001 or 05 or 20 or 100 to choose sample to work from
* (the value is spliced into the input/output file names below)
local sampsize = "100"

* which conditions?
* (each stage below loops over this list)

global CONDITIONS "ami chf428 hip pnu hipkne"

* conditions with survival

global CONDITIONS_SURV "ami chf428 hip pnu"

* make the event data?
global MAKEEVENTS = 1

* we had some trouble with outpatient data having non-unique values in
* diag_id part fileyear clm_id in early years. this is the first year we'll start
* trusting the outpatient data
global OP_STARTYEAR = 2001

* make the "distance" data as a part of the event data?
* (when 0, the events stage still merges to closestpn/ files, so those files
* must already exist from an earlier run)
global MAKEDIST = 1
* calculate closest PN beginning with this year
global DISTANCE_STARTYEAR = 2008
* calculate closest PN ending with this year
global DISTANCE_ENDYEAR = 2010

* seed for tiebreaks
* (set at the top of the events stage; used when picking among equally close
* providers)
global EVENTSSEED = 789531

* make the bootstrap sample skeletons
global MAKEBOOT = 1

* bootstrap count
global NBOOT = 300

* seed for bootstrap samples
global BOOTSEED = 875347

* make risk-adjusted survival, readmission, productivity?
global MAKESURVREAD = 1

* make risk-adjusted survival for the long horizon (whole sample)?
global MAKELONGSURV = 1

* lower limit on number of patients when estimating risk-adjusted rates
* (5 for the subsamples, 25 for the 100% sample)
if ("`sampsize'"!="100") {
	global MINPATS = 5
}
else {
	global MINPATS = 25
}

* make the hospital compare data?
global MAKEHCOMPARE = 1

* make the patient counts?
global MAKECOUNTS = 1

* what is the base year for static/dynamic allocation
* NOTE(review): not referenced in the portion of the file reviewed here
local baseyear = 2008

* years to look forward for dynamic allocation
* currently 2: dynamic allocation is from 2008 to 2010
* NOTE(review): not referenced in the portion of the file reviewed here
local d = 2

* bring together everything?
global BRINGTOGETHER = 1

* risk adjusters
* (variable-name wildcards; ARSINDIC_* are created in the survival/readmission
* stage, como_* and card_* come from the comorbidity file merged in the
* events stage)

global RISKADJUSTERS "ARSINDIC_* como_* card_*"

* risk adjusters (age/race/sex only)
global ARSRISKADJUSTERS "ARSINDIC_*"

set more off
clear

* put the PLUS directory and the project's ./ado/ directory at the front of
* the ado search path
adopath ++ PLUS
adopath ++ ./ado/

* make the data
if ($MAKEEVENTS) {

	set seed $EVENTSSEED

	
	local c = 1
	foreach cond in $CONDITIONS {
		display "***** WORKING ON `cond' *****"

		* bring in index events
		* (one row per index event; diag_id is asserted to be the unique key)

		use diag_id bene_id diag_year zip_indx medrawindx fileyear ///
			surv_30 ///
			age_grp male race ///
			using src/1PrimaryExtract/indx/`cond'`sampsize'_indxdemo.dta
		isid diag_id
	
		* deal with race
		* race is a string; every value other than "1" (including blank) is coded
		* as nonwhite -- NOTE(review): presumably "1" is the code for white; confirm
		gen byte nonwhite = race!="1"
		drop race

		* zip of beneficiary
		rename zip_indx zip_bene
		destring zip_bene, replace
		rename diag_year year

		* bring in provider of index event, discharge date, and source of admission
		* (medrawindx + fileyear identify the index claim within the medpar file)
		merge 1:1 diag_id medrawindx fileyear ///
			using src/1PrimaryExtract/medpar/`cond'`sampsize'_medparclms.dta, ///
			keep(master match) ///
			keepusing( ///
				prvnumgrp er_amt is_shorthosp is_cah dschrgdt admsndt dstntncd ///
			) ///
			generate(match_provider)
		rename prvnumgrp pn
		
		* index events with no matching index claim are reported and then dropped
		count if match_provider==1
		local unmatched = r(N)
		if (`unmatched'>0) {
			display "***** COULDN'T MATCH `unmatched' INDEX EVENTS TO INDEX CLAIM! ******"
			display "dropping..."
		}
		drop if match_provider==1
		drop match_provider
		
		* hip/knee sample only:
		* -> flag hip replacement (procedure code 8151) and knee replacement
		*    (procedure code 8154) from the six procedure codes on the index claim
		* -> drop patients who received both
		* -> keep a single "comorbidity" indicator for hip replacement; knee
		*    replacement is the omitted category
		if ("`cond'"=="hipkne") {
			* procedure codes of the index claim
			merge 1:1 diag_id medrawindx fileyear ///
				using src/1PrimaryExtract/medpar/`cond'`sampsize'_medparclms.dta, ///
				assert(match using) keep(master match) nogenerate ///
				keepusing(prcdrcd1 prcdrcd2 prcdrcd3 prcdrcd4 prcdrcd5 prcdrcd6)

			gen byte como_hip_any = 0
			gen byte como_knee_any = 0

			* switch the flag on if any of the six codes is the procedure
			forvalues k = 1/6 {
				replace como_hip_any = 1 if prcdrcd`k'=="8151"
				replace como_knee_any = 1 if prcdrcd`k'=="8154"
			}
			drop prcdrcd*

			display "number receiving both hip and knee replacement (dropping):"
			count if como_hip_any & como_knee_any
			drop if como_hip_any & como_knee_any

			* after the drop every patient must have exactly one of the two
			assert como_hip_any + como_knee_any == 1

			* knee replacement is the omitted category
			drop como_knee_any
			rename como_hip_any como_hiprpl
			label var como_hiprpl "patient received a hip replacement procedure"
		}
		
		* the claim keys are no longer needed
		drop medrawindx fileyear

		* keep only short term hospitals and critical access hospitals
		keep if is_shorthosp | is_cah
		drop is_shorthosp is_cah

		* discharge destination flags:
		* -> transferred to another source of inpatient non-rehab/non-nursing care
		gen byte transferred_code = inlist(dstntncd, "02", "05", "43", "62", "66")
		* -> discharged against medical advice or died in hospital
		gen byte amadeath = inlist(dstntncd, "07", "20")
		drop dstntncd

		* came through the ED: positive, non-missing ER charges
		gen byte ed = (er_amt>0) & (er_amt!=.)
		gen byte noned = (ed==0)
		drop er_amt

		* merge provider to POS to get indicator for non-US state
		* (first attempt: the POS record for the provider in the event's own year)
		merge m:1 pn year using pos/pos.dta, ///
			keepusing(nonstate) ///
			keep(master match) generate(match_pos)

		* merge to POS for pn's that for some reason were not included in the POS in that year
		* merge to the last mention of the POS
		* (the update option fills nonstate only where it is still missing, so
		* values from the same-year merge above are not overwritten)

		merge m:1 pn using pos/pos_lastyear.dta, ///
			keepusing(nonstate) ///
			keep(master match match_update match_conflict) ///
			generate(match_pos_lastyear) update

		* match_pos_lastyear==1 means the pn is not in pos_lastyear.dta
		display "Observations that did not match to provider of service file (dropping):"
		count if match_pos_lastyear==1
		drop if match_pos_lastyear==1
		drop match_pos match_pos_lastyear
		
		* get rid of patients treated outside the US
		display "Observations at hospitals outside US states:"
		count if nonstate==1
		drop if nonstate==1
		drop nonstate
		
		* rebase to pn_new identifier
		merge m:1 pn using dartmouth_xwlk/pn2pn_new.dta, ///
			keep(master match) generate(match_dxwlk)
		
		display "Observations that did not match to dartmouth crosswalk to pn_new (dropping):"
		count if match_dxwlk==1
		drop if match_dxwlk==1
		drop match_dxwlk
		
		* bring in zip, hrr, hsa of provider (pn_new)
		* (every remaining pn_new must be found in pn_new.dta, else the assert
		* option stops the run)
		merge m:1 pn_new using dartmouth_xwlk/pn_new.dta, ///
			assert(match using) keep(match) nogenerate keepusing(zip hrrnum hsanum)
		
		rename zip zip_pn
		rename hrrnum hrrnum_pn
		rename hsanum hsanum_pn
		
		* switch provider identifier
		* (from here on pn is the crosswalked pn_new; the id from the claim is
		* kept as pn_orig)
		rename pn pn_orig
		rename pn_new pn
		
		* bring in HRR/HSA of patient
		* (the map files key on a variable named zip, so rename temporarily)
		rename zip_bene zip
		
		* merge to zip-HRR/HSA map for the actual year of the patient
		merge m:1 zip year using zip2hsahrr/zip2hsahrr.dta, ///
			keepusing(hrrnum hsanum) ///
			keep(master match) generate(match_bene_hrr)
		
		* merge to last year zipcode was mentioned in zipcode-hrr map for zipcodes that for
		* some reason were not listed in that year's map
		* (the update option fills hrrnum/hsanum only where still missing)

		merge m:1 zip using zip2hsahrr/zip2hsahrr_lastyear.dta, ///
			keepusing(hrrnum hsanum) ///
			keep(master match match_update match_conflict) ///
			generate(match_bene_hrr_lastyear) update
	
		* match_bene_hrr_lastyear==1 means the zip is not in the last-year map
		display "Observations for which bene couldn't be matched to HRR/HSA (dropping):"
		count if match_bene_hrr_lastyear==1
		drop if match_bene_hrr_lastyear==1
		drop match_bene_hrr match_bene_hrr_lastyear

		rename zip zip_bene
		rename hrrnum hrrnum_bene
		rename hsanum hsanum_bene
		
		* bring in lat and lon coordinates of patient and provider
		* the same steps run twice: once for the patient zip (bene) and once for
		* the provider zip (pn). result: lat_bene/lon_bene and lat_pn/lon_pn, in
		* radians
		
		foreach side in bene pn {
		
			* the lat/lon file keys on a variable named zip
			rename zip_`side' zip
				
			* merge to lat/lon
	
			merge m:1 zip using zip2latlon/zip, ///
				keep(master match) keepusing(lat lon src_sas) ///
				generate(match_`side'_zip)
			
			display "`side' observations w/ zips that couldn't be matched to lat/lon (dropping):"
			count if match_`side'_zip==1
			display "Offending ZIP codes:"
			tab zip if match_`side'_zip==1
			drop if match_`side'_zip==1
			drop match_`side'_zip
			
			* src_sas==0 rows are dropped; per the message below these are zips
			* whose coordinates came from the geonames db
			display "`side' observations w/ zips that matched to geonames db (dropping):"
			count if src_sas==0
			display "Offending ZIP codes:"
			tab zip if src_sas==0
			drop if src_sas==0
			drop src_sas
	
			rename zip zip_`side'
						
			rename lon lon_`side'
			rename lat lat_`side'
	
			* convert to radians
			* (the distance formula further down uses sin/cos/acos on these)
			replace lon_`side' = lon_`side'*(_pi/180)
			replace lat_`side' = lat_`side'*(_pi/180)
			
		}
		
		* build closestpn/`cond'`sampsize'.dta: for every patient zip-year in
		* DISTANCE_STARTYEAR..DISTANCE_ENDYEAR, the provider(s) at minimum distance
		* among providers observed in the events of this condition in that year.
		* key of the output file: zip_bene year pn
		if ($MAKEDIST) {
		
			display "calculating whether patient went to nearest provider..."
			
			* now make a list of closest providers observed treating the current
			* condition in each year to each patient zipcode in each year
			
			* make a list of provider lat/lons - years

			* then make a list of all patient zipcodes observed in the year
			* match each patient's zipcode-year to the nearest
			* provider observed to that zipcode

			tempfile latlonpn zipbene

			* provider latlons

			preserve
			
			* drop to sample for which we'll be calculating distance
			keep if year >= $DISTANCE_STARTYEAR & year <= $DISTANCE_ENDYEAR

			keep pn lat_pn lon_pn year
			duplicates drop
			sort year
			save `latlonpn'

			restore

			* this preserve stays open through the per-year loop and the roll-up;
			* the matching restore at the end of the block brings back the events
			preserve

			* patient zipcodes
			
			* drop to sample for which we'll be calculating distance
			keep if year >= $DISTANCE_STARTYEAR & year <= $DISTANCE_ENDYEAR

			keep zip_bene lat_bene lon_bene year
			duplicates drop
			* each zip-year must carry a single lat/lon
			isid zip_bene year
			sort year
			
			save `zipbene'
			
			* match patient zips to providers one year at a time to save memory
			
			forvalues year=$DISTANCE_STARTYEAR/$DISTANCE_ENDYEAR {
				display "-> year `year'"
				use `zipbene' if year==`year', clear

				* match each patient zip in the year to all providers in the year
				* (all pairwise combinations within the year)

				joinby year using `latlonpn', unmatched(master)
				assert _merge==3
				drop _merge

				* distance from patient zip-year to provider

				* distance formula from http://www.movable-type.co.uk/scripts/latlong.html
				* "spherical law of cosines"
				* avg radius of earth 3958.756 miles from http://bluemm.blogspot.com/2007/01/excel-formula-to-calculate-distance.html
				gen double dist = acos( sin(lat_bene)*sin(lat_pn) + cos(lat_bene)*cos(lat_pn)*cos(lon_pn-lon_bene) )*3958.756

				* when lat/lons were the same, the above formula returns missing not zero, fix that:
				replace dist = 0 if lat_bene==lat_pn & lon_bene==lon_pn

				* pick provider with minimum distance (if there are multiple providers in the same zipcode
				* this could pick multiple providers)

				egen double mindist = min(dist), by(zip_bene year)
				gen byte ismin = mindist==dist
				keep if ismin

				keep zip_bene year pn
				isid zip_bene year pn

				* per-year scratch file; removed again in the roll-up loop below
				save closestpn/`cond'`sampsize'_`year'.dta, replace
			}

			clear
			
			* roll up patient zips to providers from each year
			* (memory is empty on the first pass, so the first year is loaded with
			* use and the later years are appended)
			forvalues year=$DISTANCE_STARTYEAR/$DISTANCE_ENDYEAR {
				if (_N==0) {
					use closestpn/`cond'`sampsize'_`year'.dta
				}
				else {
					append using closestpn/`cond'`sampsize'_`year'.dta
				}
				rm closestpn/`cond'`sampsize'_`year'.dta
			}
			
			isid zip_bene year pn
			save closestpn/`cond'`sampsize'.dta, replace

			restore
		}
	
		* did the patient go to (one of) the closest provider(s)?
		* look the patient's own zip-year-provider up in the closest-PN list
		merge m:1 zip_bene year pn using closestpn/`cond'`sampsize'.dta, ///
			generate(match_closest) keep(master match)

		* found in the list == went to the closest PN. the list only covers
		* DISTANCE_STARTYEAR through DISTANCE_ENDYEAR, so the indicator is left
		* missing outside those years
		gen byte chose_closest = (match_closest==3) ///
			if inrange(year, $DISTANCE_STARTYEAR, $DISTANCE_ENDYEAR)

		drop match_closest
		
		* make a variable containing the closest provider to patient open in that year
		* we'll use this for the counterfactual allocation
		* issue: some patient zip's will have multiple providers that are "closest"
		* solution: randomly choose among closest providers
		*
		* result: pn_closest, defined for events in DISTANCE_STARTYEAR through
		* DISTANCE_ENDYEAR and missing otherwise
		
		tempfile diagtoclosest
		preserve
		
		* reduce to years with closest provider data
		keep if year >= $DISTANCE_STARTYEAR & year <= $DISTANCE_ENDYEAR
		
		* merge to closest provider(s) open in year
		* (one row per event x closest provider)
		keep diag_id zip_bene year
		joinby zip_bene year using closestpn/`cond'`sampsize'.dta, unmatched(master)
		assert _merge==3
		drop _merge
		
		* pin down the row order before drawing random numbers. the draws are
		* assigned row by row, so without a unique sort the provider picked for a
		* tied zip could change from run to run even with the same seed.
		* diag_id pn is unique here: the closest-PN list is unique in
		* zip_bene year pn and each event has a single zip_bene and year
		isid diag_id pn, sort
		
		* pick the closest provider among providers that matched
		* store the draw in double precision: the default float storage rounds
		* runiform() to about 7 digits, which makes exact ties within an event
		* possible and would break the isid check below
		gen double rand = runiform()
		egen double minrand = min(rand), by(diag_id)
		gen byte hasmin = rand==minrand
		keep if hasmin

		rename pn pn_closest
		keep diag_id pn_closest
		isid diag_id
		
		save `diagtoclosest'
		restore
		
		* bring in closest provider
		merge 1:1 diag_id using `diagtoclosest', assert(master match)
		* make sure all obs matched in years with closest provider data
		tab _merge year
		assert _merge==3 if year >= $DISTANCE_STARTYEAR & year <= $DISTANCE_ENDYEAR
		drop _merge

		* distance in miles between the patient zip and the provider zip
		* "spherical law of cosines", formula from
		* http://www.movable-type.co.uk/scripts/latlong.html
		* avg radius of earth 3958.756 miles from http://bluemm.blogspot.com/2007/01/excel-formula-to-calculate-distance.html
		* (lat/lon are already in radians)
		tempvar cosang
		gen double `cosang' = sin(lat_bene)*sin(lat_pn) + cos(lat_bene)*cos(lat_pn)*cos(lon_pn-lon_bene)
		gen double dist = acos(`cosang')*3958.756
		drop `cosang'

		* identical coordinates make the formula return missing rather than zero
		replace dist = 0 if lat_bene==lat_pn & lon_bene==lon_pn

		* patient lives in the HRR of the treating provider
		gen byte samehrr = (hrrnum_pn==hrrnum_bene)

		* comorbidity risk adjusters (como_* and card_*); every event must be found
		merge 1:1 diag_id using src/2SecondManiOut/comorbid/`cond'`sampsize'_medpar_cc.dta, ///
			keepusing(como_* card_*) assert(match using) keep(match) nogenerate

		* the card_ami* variables are based on icd-9 codes whose reporting changes
		* etc. on maurice's recommendation only card_mi is used, so the card_ami*
		* variables are dropped
		drop card_ami*
		
		* form an indicator for an ED encounter that ended the day of, or day before,
		* index admission
		*
		* this preserve/restore section builds two tempfiles, each a list of
		* diag_id (one row per index event):
		*   edbefore - events with an outpatient ED claim ending the day of / day
		*              before the index admission, at a different provider
		*   ipbefore - events with an inpatient (short-term/CAH) stay ending the
		*              day of / day before the index admission, at a different
		*              provider
		* the events list itself is untouched and comes back at the restore
		
		preserve
		
		clear
		
		* make list of OP claims that had an ED revenue center code (rev_center 0450-0459 or
		* 0981)
		
		tempfile edrevcntrs
		
		* revenue lines with an ED revenue center code
		* drop denied lines per resdac note to use rev_ncvr==rev_chrg
		* (http://www.resdac.org/sites/resdac.org/files/CMMI_FAQs.pdf)
		* i.e. keep only lines where rev_ncvr differs from rev_chrg
		use diag_id part fileyear clm_id rev_cntr rev_ncvr rev_chrg ///
			if ///
				rev_cntr!="" & ///
				( (real(rev_cntr) >= 450 & real(rev_cntr) <= 459) | rev_cntr=="0981") & ///
				rev_ncvr != rev_chrg ///
			using src/1PrimaryExtract/op/`cond'`sampsize'_oprevcntrs.dta		
		drop rev_ncvr rev_chrg

		* FIXME: there were some weird issues with early file years. claims weren't
		* unique in diag_id part fileyear clm_id. did maurice fix?
		drop if fileyear < $OP_STARTYEAR
		
		* reduce to list of claims with an ED revenue center code
		* (one row per claim, so the 1:1 merge below is valid)
		keep diag_id part fileyear clm_id
		duplicates drop

		sort diag_id part fileyear clm_id
		save `edrevcntrs'
		
		clear
		
		* pull in claims
		* limit to claims that end on the day of, or day before, index admission
		* drop denied claims by limiting to blank no_paycd per resdac note
		* (http://www.resdac.org/sites/resdac.org/files/CMMI_FAQs.pdf)
		* NOTE(review): dgn_date is compared to thru_dt as the index admission
		* date -- confirm against the extract documentation
		use diag_id part fileyear clm_id provider dgn_date thru_dt no_paycd ///
			if ///
				(dgn_date==thru_dt | dgn_date==thru_dt+1) & ///
				no_paycd=="" ///
			using src/1PrimaryExtract/op/`cond'`sampsize'_opclms.dta
			
		* there were some weird issues with early file years. see above
		drop if fileyear < $OP_STARTYEAR
		
		* limit to claims with an ED revenue center code
		merge 1:1 diag_id part fileyear clm_id using `edrevcntrs', keep(match) nogenerate
		
		* limit to claims that were NOT at the hospital that provided the index event
		* NOTE: (FIXME?) we identify whether the ED encounter occurred at the same
		* provider as the index event stay using the regular medicare PNs on the claims
		* not the "synthetic" PNs that we constructed from the dartmouth xwalk and
		* now use as provider id's in e.g. regressions
		
		* first get the fileyear and medrawindx of the index claim
		* (the OP claim's own fileyear is moved aside so it is not confused with
		* the index claim's fileyear)
		rename fileyear fileyear_op
		merge m:1 diag_id ///
			using src/1PrimaryExtract/indx/`cond'`sampsize'_indxdemo.dta, ///
			assert(match using) keep(match) keepusing(fileyear medrawindx) nogenerate
		
		* bring in provider of index event. some will not match (due to some bug in maurice's
		* code?), but i removed these diag_id from the events list anyway
		rename provider provider_op
		merge m:1 diag_id medrawindx fileyear ///
			using src/1PrimaryExtract/medpar/`cond'`sampsize'_medparclms.dta, ///
			keep(match) keepusing(prvnumgrp) nogenerate
		* drops both fileyear and fileyear_op
		drop fileyear*
		rename prvnumgrp provider_index
		
		gen byte same = provider_op==provider_index
		display "outpatient ed claim day of/day before index event was from index provider"
		tab same
		
		* an ED encounter isn't a transfer if it occurs at the same institution as the
		* index stay
		drop if same
		drop same
		
		* make list of index admissions with an ED claim on day of, or day before, index
		* admission, that wasn't at index event hospital
		gen nclaims = 1
		collapse (sum) nclaims, by(diag_id)

		* how many ED claims per index admission in day of and day before index adm?
		* (logged for information only; the count itself is not kept)
		summ nclaims, detail
		drop nclaims

		sort diag_id
		
		tempfile edbefore
		save `edbefore'

		* form an indicator for an inpatient stay that ended the day of, or day before,
		* index admission
		* we'll consider these patients to be transfers

		* pull in stays
		* limit to stays that end on the day of, or day before, index admission
		* and that were not the index event and that were IP/CAH ;)
		* FIXME: do we want to include SNF?
		use diag_id dgn_date prvnumgrp is_shorthosp is_cah dschrgdt flagindx ///
			if dgn_date==dschrgdt | dgn_date==dschrgdt+1 ///
			using src/1PrimaryExtract/medpar/`cond'`sampsize'_medparclms.dta
		
		drop dgn_date dschrgdt 
		
		* drop index stays
		drop if flagindx
		drop flagindx
		
		* must be ip/cah
		keep if is_shorthosp | is_cah
		drop is_shorthosp is_cah
		* limit to claims that were NOT at the hospital that provided the index event
		* NOTE (FIXME?) we use the medicare PNs to check whether the hospitals were
		* the same, not the synthetic PNs i constructed from the dartmouth xwalk. see
		* equivalent NOTE in the ED section above
		
		* first get the fileyear and medrawindx of the index claim
		merge m:1 diag_id ///
			using src/1PrimaryExtract/indx/`cond'`sampsize'_indxdemo.dta, ///
			assert(match using) keep(match) keepusing(fileyear medrawindx) nogenerate
		
		* bring in provider of index event. some will not match for bizarre reasons, but
		* these diag_id have been removed from the events list anyway
		rename prvnumgrp provider_medpar
		merge m:1 diag_id medrawindx fileyear ///
			using src/1PrimaryExtract/medpar/`cond'`sampsize'_medparclms.dta, ///
			keep(match) keepusing(prvnumgrp) nogenerate
		drop fileyear*
		rename prvnumgrp provider_index
		
		gen byte same = provider_medpar==provider_index
		display "inpatient discharge on day of/day before index event was from index provider"
		tab same

		* a candidate stay isn't a transfer if it occurs at the same institution as the
		* index stay
		drop if same
		drop same

		* make list of index admissions with an ip stay on day of, or day before, index
		* admission, that wasn't at index event hospital
		gen nclaims = 1
		collapse (sum) nclaims, by(diag_id)

		* how many stays per index admission in day of and day before index adm?
		* (logged for information only; the count itself is not kept)
		summ nclaims, detail
		drop nclaims

		sort diag_id
		
		tempfile ipbefore
		save `ipbefore'
			
		* back to events list		
		restore
		
		* edbefore: an ED claim (at another provider) ended the day of, or the day
		* before, the index admission
		* only defined for year>=OP_STARTYEAR, the first year of ED claims we use
		* tiny FIXME: for dgn_date==1janOP_STARTYEAR, we miss the ED claim that ends 31dec(OP_STARTYEAR-1)
		* that's totally irrelevant for our analysis tho!
		merge 1:1 diag_id using `edbefore', generate(match_edbefore) keep(master match)
		gen byte edbefore = (match_edbefore==3) if year>=$OP_STARTYEAR
		drop match_edbefore

		* ipbefore: an inpatient stay (at another provider) ended the day of, or
		* the day before, the index admission
		merge 1:1 diag_id using `ipbefore', generate(match_ipbefore) keep(master match)
		gen byte ipbefore = (match_ipbefore==3)
		drop match_ipbefore

		* nonedtr: non-ED patient who was transferred in from another inpatient
		* facility or ED
		gen byte nonedtr = (noned==1 & (edbefore==1 | ipbefore==1)) if year>=$OP_STARTYEAR
		
		* indicator for any hospital admission within 30 days post-discharge
		* spending and summed drg weights for 30 day window starting with admission date
		*
		* this preserve/restore section builds the tempfile readmspend, one row
		* per diag_id, with: transfer, readmission, spending, drgweight, drg_index,
		* wt_index, wt_index_defl, fyear_index
		
		tempfile readmspend
		
		* need to join to medpar claims
		preserve
		
		* index dates are renamed so they do not collide with the dates of the
		* claims joined in next
		keep diag_id dschrgdt admsndt
		rename dschrgdt dschrgdt_index
		rename admsndt admsndt_index
		
		* one row per event x medpar claim of that event's diag_id
		joinby diag_id ///
			using src/1PrimaryExtract/medpar/`cond'`sampsize'_medparclms.dta, ///
			unmatched(none)
		
		keep diag_id dschrgdt_index admsndt_index ///
			admsndt dschrgdt flagindx ///
			is_shorthosp is_cah ///
			drg_cd drgprice outlramt
		
		* limit to short term hospitals / critical access hospitals
		keep if is_shorthosp | is_cah
		
		* transfers have admissions the day of, or day following, index discharge
		gen byte transfer = ///
			!flagindx & ///
			(admsndt >= dschrgdt_index) & ///
			(admsndt - dschrgdt_index <= 1)
		
		* readmissions are admissions that are not transfers, and occur w/in 30 days
		gen byte readmission = ///
			!flagindx & ///
			(admsndt >= dschrgdt_index) & ///
			(admsndt - dschrgdt_index > 1) & (admsndt - dschrgdt_index <= 30)
		
		* raw $ spending
		gen spending = drgprice + outlramt
		drop drgprice outlramt
		
		* real spending
		
		* bring in drg weights
		rename drg_cd drg
		destring drg, replace
		* fyear = calendar year of the quarter after the discharge quarter, so a
		* discharge in the 4th calendar quarter gets the following year
		* NOTE(review): presumably the federal fiscal year (starts 1 Oct); confirm
		gen fyear = year(dofq( qofd(dschrgdt)+1 ))
		merge m:1 drg fyear using drg_productivity/drg_productivity.dta, ///
			keep(master match) generate(match_drgweight)
		* r(N) from the tabulation is the number of claims with no weight
		tab drg fyear if match_drgweight==1
		if (r(N) != 0) {
			display "*** SOME DRGS COULD NOT BE MATCHED TO WEIGHTS ***"
			display "Number of observations that failed to match:"
			display r(N)
		}
		
		drop match_drgweight
		
		* the three *_index variables are filled only on the index claim's row;
		* the (mean) in the collapse below then returns that row's value
		* drg of the index event
		gen drg_index = drg if flagindx
		* weight of the index event
		gen wt_index = drgweight if flagindx
		* fyear of the index event
		gen fyear_index = fyear if flagindx
		drop drg fyear
		
		* now we'll make 30 day spending measures
		* 30 day window begins with admission date and ends with admission date + 30
		
		* suppose people are admitted at the very beginning of the admission day
		* and people are discharged at the very end of the discharge date --
		* we set discharge day to 12am on discharge day + 1
		
		rename admsndt_index window_start
		gen window_end = window_start + 30
		
		* days of the stay that fall inside the window: both endpoints of the
		* stay are clamped to [window_start, window_end] and then differenced
		gen span_in_window = ///
			min(max(window_start,dschrgdt+1),window_end) - ///
			min(max(window_start,admsndt),window_end)

		* a hospital stay that begins before the index stay but ends on the day of the
		* index admission will have that last day count in the share. let's knock that
		* day out
		
		* in fact let's knock out all hospital stays that begin before the index
		* admission
		replace span_in_window = 0 if admsndt < window_start
		
		* share of days spent in 30 day window
		gen span_total = dschrgdt+1 - admsndt
		gen share_in_window = span_in_window/span_total
		
		* deflate spending/resources by the share
		replace spending = spending*share_in_window
		replace drgweight = drgweight*share_in_window
		gen wt_index_defl = wt_index*share_in_window
		
		* down to the index event level
		* (max) any transfer / any readmission among the event's claims
		* (sum) window-prorated spending and drg weights over all claims
		collapse ///
			(max) transfer readmission flagindx ///
			(sum) spending drgweight ///
			(mean) drg_index wt_index wt_index_defl fyear_index, by(diag_id)
		
		* every event must still have its index claim among the rows kept
		assert flagindx==1
		drop flagindx
		
		* save the list
		save `readmspend'
		
		* back to data
		restore
		
		* merge in list of diag_id with transfers/readmissions, spending, drg weights
		* (assert(match): every event must be in the list and vice versa)
		* NOTE(review): this merge has no nogenerate and _merge is not dropped
		* afterwards, so _merge is written to the saved file -- confirm that is
		* intended
		merge 1:1 diag_id using `readmspend', assert(match)
		
		rename readmission read_30
		rename transfer transferred_claims
		drop dschrgdt admsndt
		
		rename spending spend_30
		rename drgweight drgwt_30
		
		* now save data together...
		compress
		save processed_events/`cond'`sampsize'.dta, replace
	
		clear
		local c = `c' + 1
	}

}

* build bootsample/bootsample`sampsize'.dta: one row per provider HRR
* (hrrnum_pn) with freq_bs0 (== 1, the "original sample") and
* freq_bs1..freq_bs$NBOOT, the number of times the HRR is drawn in each
* bootstrap sample. the HRR list is taken from the processed events of all
* conditions, which must all contain the same HRRs.
if ($MAKEBOOT) {
	
	clear
	
	set seed $BOOTSEED
	
	* number of conditions, used to check the HRR lists below
	local nconds : word count $CONDITIONS
	
	* first make a list of hrr's by bringing together all the HRRs in the events for all
	* the conditions

	* bring in list of HRRs from the processed events
	* (clear/replace so that scratch files left behind by an interrupted run
	* don't stop this one)
	foreach cond in $CONDITIONS {
		use hrrnum_pn using processed_events/`cond'`sampsize'.dta, clear
		duplicates drop
		save bootsample/`cond'`sampsize'.dta, replace
	}

	* bring the lists together
	* start from empty memory: the last condition's list is still loaded at
	* this point and would otherwise be counted twice
	clear
	foreach cond in $CONDITIONS {
		if (_N==0) {
			use bootsample/`cond'`sampsize'.dta
		}
		else {
			append using bootsample/`cond'`sampsize'.dta
		}
		
		rm bootsample/`cond'`sampsize'.dta
	}
	
	* make sure the source samples for each condition include the same HRRs
	* i.e. every HRR must appear in the list of every condition
	gen count = 1
	collapse (sum) count, by(hrrnum_pn)
	assert count == `nconds'
	drop count
	
	* "original" sample
	gen freq_bs0 = 1
	label variable freq_bs0 "number of instances of original HRR in 'original sample' === 1"

	* make the bootstrap samples, which are actually just frequency weights applied
	* to each HRR
	* (bsample with weight() stores the draw counts in the existing variable
	* instead of changing the rows in memory)
	forvalues i=1/$NBOOT {
		gen freq_bs`i' = 0
		bsample , weight(freq_bs`i')
		label variable freq_bs`i' "number of instances of original HRR in bs sample `i'"
	}
	
	save bootsample/bootsample`sampsize'.dta, replace

	/*	
	* make the "original" dataset for bootstrapping
	* the original HRR (e.g. for merging to another dataset)
	rename hrrnum_pn hrrnum_pn_orig
	* the cluster ID of the HRR (e.g. the level at which the fixed effects will be in the
	* analysis regressions)
	egen hrrnum_pn_clid = group(hrrnum_pn_orig)
	sort hrrnum_pn_clid
	
	save bootsamples/bs0`sampsize'.dta, replace
	
	*/
	
	
}

if ($MAKESURVREAD) {

	foreach cond in $CONDITIONS {
		display "***** WORKING ON `cond' *****"
		
		use processed_events/`cond'`sampsize'.dta
		
		* create the "in sample" indicator that will include the survival, productivity,
		* and readmission samples
		
		gen byte insample = 1
		
		* for now, just patients 2006-2008
		replace insample = insample & (year>=2006 & year<=2008)
		
		* with the synthetic pn's from the dartmouth xwalk, each pn now has a fixed
		* location. so no pn should ever move hrr/hsa. sanity check...
		foreach area in hrr hsa {
			egen min`area' = min(`area'num_pn) if insample, by(pn)
			egen max`area' = max(`area'num_pn) if insample, by(pn)
			assert min`area'==max`area'
			drop min`area' max`area'
		}
		
		* make age/race/sex interactions
		egen arsgroup = group(age_grp nonwhite male)
		* make age/race/sex group indicators
		qui tab arsgroup , gen(ARSINDIC_)
		* omitted category
		drop ARSINDIC_1
				
		* in the 5% sample for hip/knee we get a collinearity problem
		* drop this to allow the regressions to complete
		if ("`cond'"=="hipkne" & "`sampsize'"=="05") {
			drop ARSINDIC_28 ARSINDIC_27
		}
		
		* make the log-spending and log-resources variables
		gen lspend_30 = ln(spend_30)
		gen ldrgwt_30 = ln(drgwt_30)
		
		* the grand sample will only include patients with a valid DRG weight and
		* hospital payment
		* this greatly simplifies calculating productivity
		replace insample = 0 if lspend_30==.|ldrgwt_30==.
		
		* reduce to the grand sample set... survival and readmission samples
		* will be subsets of this
		keep if insample

		* the measures are:
		* surv - risk-adjusted survival
		* sars - survival (age/race/sex adjusted)
		* snra - survival (no risk adjustment)
		* read - risk-adjusted readmission
		* rars - readmission (age/race/sex adjusted)
		* rnra - readmission (no risk adjustment)
		* prsp - risk and spending adjusted survival
		* prre - risk and resources adjusted survival
		* drgw - risk adjusted resources
		* spnd - risk adjusted spending
		
		* we do all of them for AMI, but just survival and readmission for the rest
		local all_meas "surv sars snra read rars rnra"
		if ("`cond'"=="ami") {
			local all_meas "`all_meas' prsp prre drgw spnd"
		}
				
		* now make the insample var names for each measure
		foreach meas in `all_meas' {
			gen byte insamp_`meas' = insample
		}
		drop insample
				
		* the readmission sample excludes anyone:
		* -> transferred out of the hospital according to the destination code
		* -> transferred out of the hospital according to claims (i.e. admitted to
		* a hospital on the day of or day following discharge)
		* -> leaving against medical advice or dying in the hospital
		foreach meas in read rars rnra {
			replace insamp_`meas' = ///
				insamp_`meas' & !transferred_code & !transferred_claims & !amadeath
		}
		
		* now make the risk-adjusted estimates for each bootstrap sample
		
		forvalues bsidx = 0/$NBOOT {
			preserve
			
			* bring in the bootstrap weight
			merge m:1 hrrnum_pn using bootsample/bootsample`sampsize', assert(match) ///
				nogenerate keepusing(freq_bs`bsidx')
			
			* create freq_bs`bsidx' observations for each original observation
			drop if freq_bs`bsidx'==0
			expand freq_bs`bsidx'
			drop freq_bs`bsidx'
			
			* create the new hrr IDs at the level of the bootstrap
			egen bootseq_within_hrrnum = seq(), by(diag_id)
			egen hrrnum_pn_boot = group(bootseq_within_hrrnum hrrnum_pn)
			egen pn_boot = group(pn hrrnum_pn_boot)
			
			* move old HRR ID and hospital ID out of the way
			rename pn pn_preboot
			rename hrrnum_pn hrrnum_pn_preboot
			
			xtset pn_boot
		
			foreach meas in `all_meas' {
				display "measure: `meas'"

				* lhs variable
				if ("`meas'"=="read"|"`meas'"=="rars"|"`meas'"=="rnra") {
					local lhsvar = "read_30"
				}
				else if ("`meas'"=="drgw") {
					local lhsvar = "ldrgwt_30"
				}
				else if ("`meas'"=="spnd") {
					local lhsvar = "lspend_30"
				}
				else {
					local lhsvar = "surv_30"
				}
			
				* spending variable (only defined for productivity)
				if ("`meas'"=="prsp") {
					local spendvar = "lspend_30"
				}
				else if ("`meas'"=="prre") {
					local spendvar = "ldrgwt_30"
				}
				else {
					local spendvar = ""
				}
			
				* risk adjusters for the measures
				if ("`meas'"=="sars"|"`meas'"=="rars") {
					* age/race/sex only
					local riskadjusters "$ARSRISKADJUSTERS"
				}
				else if ("`meas'"=="snra"|"`meas'"=="rnra") {
					* no risk adjusters
					
					* weird bug: xtreg with no rhs variables throws up an error
					* need to make this variable (which is then dropped by xtreg!)
					local riskadjusters "ones"
					gen byte ones = 1
				}
				else {
					* full set of risk adjusters
					local riskadjusters "$RISKADJUSTERS"
				}
					
				* make sure insamp is set to 0 if there are any missing obs		
				markout insamp_`meas' `lhsvar' `spendvar' `riskadjusters'
				
				* impose the minimum patient count
		
				* patients in the pn
				egen `meas'_`cond'_npats = sum(insamp_`meas'), by(pn_boot)
	
				qui distinct pn_boot if `meas'_`cond'_npats >= 1
				local totpn = r(ndistinct)
				qui distinct pn_boot if `meas'_`cond'_npats >= $MINPATS
				local qualpn = r(ndistinct)
				local droppedpn = `totpn' - `qualpn'
	
				display "Total hospitals: `totpn'"
				display "Hospitals with >= $MINPATS: `qualpn' (Dropping `droppedpn')"
		
				* sample restriction: must have min number of patients
				* and must be in the subsample (e.g. an ed patient)
				replace insamp_`meas' = (`meas'_`cond'_npats >= $MINPATS) & insamp_`meas'
				* and so the number of patients actually included if npatients_pn < MINPATS
				* is 0
				replace `meas'_`cond'_npats = 0 if `meas'_`cond'_npats < $MINPATS
				
				* run the regression!
				xtreg `lhsvar' `spendvar' `riskadjusters' if insamp_`meas', ///
					fe vce(cluster hrrnum_pn_boot)
				* regression sample should equal the insample indicator
				assert e(sample) == insamp_`meas'

				estimates save estimates/`meas'/`cond'`sampsize'/bs`bsidx'.ster, replace
	
				* Extract the fixed effects, which are the risk-adjusted rates
				predict `meas'_`cond'_fe if insamp_`meas', u
				
				* Note the regression was run with a constant term so we must
				* add it to the FE
				replace `meas'_`cond'_fe = `meas'_`cond'_fe + _b[_cons] ///
					if insamp_`meas'
				
				* make standard errors

				* Extract the error term fits, which speeds up fese
				predict `meas'_`cond'_ehat if insamp_`meas', e
			
				if ("`meas'"=="snra"|"`meas'"=="rnra") {
					* weird bug: xtreg with no rhs variables throws up an error so
					* we had to make a variable of ones (which was then dropped by xtreg!)
					* we can now get rid of it
					drop ones
					local riskadjusters ""
				
					* ok next problem. fese requires a RHS variable!
					* we'll just make the standard errors ourselves...
					gen `meas'_`cond'_se = ///
						e(sigma_e)*sqrt(1/`meas'_`cond'_npats) if insamp_`meas'
				}
				else {
					* if we have risk-adjusters then we can use fese
					fese_adam `lhsvar' `spendvar' `riskadjusters' if insamp_`meas', ///
						ehat(`meas'_`cond'_ehat) homo(`meas'_`cond'_se)
				}
				
				* calculate covariance of measurement error between survival and this
				* measure
				
				* all samples: covariance of survival and readmission 
				* ami-only: covariance of survival and input measures
				if ( ///
					("`meas'"=="read") | ///
					("`cond'"=="ami" & ("`meas'"=="drgw"|"`meas'"=="spnd") ) ///
				) {
				
					fese2 ///
						( surv_30 $RISKADJUSTERS if insamp_surv) ///
						( `lhsvar' $RISKADJUSTERS if insamp_`meas'), ///
						ehat1( surv_`cond'_ehat ) ehat2( `meas'_`cond'_ehat ) ///
						v11( surv_`meas'_`cond'_v2 ) v22( `meas'_surv_`cond'_v2 ) ///
						v12( surv_`meas'_`cond'_v12 )
				}
				
				if ("`meas'"=="surv"|"`meas'"=="read") {
					* mean survival/readmission rate in subsample
					egen `meas'_`cond'_mean = mean(`lhsvar') if insamp_`meas'
				}
			
			}
			
			* for "original sample", save a list of diag IDs so i can recover the full
			* analysis sample
			if (`bsidx'==0) {
				tempfile cursample
				save `cursample'
				keep diag_id insamp_*
				sort diag_id
				isid diag_id
				save samples/`cond'`sampsize'.dta, replace
				use `cursample', clear
			}
					
			* down to the pn level
		
			if ("`cond'"=="ami") {
				collapse (mean) ///
					surv_`cond'_fe surv_`cond'_se surv_`cond'_mean ///
					sars_`cond'_fe sars_`cond'_se ///
					snra_`cond'_fe snra_`cond'_se ///
					read_`cond'_fe read_`cond'_se read_`cond'_mean ///
					rars_`cond'_fe rars_`cond'_se ///
					rnra_`cond'_fe rnra_`cond'_se ///
					prsp_`cond'_fe prsp_`cond'_se ///
					prre_`cond'_fe prre_`cond'_se ///
					drgw_`cond'_fe drgw_`cond'_se ///
					spnd_`cond'_fe spnd_`cond'_se ///
					surv_read_`cond'_v2 read_surv_`cond'_v2 surv_read_`cond'_v12 ///
					surv_drgw_`cond'_v2 drgw_surv_`cond'_v2 surv_drgw_`cond'_v12 ///
					surv_spnd_`cond'_v2 spnd_surv_`cond'_v2 surv_spnd_`cond'_v12 ///
					, by( ///
						pn_boot hrrnum_pn_boot bootseq_within_hrrnum ///
						pn_preboot hrrnum_pn_preboot ///
						surv_`cond'_npats read_`cond'_npats ///
						sars_`cond'_npats rars_`cond'_npats ///
						snra_`cond'_npats rnra_`cond'_npats ///
						prsp_`cond'_npats prre_`cond'_npats ///
						drgw_`cond'_npats spnd_`cond'_npats ///
					)
			}
			else {
				collapse (mean) ///
					surv_`cond'_fe surv_`cond'_se surv_`cond'_mean ///
					sars_`cond'_fe sars_`cond'_se ///
					snra_`cond'_fe snra_`cond'_se ///
					read_`cond'_fe read_`cond'_se read_`cond'_mean ///
					rars_`cond'_fe rars_`cond'_se ///
					rnra_`cond'_fe rnra_`cond'_se ///
					surv_read_`cond'_v2 read_surv_`cond'_v2 surv_read_`cond'_v12 ///
					, by( ///
						pn_boot hrrnum_pn_boot ///
						pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum ///
						surv_`cond'_npats read_`cond'_npats ///
						sars_`cond'_npats rars_`cond'_npats ///
						snra_`cond'_npats rnra_`cond'_npats ///
					)
			}
		
			isid pn_boot
			isid pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum
			
			* add labels
			
			foreach meas in `all_meas' {

				if ("`meas'" == "surv") {
					local longmeas "surv"
				}
				else if ("`meas'" == "sars") {
					local longmeas "surv-ars"
				}
				else if ("`meas'" == "snra") {
					local longmeas "surv-nora"
				}
				else if ("`meas'"=="read") {
					local longmeas "readm"
				}
				else if ("`meas'"=="rars") {
					local longmeas "readm-ars"
				}
				else if ("`meas'"=="rnra") {
					local longmeas "readm-nora"
				}
				else if ("`meas'"=="prsp") {
					local longmeas "prod-spend"
				}
				else if ("`meas'"=="prre") {
					local longmeas "prod-real"
				}
				else if ("`meas'"=="drgw") {
					local longmeas "ldrgwt"
				}
				else if ("`meas'"=="spnd") {
					local longmeas "lspend"
				}
				
				label var `meas'_`cond'_fe "`cond' `longmeas' / raw FE"
				label var `meas'_`cond'_se "`cond' `longmeas' / std error of FE"
				label var `meas'_`cond'_npats "`cond' `longmeas' / number of patients in hospital-yidx"

				* label the covariances with survival
				if ( ///
					("`meas'"=="read") | ///
					("`cond'"=="ami" & ("`meas'"=="drgw"|"`meas'"=="spnd") ) ///
				) {
					label var surv_`meas'_`cond'_v2 "`cond' surv / var of FE"
					label var `meas'_surv_`cond'_v2 "`cond' `longmeas' / var of FE"
					label var surv_`meas'_`cond'_v12 "`cond' surv-`longmeas' / covar of FE"
				}
				
				if ("`meas'"=="surv" | "`meas'"=="read") {
					label var `meas'_`cond'_mean "`cond' `longmeas' / aggregate 30 day `longmeas' rate"	
				}

			}
						
			label var hrrnum_pn_preboot "actual provider HRR"
			label var bootseq_within_hrrnum "replication number within the HRR"
			label var hrrnum_pn_boot "fake bootstrap hrr of provider"

			label var pn_preboot "actual provider number"
			label var pn_boot "fake bootstrap provider number"

			* the year index is 1 (2006-2008)
			gen byte yidx = 1
			label var yidx "year index (1=2006-2008)"
		
			order yidx pn_boot hrrnum_pn_boot ///
				pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum ///
				surv_`cond'_npats surv_`cond'_mean ///
				surv_`cond'_fe surv_`cond'_se ///
				sars_`cond'_npats ///
				sars_`cond'_fe sars_`cond'_se ///
				snra_`cond'_npats ///
				snra_`cond'_fe snra_`cond'_se ///
				read_`cond'_npats read_`cond'_mean ///
				read_`cond'_fe read_`cond'_se ///
				rars_`cond'_npats ///
				rars_`cond'_fe rars_`cond'_se ///
				rnra_`cond'_npats ///
				rnra_`cond'_fe rnra_`cond'_se
				
			if ("`cond'"=="ami") {
				order ///
					prsp_`cond'_npats prsp_`cond'_fe prsp_`cond'_se ///
					prre_`cond'_npats prre_`cond'_fe prre_`cond'_se ///
					drgw_`cond'_npats drgw_`cond'_fe drgw_`cond'_se ///
					surv_drgw_`cond'_v2 drgw_surv_`cond'_v2 surv_drgw_`cond'_v12 ///
					surv_spnd_`cond'_v2 spnd_surv_`cond'_v2 surv_spnd_`cond'_v12, ///
					last
			}
			
			
			sort pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum yidx
			save surv_read/`cond'`sampsize'/bs`bsidx'.dta, replace
		
			restore
		
		}
		
		clear
	}
}

* Long-horizon survival measures (runs when MAKELONGSURV is nonzero).
* For every condition in CONDITIONS_SURV this section:
*   1. loads processed_events/<cond><sampsize>.dta and groups years into
*      three-year indices (yidx), keeping 1994-2008
*   2. keeps patients with a nonmissing, nonzero spend_30 and drgwt_30
*   3. imposes the MINPATS floor at the hospital-yidx level
*   4. for the original sample (bsidx 0) and each bootstrap draw up to NBOOT,
*      runs one hospital fixed-effects regression of surv_30 per yidx for each
*      of the measures surv / sars / snra and stores FEs and their SEs
*   5. collapses to one row per bootstrap hospital-yidx and saves it to
*      surv_read/long_<cond><sampsize>/bs<bsidx>.dta
if ($MAKELONGSURV) {
	foreach cond in $CONDITIONS_SURV {

		display "***** WORKING ON `cond' *****"
	
		use processed_events/`cond'`sampsize'.dta
	
		* create the "in sample" indicator
		gen byte insample = 1
	
		* year index groups years into triples
		* -3 = 1994-1996
		* -2 = 1997-1999
		* -1 = 2000-2002
		* 0 = 2003-2005
		* 1 = 2006-2008
		gen yidx = floor((year-2003)/3)
		tab yidx
	
		* 1993 will be yidx==-4. 2009+ will be yidx==2. limit sample to 1994-2008
		replace insample = 0 if yidx<-3 | yidx>1
	
		* generate the hospital-cross-yearindex group variable
		* the fixed effects will be at this level		
		* whatever you do, DON'T merge on this between files!!! the same number will
		* represent different hospitals
		egen pnXyidx = group(pn yidx)
		xtset pnXyidx
	
		* with the synthetic pn's from the dartmouth xwalk, each pn now has a fixed
		* location. so no pn should ever move hrr/hsa. sanity check...
		foreach area in hrr hsa {
			egen min`area' = min(`area'num_pn) if insample, by(pnXyidx)
			egen max`area' = max(`area'num_pn) if insample, by(pnXyidx)
			assert min`area'==max`area'
			drop min`area' max`area'
		}
	
		* make age/race/sex interactions
		egen arsgroup = group(age_grp nonwhite male)
		* make age/race/sex group indicators
		qui tab arsgroup , gen(ARSINDIC_)
		* omitted category
		drop ARSINDIC_1

		* the grand sample will only include patients with a valid DRG weight and
		* hospital payment
		* this greatly simplifies calculating productivity
		replace insample = 0 if spend_30==.|spend_30==0|drgwt_30==.|drgwt_30==0
		
		* reduce to the grand sample set; everything below works on this subset
		keep if insample
	
		* now make the insample var names
		rename insample insamp_longsurv

		* make sure insamp is set to 0 if there are any missing obs
		* (always checked against the full risk-adjuster set, so all three
		* measures below share one estimation sample)
		markout insamp_longsurv surv_30 $RISKADJUSTERS

		* impose the minimum patient count

		* patients in the pn-yidx
		egen longsurv_npats = sum(insamp_longsurv), by(pnXyidx)

		qui distinct pnXyidx if longsurv_npats >= 1
		local totpn = r(ndistinct)
		qui distinct pnXyidx if longsurv_npats >= $MINPATS
		local qualpn = r(ndistinct)
		local droppedpn = `totpn' - `qualpn'

		display "Total hospital-yidx: `totpn'"
		display "Hospital-yidx with >= $MINPATS: `qualpn' (Dropping `droppedpn')"

		* sample restriction: must have min number of patients
		* hospital-yidx cells below the floor leave the sample and report a
		* patient count of 0

		replace insamp_longsurv = (longsurv_npats >= $MINPATS) & insamp_longsurv
		replace longsurv_npats = 0 if longsurv_npats < $MINPATS

		* now make the risk-adjusted estimates for each bootstrap sample
		* (bsidx 0 is referred to below as the "original sample")
	
		forvalues bsidx = 0/$NBOOT {

			* every replication starts from the same patient-level data
			preserve
		
			* bring in the bootstrap weight
			merge m:1 hrrnum_pn using bootsample/bootsample`sampsize', assert(match) ///
				nogenerate keepusing(freq_bs`bsidx')
		
			* create freq_bs`bsidx' observations for each original observation
			drop if freq_bs`bsidx'==0
			expand freq_bs`bsidx'
			drop freq_bs`bsidx'
		
			* create the new hrr IDs at the level of the bootstrap
			* bootseq_within_hrrnum numbers the copies of each diag_id
			egen bootseq_within_hrrnum = seq(), by(diag_id)
			egen hrrnum_pn_boot = group(bootseq_within_hrrnum hrrnum_pn)
			egen pn_boot = group(pn hrrnum_pn_boot)
			egen pnXyidx_boot = group(pn_boot yidx)
		
			* move old HRR ID and hospital ID out of the way
			rename pn pn_preboot
			rename hrrnum_pn hrrnum_pn_preboot
			rename pnXyidx pnXyidx_preboot
		
			* fixed effects are at the bootstrap hospital-yidx level
			xtset pnXyidx_boot

			* the measures are:
			* surv - risk-adjusted survival
			* sars - survival (age/race/sex adjusted)
			* snra - survival (no risk adjustment)
	
			* all three measures are estimated
			foreach meas in surv sars snra {
				display "measure: `meas'"
		
				* risk adjusters for the measures
				if ("`meas'"=="sars") {
					* age/race/sex only
					local riskadjusters "$ARSRISKADJUSTERS"
				}
				else if ("`meas'"=="snra") {
					* no risk adjusters
			
					* weird bug: xtreg with no rhs variables throws up an error
					* need to make this variable (which is then dropped by xtreg!)
					local riskadjusters "ones"
					gen byte ones = 1
				}
				else {
					* full set of risk adjusters
					local riskadjusters "$RISKADJUSTERS"
				}
				
				* now run the FE regression for each yidx
				* fe/se/ehat are scratch names; the variables are created and
				* dropped again inside every yidx iteration
				tempvar fe se ehat
				gen long`meas'_`cond'_fe = .
				gen long`meas'_`cond'_se = .
				forvalues yidx_cur = -3/1 {

					* run the regression!
					xtreg surv_30 `riskadjusters' ///
						if insamp_longsurv & yidx==`yidx_cur', ///
						fe vce(cluster hrrnum_pn_boot)
					* regression sample should equal the insample indicator
					assert e(sample) == (insamp_longsurv & yidx==`yidx_cur')

					* capture: the directory usually exists already
					* NOTE(review): presumably estimates/long<meas> must already
					* exist for this mkdir to succeed - confirm
					capture mkdir estimates/long`meas'/`cond'`sampsize'_yidx`yidx_cur'
					estimates save estimates/long`meas'/`cond'`sampsize'_yidx`yidx_cur'/bs`bsidx'.ster, replace

					* Extract the fixed effects, which are the risk-adjusted rates
					predict `fe' if insamp_longsurv & yidx==`yidx_cur', u

					* Copy into the full FE variable
					* Note the regression was run with a constant term so we must
					* add it to the FE
					replace long`meas'_`cond'_fe = `fe' + _b[_cons] ///
						if insamp_longsurv & yidx==`yidx_cur'
					drop `fe'
					
					* Now deal with SEs			
					if ("`meas'"=="snra") {
						* fese requires a RHS variable but non-risk adjusted
						* measure has no RHS variables.
						* we'll just make the standard errors ourselves...
						replace long`meas'_`cond'_se = ///
							e(sigma_e)*sqrt(1/longsurv_npats) ///
							if insamp_longsurv & yidx==`yidx_cur'
					}
					else {
						* extract the error term fits, which speeds up fese
						predict `ehat' if insamp_longsurv & yidx==`yidx_cur', e
						* since we have risk-adjusters  we can use fese
						fese_adam surv_30 `riskadjusters' ///
							if insamp_longsurv & yidx==`yidx_cur', ///
							ehat(`ehat') homo(`se')
						* copy into the full SE variable
						replace long`meas'_`cond'_se = `se' ///
							if insamp_longsurv & yidx==`yidx_cur'
						drop `se' `ehat'
					}
				}
				
				if ("`meas'"=="snra") {
					* weird bug: xtreg with no rhs variables throws up an error so
					* we had to make a variable of ones (which was then dropped by xtreg!)
					* we can now get rid of it
					drop ones
					local riskadjusters ""
				}
				
			}

			* mean survival rate in the estimation sample (pooled over all yidx)
			egen longsurv_`cond'_mean = mean(surv_30) if insamp_longsurv
	
			rename longsurv_npats longsurv_`cond'_npats

			* for "original sample", save a list of diag IDs so i can recover the full
			* analysis sample
			* (preserve is already in use by the bootstrap loop, hence the tempfile)
			if (`bsidx'==0) {
				tempfile cursample
				save `cursample'
				keep diag_id insamp_longsurv
				sort diag_id
				isid diag_id
				save samples/`cond'_long`sampsize'.dta, replace
				use `cursample', clear
			}

			* down to the bootstrap hospital-yidx level
			collapse (mean) ///
				longsurv_`cond'_fe longsurv_`cond'_se longsurv_`cond'_mean ///
				longsars_`cond'_fe longsars_`cond'_se ///
				longsnra_`cond'_fe longsnra_`cond'_se ///
				, by( ///
					pn_boot yidx hrrnum_pn_boot ///
					pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum ///
					longsurv_`cond'_npats ///
				)

			* both the bootstrap key and the pre-bootstrap key must be unique
			isid pn_boot yidx
			isid pn_preboot yidx hrrnum_pn_preboot bootseq_within_hrrnum

			* add labels
			foreach meas in surv sars snra {

				if ("`meas'" == "surv") {
					local longmeas "long surv"
				}
				else if ("`meas'" == "sars") {
					local longmeas "long surv-ars"
				}
				else if ("`meas'" == "snra") {
					local longmeas "long surv-nora"
				}
		
				label var long`meas'_`cond'_fe "`cond' `longmeas' / raw FE"
				label var long`meas'_`cond'_se "`cond' `longmeas' / std error of FE"
				* the patient count and the aggregate mean exist only once,
				* under the longsurv prefix
				if ("`meas'"=="surv" ) {
					label var long`meas'_`cond'_npats "`cond' `longmeas' / number of patients in hospital-yidx"
					* longmeas already starts with "long"; do not prefix it again
					label var long`meas'_`cond'_mean "`cond' `longmeas' / aggregate 30 day `longmeas' rate"	
				}

			}

			label var hrrnum_pn_preboot "actual provider HRR"
			label var bootseq_within_hrrnum "replication number within the HRR"
			label var hrrnum_pn_boot "fake bootstrap hrr of provider"

			label var pn_preboot "actual provider number"
			label var pn_boot "fake bootstrap provider number"
		
			label var yidx "year index (1=2006-2008)"

			order yidx pn_boot hrrnum_pn_boot ///
				pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum ///
				longsurv_`cond'_npats longsurv_`cond'_mean ///
				longsurv_`cond'_fe longsurv_`cond'_se ///
				longsars_`cond'_fe longsars_`cond'_se ///
				longsnra_`cond'_fe longsnra_`cond'_se

			sort pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum yidx
			capture mkdir surv_read/long_`cond'`sampsize'
			save surv_read/long_`cond'`sampsize'/bs`bsidx'.dta, replace

			* back to the patient-level data for the next replication
			restore
		}

		clear		
	}
}

* Hospital Compare measures (runs when MAKEHCOMPARE is nonzero).
* Part 1 pools the 2006-2008 process-of-care (POC) measures per synthetic
* provider, standardizes them, averages them within condition and saves
* hcompare/poc_processed.dta (tagged yidx 1).
* Part 2 takes the 2008 HCAHPS question scores, averages them to the synthetic
* provider with bed-count weights, standardizes them and saves
* hcompare/hcahps_processed.dta.
if ($MAKEHCOMPARE) {
	* now deal with hospital compare...

	* bring in process of care measures
	
	* use the measures reported in all years 2006-2008
	* remove ami7 (thrombolytics at arrival) because 60-80% of hospitals report
	* no patients for the measure in each year
	
	local measures "ami1 ami2 ami3 ami4 ami5 ami8"
	local measures "`measures' hf1 hf2 hf3 hf4"
	local measures "`measures' pn1 pn2 pn3 pn4 pn5 pn6 pn7"
	
	* list of measures for the 'use' command
	local usestring ""
	foreach v in `measures' {
		local usestring "`usestring' `v'_*"
	}
	
	* bring in measures
	* clear: do not depend on whatever an earlier section left in memory
	use pn year `usestring' if year>=2006 & year<=2008 using hcompare/poc.dta, clear
	
	* limit to US states only

	merge m:1 pn using pos/pos_lastyear.dta, keep(match) keepusing(nonstate) nogenerate
	keep if nonstate==0
	drop nonstate
		
	* generate numerators
	
	foreach meas in `measures' {
		gen `meas'_numer = `meas'_score*`meas'_denom
		replace `meas'_numer = 0 if `meas'_denom==0
		* a numerator that is still missing (no score, or no denominator) is
		* unknown, so that record must not add patients to the pooled
		* denominator. collapse (sum) below counts a missing numerator as
		* zero, which would otherwise pull the pooled score toward zero
		replace `meas'_denom = 0 if `meas'_numer==.
	}

	* rebase to dartmouth xwalk synthetic provider numbers
	merge m:1 pn using dartmouth_xwlk/pn2pn_new.dta, ///
		keep(master match) generate(match_dxwlk)
	display "Observations that did not match to dartmouth crosswalk for pn_new (dropping):"
	count if match_dxwlk==1
	drop if match_dxwlk==1
	drop match_dxwlk
	drop pn
	rename pn_new pn
		
	* sum together the three years and collapse over multiple pn's that were aggregated
	* into one synthetic pn
	* FIXME: this is a little weird 'cause some providers report just a sample of patients
	* to hospital compare not the total. if we sum together two pn's, one that reported
	* for all patients and another that reported for a sample, the resulting score
	* will weight the pn that reported for all patients too heavily
	* however i don't see a nice, simple way to deal with this. so i will sum
	collapse (sum) *_numer *_denom, by(pn)
	
	* generate scores for hospital-measures with at least 50 patients
	* (all other hospital-measures get a missing score)
	foreach meas in `measures' {
		gen `meas'_score = `meas'_numer/`meas'_denom if `meas'_denom>=50 & `meas'_denom!=.
	}
	
	keep pn *_score
	
	* standardize each measure
	* the z-score goes into <meas>_std; the pooled score is kept as poc_<meas>_raw

	foreach meas in `measures' {
		display "standardizing `meas'"
		egen `meas'_mean = mean(`meas'_score)
		egen `meas'_sd = sd(`meas'_score)
		gen `meas'_std = (`meas'_score-`meas'_mean)/`meas'_sd
		drop `meas'_mean `meas'_sd
		rename `meas'_score poc_`meas'_raw
	}

	* summarize the standardized measures over all conditions
	* (log output only; no all-condition average is created)

	display "averaging together for all conditions"
	display "vars:"
	summ *_std

	* generate average measure for each condition

	foreach cond in ami hf pn {
		display "averaging together for condition `cond'"
		display "vars:"
		summ `cond'*_std
	
		* average together the standardized measures
		* rowmean averages over whichever measures are nonmissing for the row
		egen poc_`cond'_std = rowmean(`cond'*_std)
		egen byte poc_`cond'_nmeas = rownonmiss(`cond'*_std)
		drop `cond'*_std
	
		* and standardize that average
		egen poc_`cond'_std_mean = mean(poc_`cond'_std)
		egen poc_`cond'_std_sd = sd(poc_`cond'_std)
		replace poc_`cond'_std = (poc_`cond'_std-poc_`cond'_std_mean)/poc_`cond'_std_sd
		drop poc_`cond'_std_mean poc_`cond'_std_sd
		
		* average of the raw measures (for summary stats table)
		egen poc_`cond'_rawavg = rowmean(poc_`cond'*_raw)
		
		label variable poc_`cond'_std "`cond' process of care / synthetic z-score"
		label variable poc_`cond'_rawavg "`cond' process of care / raw average of scores reported"
		label variable poc_`cond'_nmeas "`cond' process of care / no of measures reported"
	}
	
	* get rid of individual measures
	drop poc_*_raw
	
	* switch condition abbreviations to match our own
	rename poc_hf_std poc_chf428_std
	rename poc_hf_nmeas poc_chf428_nmeas
	rename poc_hf_rawavg poc_chf428_rawavg
	rename poc_pn_std poc_pnu_std
	rename poc_pn_nmeas poc_pnu_nmeas
	rename poc_pn_rawavg poc_pnu_rawavg
	
	* let's call this yidx 1
	gen yidx = 1
	sort pn yidx
	
	save hcompare/poc_processed.dta, replace
	
	* bring in hcahps measures
	* clear: replace the POC data explicitly instead of relying on it being
	* unchanged since the save above

	use pn year *_score if year==2008 using hcompare/hcahps.dta, clear

	* limit to US states only
	merge 1:1 pn using pos/pos_lastyear.dta, keep(match) keepusing(nonstate) nogenerate
	keep if nonstate==0
	drop nonstate

	* some pn's are merged together to create synthetic pn's
	* the hcahps scores of these pn's must be averaged together so we get one score for
	* the synthetic pn
	* i'll take a weighted avg with number of beds in POS data as weights
	
	* bring in bed count
	merge m:1 pn year using pos/pos.dta, ///
		keepusing(beds_tot) ///
		keep(master match) generate(match_pos)
	* merge to POS for pn's that for some reason were not included in the POS in that year
	* merge to the last mention of the POS
	* (the update option only fills in beds_tot where it is still missing)
	merge m:1 pn using pos/pos_lastyear.dta, ///
		keepusing(beds_tot) ///
		keep(master match match_update match_conflict) ///
		generate(match_pos_lastyear) update
	* we should have a count for everyone
	assert match_pos_lastyear != 1
	drop match_pos match_pos_lastyear

	* rebase to dartmouth xwalk synthetic provider numbers
	merge m:1 pn using dartmouth_xwlk/pn2pn_new.dta, ///
		keep(master match) generate(match_dxwlk)
	display "Observations that did not match to dartmouth crosswalk for pn_new (dropping):"
	count if match_dxwlk==1
	drop if match_dxwlk==1
	drop match_dxwlk
	drop pn
	rename pn_new pn
	
	* take weighted average to drop to synthetic pn-year level
	collapse (mean) *_score [aw=beds_tot], by(pn year)

	* standardize each measure within-year
	* the z-score is temporary; the raw score is kept as hcahps_<var>_raw

	foreach var of varlist *_score {
		display "standardizing `var'"
		egen `var'_mean = mean(`var'), by(year)
		egen `var'_sd = sd(`var'), by(year)
		gen hcahps_`var'_std_tmp = (`var'-`var'_mean)/`var'_sd
		drop `var'_mean `var'_sd
		rename `var' hcahps_`var'_raw
	}

	* generate average over standardized HCAHPS question scores
	display "averaging together hcahps scores"
	display "vars:"
	summ hcahps_*_std_tmp

	egen hcahps_std = rowmean(hcahps_*_std_tmp)
	egen byte hcahps_questions = rownonmiss(hcahps_*_std_tmp)
	drop hcahps_*_std_tmp

	* and standardize that average
	egen hcahps_std_mean = mean(hcahps_std), by(year)
	egen hcahps_std_sd = sd(hcahps_std), by(year)
	replace hcahps_std = (hcahps_std-hcahps_std_mean)/hcahps_std_sd
	drop hcahps_std_mean hcahps_std_sd
	
	* get rid of individual questions EXCEPT overall
	foreach var of varlist hcahps_*_raw {
		if ("`var'"!="hcahps_overall_score_raw") {
			drop `var'
		}
	}
	
	label var hcahps_std "hcahps / synthetic z-score"
	label var hcahps_questions "hcahps / number of questions reported"
	label var hcahps_overall_score_raw "hcahps / raw overall score"
	
	sort pn year
	save hcompare/hcahps_processed.dta, replace
	clear

}

* Patient counts per provider-year (runs when MAKECOUNTS is nonzero).
* For every condition in CONDITIONS this builds, from
* processed_events/<cond><sampsize>.dta:
*   nclosest_<cond>      patients whose closest provider (pn_closest) is this
*                        pn, for DISTANCE_STARTYEAR through DISTANCE_ENDYEAR
*   npatients_<cond>_*   counts by pn-year: tot, ed, noned, nonedtr,
*                        nonednontr and hrr
* and saves them to counts/<cond><sampsize>.dta, one row per pn-year.
if ($MAKECOUNTS) {

	foreach cond in $CONDITIONS {
		display "***** WORKING ON `cond' *****"
		
		* first generate count mechanically allocating patients to nearest hospital
		tempfile closest
		
		* clear: do not depend on whatever an earlier section left in memory
		use pn_closest year if year >= $DISTANCE_STARTYEAR & year <= $DISTANCE_ENDYEAR ///
			using processed_events/`cond'`sampsize'.dta, clear

		gen nclosest_`cond' = 1
		collapse (sum) nclosest_`cond', by(pn_closest year) fast

		* key the mechanical counts by pn so they merge onto the actual counts
		rename pn_closest pn
		sort pn year
		save `closest'
		
		* now generate counts for total, ed, non-ed patients
		* only the variables that feed a count are loaded
		use pn year ed noned nonedtr samehrr ///
			using processed_events/`cond'`sampsize'.dta, clear
		
		* one row per patient event, so summing this gives the total
		gen npatients_`cond'_tot = 1
	
		* non-ed patients who were transferred in from another facility
		* only defined OP_STARTYEAR onward
		rename nonedtr npatients_`cond'_nonedtr
		
		* tot patients who stayed in the same hrr
		rename samehrr npatients_`cond'_hrr
						
		* make var names appropriate
		foreach var of varlist ed noned {
			rename `var' npatients_`cond'_`var'
		}
		
		* down to the pn-year
		collapse (sum) npatients_*, by(pn year) fast
				
		* bring in mechanical allocation counts
		merge 1:1 pn year using `closest', nogenerate
		
		* an issue: now we'll have missing values
		* (pn-years that appear on only one side of the merge)
		
		* for the actual counts variables, a missing value is tantamount to a zero
		foreach var of varlist npatients_`cond'_* {
			replace `var' = 0 if `var'==.
		}

		* blank out noned+transfer count for years op not reliably observed
		* FIXME maurice dealing with early years	
		replace npatients_`cond'_nonedtr = . if year < $OP_STARTYEAR
		
		* now generate the noned nontransfer count
		* (missing before OP_STARTYEAR because nonedtr was just blanked out)
		gen npatients_`cond'_nonednontr = npatients_`cond'_noned - npatients_`cond'_nonedtr
		assert npatients_`cond'_nonednontr != . if year >= $OP_STARTYEAR
		
		* for the mechanical allocation variable, a missing value is tantamount to a
		* zero in the years in which we calculated distance
		
		replace nclosest_`cond' = 0 if ///
			nclosest_`cond' == . & ///
			year >= $DISTANCE_STARTYEAR & ///
			year <= $DISTANCE_ENDYEAR
		
		label var nclosest_`cond' "`cond' patients - fake closest patients count"
		label var npatients_`cond'_hrr "`cond' patients - total patients from hospital's HRR"
		label var npatients_`cond'_tot "`cond' patients - total patients"
		label var npatients_`cond'_ed "`cond' patients - ed patients"
		label var npatients_`cond'_noned "`cond' patients - non-ed patients"
		label var npatients_`cond'_nonedtr "`cond' patients - non-ed transfer patients"
		label var npatients_`cond'_nonednontr "`cond' patients - non-ed non-transfer patients"
		
		compress
		sort pn year
		save counts/`cond'`sampsize'.dta, replace
		
		clear
	}
}

if ($BRINGTOGETHER) {
	clear
	
	gen int pn = .
	gen year = .
	
	label var pn "provider number (dartmouth synthetic)"
	label var year "year"
	
	* bring in counts
	foreach cond in $CONDITIONS {
		merge 1:1 pn year using counts/`cond'`sampsize'.dta, nogenerate
	}
	
	* bring in survival, readmission, productivity, spending, resources
	
	* yidx==1 corresponds to 2006-2008, we use it as a 2008 measure

	gen yidx = 1 if year==2008
	
	* yidx==-3 <=> 1994-1996, use as 1996 measure
	* yidx==-2 <=> 1997-1999, use as 1999 measure
	* yidx==-1 <=> 2000-2002, use as 2002 measure
	* yidx==0 <=> 2003-2005, use as 2005 measure
	
	replace yidx = -3 if year==1996
	replace yidx = -2 if year==1999
	replace yidx = -1 if year==2002
	replace yidx = 0 if year==2005
	
	label variable yidx "year index"
		
	* bring in process of care scores
	
	merge m:1 pn yidx using hcompare/poc_processed.dta, nogenerate
	
	* if a hospital had a survival or readmission but no associated count its year field
	* is now set to missing.

	* for yidx==1, let's make it 2008
	replace year=2008 if yidx==1 & year==.

	* yidx==-3 <=> 1994-1996, use as 1996 measure
	replace year=1996 if yidx==-3 & year==.
	* yidx==-2 <=> 1997-1999, use as 1999 measure
	replace year=1999 if yidx==-2 & year==.
	* yidx==-1 <=> 2000-2002, use as 2002 measure
	replace year=2002 if yidx==-1 & year==.
	* yidx==0 <=> 2003-2005, use as 2005 measure
	replace year=2005 if yidx==0 & year==.
	
	* bring in hcahps measure
	merge 1:1 pn year using hcompare/hcahps_processed.dta, nogenerate
	
	* bring in US news measure
	merge 1:1 pn year using usnews/usnews.dta, generate(match_usnews)

	* top rankings
	display "US news rankings:"
	summ *_ranking
	egen topranking = rowmin(*_ranking)
	gen byte anytop10 = (topranking <= 10 & topranking!=.) if year==2008 & match_usnews==3
	gen byte anytop25 = (topranking <= 25 & topranking!=.) if year==2008 & match_usnews==3
	gen byte anyranked = topranking!=. if year==2008 & match_usnews==3
	label var anytop10 "us news ranked top 10 any specialty"
	label var anytop25 "us news ranked top 25 any specialty"
	label var anyranked "us news ranked top 50 any specialty"

	egen anyeligible = rowmax(*_eligible)
	label var anyeligible "us news eligible to be ranked any specialty"

	foreach spec in heart resp ortho {
		gen byte `spec'_top10 = (`spec'_ranking <= 10 & `spec'_ranking!=.) if year==2008 & match_usnews==3
		gen byte `spec'_top25 = (`spec'_ranking <= 25 & `spec'_ranking!=.) if year==2008 & match_usnews==3
		gen byte `spec'_ranked = `spec'_ranking!=. if year==2008 & match_usnews==3
		
		label var `spec'_top10 "us news ranked top 10 in `spec'"
		label var `spec'_top25 "us news ranked top 25 in `spec'"
		label var `spec'_ranked "us news ranked top 50 in `spec'"
	}

	foreach spec in cancer ent endo gastro ger gyne kidney neuro uro {
		drop `spec'_eligible
		drop `spec'_ranking
	}
	
	drop match_usnews
	
	* if a PN had no patients and reported no quality measures in a year, then an obs for
	* that pn-year does not exist in the dataframe. but it should exist with a count set
	* to zero
	
	fillin pn year
	drop _fillin
	
	* now we'll merge in provider location info and then replace the missing counts
	* with zeroes

	* now deal with provider location variables

	* merge provider to dartmouth data to get zip/HRR/HSA
	rename pn pn_new
	merge m:1 pn_new using dartmouth_xwlk/pn_new.dta, nogenerate ///
		keepusing(zip hrrnum hsanum) ///
		assert(match using) keep(match)
	rename pn_new pn
	
	* merge to lat/lon
	merge m:1 zip using zip2latlon/zip, ///
		keep(master match) keepusing(lat lon src_sas) ///
		generate(match_zip)
	
	* drop hospitals we couldn't match to a lat/lon (or we could only match with the
	* geonames data)
	* these hospitals would have been removed from the processed_events file anyway
	* so we'd have no counts for them
	
	display "observations w/ zips that couldn't be matched to lat/lon (dropping):"
	count if match_zip==1
	display "Offending ZIP codes:"
	tab zip if match_zip==1
	drop if match_zip==1
	drop match_zip
	
	display "observations w/ zips that matched to geonames db (dropping):"
	count if src_sas==0
	display "Offending ZIP codes:"
	tab zip if src_sas==0
	drop if src_sas==0
	drop src_sas

	rename zip zip_pn
	rename hrrnum hrrnum_pn
	rename hsanum hsanum_pn
	rename lon lon_pn
	rename lat lat_pn

	* convert to radians
	replace lon_pn = lon_pn*(_pi/180)
	replace lat_pn = lat_pn*(_pi/180)

	* now deal with missing values in the npatients and nclosest variables
	* need to do this here 'cause the POC, US news, hcahps data files may have
	* added observations to the dataset, or we may have added observations from the
	* fillin command above
	
	* for the actual counts variables, a missing value is tantamount to a zero
	foreach var of varlist npatients_*_* {
		replace `var' = 0 if `var'==.
	}

	* blank out noned+transfer count for years op not reliably observed
	* FIXME maurice dealing with early years
	foreach var of varlist npatients_*_nonedtr npatients_*_nonednontr {
		replace `var' = . if year < $OP_STARTYEAR
	}
	
	* for the mechanical allocation counts, a missing value is tantamount to a
	* zero in the years in which we calculated distance
	foreach var of varlist nclosest_* {
		replace `var' = 0 if ///
			`var' == . & ///
			year >= $DISTANCE_STARTYEAR & ///
			year <= $DISTANCE_ENDYEAR
	}
	
	* fill in the yidx variables too, for hosps that lacked a survival/readmission
	* but had entries in hospital compare etc.
	* yidx==-3 <=> 1996
	replace yidx = -3 if year==1996
	* yidx==-2 <=> 1999
	replace yidx = -2 if year==1999
	* yidx==-1 <=> 2002
	replace yidx = -1 if year==2002
	* yidx==0 <=> 2005
	replace yidx = 0 if year==2005
	* yidx==1 <=> 2008
	replace yidx = 1 if year==2008
	
	sort pn year
	xtset pn year
	
	* make the overall sample indicator and the allocation metrics
	foreach cond in $CONDITIONS {
		foreach pfx in tot ed noned nonedtr nonednontr {
			* "overall" sample
			* restrictions:
			* the year is the base year of analysis
			* we observe (total, ed, non ed, etc.) patients at the hospital in the year (>=1 patient in 2008)
			gen byte insamp_`cond'_`pfx' = ///
				year==`baseyear' &	npatients_`cond'_`pfx' >= 1 & npatients_`cond'_`pfx' != .
			label var insamp_`cond'_`pfx' "`cond' in sample indicator - `pfx' patients"

			* static allocation measure 
			gen lnpatients_`cond'_`pfx' = ln(npatients_`cond'_`pfx')
			label var lnpatients_`cond'_`pfx' "`cond' static alloc measure - `pfx' patients"
				
			* dynamic allocation measure
			gen growth_`cond'_`pfx' = ///
				(F`d'.npatients_`cond'_`pfx' - npatients_`cond'_`pfx') ///
				/ ///
				( (1/2)*(F`d'.npatients_`cond'_`pfx' + npatients_`cond'_`pfx') )
			label var growth_`cond'_`pfx' "`cond' dynamic alloc measure - `pfx' patients - `d' year growth"

		}

		*** deal with the mechanical allocation (to closest hospital) stuff

		* "static allocation" overall sample

		* make same restrictions as regular static allocation sample but use mechanical
		* count instead of true count
		gen byte insamp_closest_`cond' = ///
			year==`baseyear' & nclosest_`cond' >= 1 & nclosest_`cond' != .
		label var insamp_closest_`cond' "`cond' in sample indicator - fake closest patients regression"

		* "static allocation" measure
		gen lnclosest_`cond' = ln(nclosest_`cond')
		label var lnclosest_`cond' "`cond' static alloc measure - fake closest patients regression"

		* "dynamic allocation" measure
		gen gclosest_`cond' = ///
			(F`d'.nclosest_`cond' - nclosest_`cond') ///
			/ ///
			( (1/2)*(F`d'.nclosest_`cond' + nclosest_`cond') )
		label var gclosest_`cond' "`cond' dynamic alloc measure - fake closest patients regression"
	}

	* flag the rows belonging to the long-horizon sample
	* (year indices spanning 1996-2008): a year index is defined, the year is
	* 1996 or later, and the hospital has a nonmissing total count of >= 1
	foreach cond in $CONDITIONS_SURV {
		local n "npatients_`cond'_tot"
		generate byte longsamp_`cond'_tot = ///
			(`n' >= 1) & (`n' != .) & (year >= 1996) & (yidx != .)
		label variable longsamp_`cond'_tot "`cond' in sample indicator - tot patients"
	}

	
	* make the bootstrap samples
	* each pass of the loop below builds and saves one bootstrap dataset
	
	* stop with an error unless pn and year jointly identify the rows in memory
	isid pn year

	* one iteration per bootstrap index, running from 0 through NBOOT inclusive
	* NOTE(review): index 0 presumably denotes the original (un-resampled) draw;
	* confirm against the code that writes the bootsample file
	forvalues bsidx = 0/$NBOOT {
		* work on a copy of the data for this bootstrap index
		preserve

		* attach this draw's replication count (one value per original HRR);
		* every row must find its HRR in the bootstrap file
		local wt "freq_bs`bsidx'"
		merge m:1 hrrnum_pn using bootsample/bootsample`sampsize', ///
			keepusing(`wt') assert(match) nogenerate

		* replicate each row as many times as its HRR was drawn;
		* rows whose HRR was not drawn are removed first
		keep if `wt' != 0
		expand `wt'

		* number the copies of each original hospital-year ...
		egen bootseq_within_hrrnum = seq(), by(pn year)
		label variable bootseq_within_hrrnum "instance of bootstrap HRR derived from original HRR"

		* ... so that each copy of an HRR becomes its own fake HRR ...
		egen hrrnum_pn_boot = group(bootseq_within_hrrnum hrrnum_pn)
		label variable hrrnum_pn_boot "fake HRR ID = group(bootseq_within_hrrnum hrrnum_pn)"

		* ... and each hospital within a fake HRR becomes its own fake provider
		egen pn_boot = group(pn hrrnum_pn_boot)
		label variable pn_boot "fake provider id = group(pn hrrnum_pn_boot)"

		* one indicator per fake HRR (hrI1, hrI2, ...), used with ebayes2 later
		quietly tabulate hrrnum_pn_boot, generate(hrI)

		* declare the panel on the fake provider id
		sort year pn_boot
		xtset pn_boot year

		* park the original hospital and HRR ids under *_preboot names
		foreach idvar in pn hrrnum_pn {
			rename `idvar' `idvar'_preboot
		}
	
		* merge in survival, readmission, productivity, and input (last two for ami only)
		* and empirical-Bayes (EB) adjust the hospital effects, condition by condition
		*
		* for each condition:
		*   1. merge the estimated effects / standard errors for this bootstrap index
		*   2. univariate EB adjustment of every measure (ebayes)
		*   3. bivariate EB adjustment of survival with readmission (all but hipkne)
		*      and of survival with the input measures (ami only) (ebayes2_reml)
		*
		* if an EB routine fails: the full (100) sample stops with error 430;
		* smaller samples fall back to the raw effects and raw sample moments.
		* the fallback first makes sure every output variable exists, because
		* whether a failed ebayes / ebayes2_reml call leaves its outputs behind
		* depends on the ado's internals
		foreach cond in $CONDITIONS {
	
			merge m:1 pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum yidx ///
				using surv_read/`cond'`sampsize'/bs`bsidx'.dta, nogenerate ///
				assert(master match) keepusing( ///
					surv_`cond'_npats surv_`cond'_fe surv_`cond'_se surv_`cond'_mean ///
					sars_`cond'_npats sars_`cond'_fe sars_`cond'_se ///
					snra_`cond'_npats snra_`cond'_fe snra_`cond'_se ///
					read_`cond'_npats read_`cond'_fe read_`cond'_se read_`cond'_mean ///
					rars_`cond'_npats rars_`cond'_fe rars_`cond'_se ///
					rnra_`cond'_npats rnra_`cond'_fe rnra_`cond'_se ///
					surv_read_`cond'_v2 read_surv_`cond'_v2 surv_read_`cond'_v12 ///
				)
			
			* drop the bivariate survival-readmission component for hip/knee
			* we don't need this because the multivariate allocation won't include
			* survival for this condition
			if ("`cond'"=="hipkne") {
				drop surv_read_`cond'_v2 read_surv_`cond'_v2 surv_read_`cond'_v12
			}

			* ami additionally carries the productivity and input measures
			if ("`cond'"=="ami") {
				merge m:1 pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum yidx ///
					using surv_read/`cond'`sampsize'/bs`bsidx'.dta, nogenerate ///
					assert(master match) keepusing( ///
						prsp_`cond'_npats prsp_`cond'_fe prsp_`cond'_se ///
						prre_`cond'_npats prre_`cond'_fe prre_`cond'_se ///
						drgw_`cond'_npats drgw_`cond'_fe drgw_`cond'_se ///
						spnd_`cond'_npats spnd_`cond'_fe spnd_`cond'_se ///
						surv_drgw_`cond'_v2 drgw_surv_`cond'_v2 surv_drgw_`cond'_v12 ///
						surv_spnd_`cond'_v2 spnd_surv_`cond'_v2 surv_spnd_`cond'_v12 ///
					)
			}

			* make EB adjusted effects
			* only EB adjust WITHIN the analysis sample

			local all_meas "surv sars snra read rars rnra"
			if ("`cond'"=="ami") {
				local all_meas "`all_meas' prsp prre drgw spnd"
			}
		
			* iterate over the measures
			foreach meas in `all_meas' {

				* human-readable name of the measure, used in messages and labels
				if ("`meas'" == "surv") {
					local longmeas "surv"
				}
				else if ("`meas'" == "sars") {
					local longmeas "surv-ars"
				}
				else if ("`meas'" == "snra") {
					local longmeas "surv-nora"
				}
				else if ("`meas'"=="read") {
					local longmeas "readm"
				}
				else if ("`meas'"=="rars") {
					local longmeas "readm-ars"
				}
				else if ("`meas'"=="rnra") {
					local longmeas "readm-nora"
				}
				else if ("`meas'"=="prsp") {
					local longmeas "prod-spend"
				}
				else if ("`meas'"=="prre") {
					local longmeas "prod-real"
				}
				else if ("`meas'"=="drgw") {
					local longmeas "ldrgwt"
				}
				else if ("`meas'"=="spnd") {
					local longmeas "lspend"
				}
				
			
				* perform univariate adjustment
				* only adjust within the analysis sample
				display "performing univariate EB adjustment for `longmeas'"
				capture noisily ebayes `meas'_`cond'_fe `meas'_`cond'_se ///
					if `meas'_`cond'_fe!=. & insamp_`cond'_tot==1, ///
					absorb(hrrnum_pn_boot) gen(`meas'_`cond'_eb) ///
					var(`meas'_`cond'_var) uvar(`meas'_`cond'_uvar)
				
				if (_rc!=0) {
					* adjustment failed
					display "EB procedure failed to converge"
					if ("`sampsize'"=="100") {
						*  in the full sample this is a problem
						error 430
					}
					else {
						* in smaller samples, no big deal
						* copy in the raw values
						display "continuing because it's < 100% sample"

						* estimation sample of the failed call
						local rawif "`meas'_`cond'_fe!=. & insamp_`cond'_tot==1"

						* create whichever outputs the failed call did not leave
						* behind; capture makes this a no-op for those that exist
						foreach out in eb var uvar {
							capture generate `meas'_`cond'_`out' = .
						}

						* raw effect stands in for the EB-adjusted effect, and the
						* raw sample variance for both variance outputs
						replace `meas'_`cond'_eb = `meas'_`cond'_fe if `rawif'
						qui summ `meas'_`cond'_fe if `rawif'
						replace `meas'_`cond'_var = r(Var) if `rawif'
						replace `meas'_`cond'_uvar = r(Var) if `rawif'
					}
				}
			
				label var `meas'_`cond'_uvar "`cond' `longmeas' / underlying var"
				label var `meas'_`cond'_var "`cond' `longmeas' / underlying var (w/in-HRR)"
				label var `meas'_`cond'_eb "`cond' `longmeas' / EB-adj FE"

				* perform bivariate EB adjustment between survival and the measure
				* non-hipkne: survival and readmission (within HRR + poc + hcahps)
				*             this is for the multivariate allocation regression
				* ami-only: survival and input measures (within HRR)
				*           this is for the productivity allocation regressions
				if ( ///
					("`cond'"!="hipkne" & "`meas'"=="read") | ///
					("`cond'"=="ami" & ("`meas'"=="drgw"|"`meas'"=="spnd") ) ///
				) {
					
					* set variables to adjust within (beyond HRR)
					* and set additional if statement component
					
					if ("`meas'"=="read") {
						* readmission measure: all conditions get hcahps
						local extra_within "hcahps_std"
						local extra_if "& hcahps_std!=."
						
						* ami, chf, pnu get poc
						if (regexm("`cond'","^(ami|chf428|pnu)$")) {
							local extra_within "`extra_within' poc_`cond'_std"
							local extra_if "`extra_if' & poc_`cond'_std!=."
						}
					}
					else {
						* input measures: no additional controls
						local extra_within ""
						local extra_if ""
					}
					
					display "performing bivariate EB adjustment for surv and `meas'"
					capture noisily ebayes2_reml ///
						surv_`cond'_fe `meas'_`cond'_fe ///
						surv_`meas'_`cond'_v2 `meas'_surv_`cond'_v2 surv_`meas'_`cond'_v12 ///
						hrI* `extra_within' ///
						if surv_`cond'_fe!=. & `meas'_`cond'_fe!=. ///
							& insamp_`cond'_tot==1 `extra_if', ///
						noconstant ///
						gen1(surv_`meas'_`cond'_eb2) gen2(`meas'_surv_`cond'_eb2) ///
						s11(surv_`meas'_`cond'_var2) s22(`meas'_surv_`cond'_var2) ///
						s12(surv_`meas'_`cond'_cov) ///
						us11(surv_`meas'_`cond'_uvar2) us22(`meas'_surv_`cond'_uvar2) ///
						us12(surv_`meas'_`cond'_ucov)

					if (_rc!=0) {
						* adjustment failed
						display "bivariate EB procedure failed to converge"
						if ("`sampsize'"=="100") {
							*  in the full sample this is a problem
							error 430
						}
						else {
							* in smaller samples, no big deal
							* copy in the raw values
							display "continuing because it's < 100% sample"

							* estimation sample of the failed call
							local bivif "surv_`cond'_fe!=. & `meas'_`cond'_fe!=. & insamp_`cond'_tot==1 `extra_if'"

							* name stubs: s = survival side, m = this measure's side
							local s "surv_`meas'_`cond'"
							local m "`meas'_surv_`cond'"

							* create whichever outputs the failed call did not leave
							* behind; capture makes this a no-op for those that exist
							foreach out in `s'_eb2 `m'_eb2 `s'_var2 `m'_var2 `s'_uvar2 `m'_uvar2 `s'_cov `s'_ucov {
								capture generate `out' = .
							}

							* raw effects stand in for the EB-adjusted effects
							replace `s'_eb2 = surv_`cond'_fe if `bivif'
							replace `m'_eb2 = `meas'_`cond'_fe if `bivif'

							* raw second moments of the two effects
							* (correlate, covariance returns r(Var_1), r(Var_2), r(cov_12))
							qui correl surv_`cond'_fe `meas'_`cond'_fe if `bivif', covar

							replace `s'_var2 = r(Var_1) if `bivif'
							replace `m'_var2 = r(Var_2) if `bivif'
							replace `s'_uvar2 = r(Var_1) if `bivif'
							replace `m'_uvar2 = r(Var_2) if `bivif'

							* the covariance outputs get the raw covariance, in
							* parallel with the variances above
							replace `s'_cov = r(cov_12) if `bivif'
							replace `s'_ucov = r(cov_12) if `bivif'
						}
					}

	
					* label survival vars
					label var surv_`meas'_`cond'_eb2 "`cond' surv / EB-adj FE (bivariate)"
					label var surv_`meas'_`cond'_var2 "`cond' surv / underlying var (w/inHRR-yidx, bivariate)"
					label var surv_`meas'_`cond'_uvar2 "`cond' surv / underlying var (uncond, bivariate)"
					
					* label this measure
					label var `meas'_surv_`cond'_eb2 "`cond' `longmeas' / EB-adj FE (bivariate)"
					label var `meas'_surv_`cond'_var2 "`cond' `longmeas' / underlying var (w/inHRR-yidx, bivariate)"
					label var `meas'_surv_`cond'_uvar2 "`cond' `longmeas' / underlying var (uncond, bivariate)"
	
					* label the interactions
					label var surv_`meas'_`cond'_cov "`cond' surv-`longmeas' / underlying cov (w/inHRR-yidx, bivariate)"
					label var surv_`meas'_`cond'_ucov "`cond' surv-`longmeas' / underlying cov (uncond, bivariate)"
				}
			}
		}
		
		* for hip/knee, EB adjust readmission measure within HRR + hcahps
		* for the multivariate allocation regression
		*
		* outputs: read_hipkne_eb_m, read_hipkne_var_m, read_hipkne_uvar_m
		* on failure the full (100) sample stops with error 430; smaller samples
		* fall back to the raw effect and its raw sample variance
		display "performing EB adjustment for hip/knee readmission for multivariate allocation"
		capture noisily ebayes read_hipkne_fe read_hipkne_se hcahps_std ///
			if read_hipkne_fe!=. & insamp_hipkne_tot==1 & hcahps_std!=., ///
			absorb(hrrnum_pn_boot) gen(read_hipkne_eb_m) ///
			var(read_hipkne_var_m) uvar(read_hipkne_uvar_m)
		
		if (_rc!=0) {
			* adjustment failed
			display "EB procedure failed to converge"
			if ("`sampsize'"=="100") {
				*  in the full sample this is a problem
				error 430
			}
			else {
				* in smaller samples, no big deal
				* copy in the raw values
				display "continuing because it's < 100% sample"

				* estimation sample of the failed call
				local rawif "read_hipkne_fe!=. & insamp_hipkne_tot==1 & hcahps_std!=."

				* whether a failed ebayes call leaves its outputs behind depends
				* on the ado's internals, so create whichever ones are missing;
				* capture makes this a no-op for those that already exist
				foreach out in eb_m var_m uvar_m {
					capture generate read_hipkne_`out' = .
				}

				* raw effect stands in for the EB-adjusted effect, and the raw
				* sample variance for both variance outputs
				replace read_hipkne_eb_m = read_hipkne_fe if `rawif'
				qui summ read_hipkne_fe if `rawif'
				replace read_hipkne_var_m = r(Var) if `rawif'
				replace read_hipkne_uvar_m = r(Var) if `rawif'
			}
		}
	
		label var read_hipkne_uvar_m "hipkne readm / for multi alloc / underlying var"
		label var read_hipkne_var_m "hipkne readm / for multi alloc / underlying var (w/in-HRR)"
		label var read_hipkne_eb_m "hipkne readm / for multi alloc / EB-adj FE"

		* the fake-HRR indicators are no longer needed after this call
		drop hrI*
		
		* long-horizon survival: merge the estimated effects for this bootstrap
		* index and EB adjust them within fake HRR, separately by year index
		*
		* on failure the full (100) sample stops with error 430; smaller samples
		* fall back to the raw effect and its raw sample variance
		foreach cond in $CONDITIONS_SURV {
			* bring in longsurv effects
			merge m:1 pn_preboot hrrnum_pn_preboot bootseq_within_hrrnum yidx ///
				using surv_read/long_`cond'`sampsize'/bs`bsidx'.dta, nogenerate ///
				assert(master match) keepusing( ///
					longsurv_`cond'_npats longsurv_`cond'_mean ///
					longsurv_`cond'_fe longsurv_`cond'_se ///
					longsars_`cond'_fe longsars_`cond'_se ///
					longsnra_`cond'_fe longsnra_`cond'_se ///
				)
		
			* make EB adjusted longsurv effects

			* iterate over the measures
			foreach meas in surv sars snra {

				* human-readable name of the measure, used in messages and labels
				if ("`meas'" == "surv") {
					local longmeas "long surv"
				}
				else if ("`meas'" == "sars") {
					local longmeas "long surv-ars"
				}
				else if ("`meas'" == "snra") {
					local longmeas "long surv-nora"
				}
					
				* perform univariate adjustment
				* only adjust within the analysis sample
				display "performing univariate EB adjustment for `longmeas'"
				capture noisily ebayes long`meas'_`cond'_fe long`meas'_`cond'_se ///
					if long`meas'_`cond'_fe!=. & longsamp_`cond'_tot==1, ///
					absorb(hrrnum_pn_boot) by(yidx) ///
					gen(long`meas'_`cond'_eb) var(long`meas'_`cond'_var) uvar(long`meas'_`cond'_uvar)
			
				if (_rc!=0) {
					* adjustment failed
					display "EB procedure failed to converge"
					if ("`sampsize'"=="100") {
						*  in the full sample this is a problem
						error 430
					}
					else {
						* in smaller samples, no big deal
						* copy in the raw values
						display "continuing because it's < 100% sample"

						* estimation sample of the failed call
						local rawif "long`meas'_`cond'_fe!=. & longsamp_`cond'_tot==1"

						* whether a failed ebayes call leaves its outputs behind
						* depends on the ado's internals, so create whichever ones
						* are missing; capture makes this a no-op for the others
						foreach out in eb var uvar {
							capture generate long`meas'_`cond'_`out' = .
						}

						* raw effect stands in for the EB-adjusted effect, and the
						* raw sample variance for both variance outputs
						replace long`meas'_`cond'_eb = long`meas'_`cond'_fe if `rawif'
						qui summ long`meas'_`cond'_fe if `rawif'
						replace long`meas'_`cond'_var = r(Var) if `rawif'
						replace long`meas'_`cond'_uvar = r(Var) if `rawif'
					}
				}
		
				label var long`meas'_`cond'_uvar "`cond' `longmeas' / underlying var"
				label var long`meas'_`cond'_var "`cond' `longmeas' / underlying var (w/in-HRR-yidx)"
				label var long`meas'_`cond'_eb "`cond' `longmeas' / EB-adj FE"
			}
		}
		
		* write this bootstrap index's dataset, ordered by fake provider id and
		* year, overwriting any file left by an earlier run
		sort pn_boot year
		save counts_allmeas/bs_`sampsize'/bs`bsidx'.dta, replace
		clear
		
		* bring back the data that were in memory at the preserve at the top
		* of this iteration, ready for the next bootstrap index
		restore

	}
	* end of the enclosing block, which was opened before this section
}

* close the log opened at the top of the file
log close
