* Given two fixed effects regressions on the same set of observations,
* computes the covariance matrix of each individual's two fixed effects

* Syntax
* fese2 ( y1 x1 [if] ) ( y2 x2 [if] )  , v11( ) v22( ) v12( )
*     [ ehat1( ) ehat2( ) ]
* y1 x1 [if] - first regression equation (the if qualifier, used for sample selection, is optional)
* y2 x2 [if] - second regression equation (the if qualifier, used for sample selection, is optional)
* v11 variance of fixed effect in first regression
* v22 variance of fixed effect in second regression
* v12 covariance of fixed effects in two regressions
* ehat1 fitted disturbance terms from first regression
* ehat2 fitted disturbance terms from second regression
* specifying these two is optional but may speed things up
* because it allows fese2 to calculate the covariance matrices
* without re-running xtreg
* NOTE: data must be xtset


* 2.1.0A Jan 2013 Adam Sacarny - bivariate case
* 2.0.0A Jan 2012 Adam Sacarny - faster for homoscedastic case
*! 1.0.2 5 Sep 2008 Austin Nichols
* 1.0.2  5 Sep 2008 fixes bug that ignores i() and absorb()
* 1.0.1 28 Feb 2008 makes abs option absorb and exits with error in no-regressor case
* 1.0.0 10 Feb 2008 Austin Nichols
* fese2: entry point.
*   1. Parses two parenthesized equations "( y x [if] )" and builds one
*      sample marker per equation (touse1 / touse2).
*   2. Parses the options v11() v22() v12() [ehat1() ehat2()].
*   3. Obtains fixed-effects residuals for each equation (from the user's
*      ehat variables, or by running xtreg, fe and predict, e).
*   4. Calls the Mata function fese2() to fill v11/v22/v12, then blanks the
*      results outside the relevant samples.
* The panel (group) variable is read from the _dta[iis] characteristic, so the
* data must be xtset before calling.
program fese2, sortpreserve

	forvalues i = 1/2 {

		* peel the next parenthesized equation off the front of the command line
		gettoken eqn`i' 0 : 0, parse(" ,[") match(paren)

		if ("`paren'" != "(") {
			display as error "need an equation `i' in parentheses!"
			error 198
		}
		
		* temporarily store the remaining component of the call to fese2 so that
		* we can parse the component pertaining to this equation.
		* compound double quotes are required here: an if qualifier may contain
		* string literals (e.g. if state=="CA"), which would terminate a simple
		* "..." quoted macro assignment early.
		local tempcall `"`0'"'
		local 0 `"`eqn`i''"'
		display `"equation `i' called with `0'"'
		
		syntax varlist(min=2) [if]
		
		* sample marker for this equation; first variable is y, the rest are x
		marksample touse`i'
		gettoken y`i' x`i': varlist
		
		* deal with collinearity
		_rmdcoll `y`i'' `x`i'' if `touse`i''
		if (r(k_omitted) > 0) {
			display as error "collinearity in RHS variables in equation `i'"
			error 198
		}
		
		* restore the remaining component of the call to fese2
		local 0 `"`tempcall'"'

	}
	
	* parse the rest of the call to fese2
	syntax , v11(name) v22(name) v12(name) [ehat1(name) ehat2(name)] 
	
	* deal with grouping variable
	local group :char _dta[iis]
	if ("`group'"=="") {
		display as error "data must be xtset"
		error 198
	}
	
	* observations with a missing group id are not used by xtreg, so they must
	* not be counted as observations (or as a group) in the Mata computations
	markout `touse1' `group'
	markout `touse2' `group'
	
	* the output variables must be new. "confirm new variable" tests the exact
	* name; "confirm variable" would also match abbreviations of existing names
	foreach var in `v11' `v22' `v12' {
		capture confirm new variable `var'
		if (_rc) {
			display as error "variable `var' must not exist in data yet"
			error 110
		}
	}
	
	* user-supplied residuals must be existing numeric variables; check before
	* anything is created so that a bad name leaves the data untouched
	forvalues i = 1/2 {
		if ("`ehat`i''"!="") {
			confirm numeric variable `ehat`i''
		}
	}
	
	* compute residuals for any equation where the user did not supply them
	forvalues i = 1/2 {
		if ("`ehat`i''"=="") {
			tempvar ehat`i'
			qui xtreg `y`i'' `x`i'' if `touse`i'', fe
			predict `ehat`i'', e
		}
	}
	
	* placeholders for covariance matrix (created only after the regressions
	* have succeeded, so a failed xtreg does not leave stray variables behind)
	gen `v11' = .
	gen `v22' = .
	gen `v12' = .
	
	* sample of any observation in X1 or X2
	tempvar touse_either
	gen byte `touse_either' = `touse1' | `touse2'
	
	* the Mata code builds its panel index from the group variable, so the
	* data are sorted by group first (original order is restored by sortpreserve)
	sort `group'

	mata: fese2("`y1'","`x1'","`touse1'","`ehat1'","`y2'","`x2'","`touse2'","`ehat2'","`group'","`touse_either'","`v11' `v22' `v12'")
	
	* for each group, we copy the v11/v22/v12 estimate into any observation with either
	* an fe1 or fe2. so we could have an observation with e.g. no FE1 but an FE2,
	* and we'd have a value for v11 for that observation
	
	* fix it so v11 is only defined for the fe1 sample, v22 for the fe2 sample, and
	* v12 for fe1&fe2 sample
	replace `v11'=. if !`touse1'
	replace `v22'=. if !`touse2'
	replace `v12'=. if !(`touse1'&`touse2')
	
end

	
version 9.2
mata:
// fese2(): for every panel group, computes the variance of the group's fixed
// effect in regression 1 (v11), in regression 2 (v22), and the covariance
// between the two (v12), each scaled by the matching element of the estimated
// disturbance covariance (sigma11, sigma22, sigma12), and writes the three
// numbers into every selected observation of that group.
//
// All arguments are names of Stata variables:
//   y1_st, X1_st, touse1_st, e1_st  dependent variable, regressors (space-separated
//                                   list), 0/1 sample marker and fitted
//                                   disturbances of regression 1
//   y2_st, X2_st, touse2_st, e2_st  the same for regression 2
//   gp_st                           grouping variable; panelsetup() is run on it, and
//                                   the calling ado program sorts by it beforehand
//   touse_either_st                 selection variable used for every view below
//   sv_st                           space-separated names of the three output
//                                   variables, in the order v11 v22 v12
//
// NOTE(review): observations outside a regression's sample are handled by setting
// their X rows to missing and then calling mean()/cross() on the result; this
// presumably relies on those functions skipping rows that contain missing
// values -- confirm against the Mata documentation.
void fese2(string scalar y1_st, string scalar X1_st, string scalar touse1_st, string scalar e1_st, string scalar y2_st, string scalar X2_st, string scalar touse2_st, string scalar e2_st, string scalar gp_st, string scalar touse_either_st, string scalar sv_st) {

	// open view to selector variables
	// (column 1 = regression 1 marker, column 2 = regression 2 marker)
	st_view(touse, ., (touse1_st,touse2_st), touse_either_st)
	
	// the X1 sample
	// NOTE(review): y1 (and y2 below) are not referenced again in this function
	st_view(y1, ., y1_st, touse_either_st)
	st_view(X1, ., X1_st, touse_either_st)
	st_view(e1, ., e1_st, touse_either_st)

	// the X2 sample
	st_view(y2, ., y2_st, touse_either_st)
	st_view(X2, ., X2_st, touse_either_st)
	st_view(e2, ., e2_st, touse_either_st)
	
	// the groups
	st_view(gp, ., gp_st, touse_either_st)

	// the variables for saving the variances and covariances
	st_view(sv, ., tokens(sv_st), touse_either_st)
	
	// one row per group: (first row, last row) of the group within the views
	info = panelsetup(gp, 1)
	
	// number of observations in 1 or 2
	N = rows(X1)
	// number of observations in 1, 2, and 1&2
	N1 = sum(touse[.,1])
	N2 = sum(touse[.,2])
	N12 = sum(touse[.,1] :& touse[.,2])
		
	// number of groups
	G = rows(info)
	// X variables
	K1 = cols(X1)
	K2 = cols(X2)
	
	// demeaned X's
	X1t=J(N,K1,.)
	X2t=J(N,K2,.)
	
	// demeaned X's orthogonalized with respect to the opposite sample's fixed effects
	M2X1t = J(N,K1,.)
	M1X2t = J(N,K2,.)
	
	// X_bar's at the group level
	X1_bar_gp = J(G,K1,.)
	X2_bar_gp = J(G,K2,.)

	// observations in each group
	T1 = J(G,1,.)
	T2 = J(G,1,.)
	// overlapping observations between 1 and 2
	T12 = J(G,1,.)
	
	// demeaned X sum's for observations common to both groups, at group level
	X1t_sum_common_gp = J(G,K1,.)
	X2t_sum_common_gp = J(G,K2,.)
	
	// first pass over the groups: group means, group sizes, demeaned X's
	for (i=1; i<=G; i++) {
		//printf("processing group ")
		//printf(strofreal(i))
		//printf("\n")

		// bring in data for group i
		X1i = panelsubmatrix(X1, i, info)
		X2i = panelsubmatrix(X2, i, info)
		tousei = panelsubmatrix(touse, i, info)
		
		// for each set of X, set obs that were excluded from regression to missing
		
		// BUG!!!
		// if there is just one observation, (tousei[.,1]:==0) is a 1x1 matrix
		// and suppose that matrix is (0)
		// selectindex doesn't know if it's a row vector or a column vector
		// we want it to be a column vector but selectindex resolves the ambiguity by
		// saying it's a row vector. then selectindex() assumes i'm trying to select
		// none of the columns and returns a 1x0 null matrix
		// so rows(selectindex()) in this case == 1, NOT be the number of rows that
		// are being selected (which is 0)!
		
		// so we MUST count rows using sum(tousei[.,1]:==0)
		
		rowindices1 = selectindex(tousei[.,1]:==0)
		X1i[rowindices1,.] = J(sum(tousei[.,1]:==0),K1,.)
		rowindices2 = selectindex(tousei[.,2]:==0)
		X2i[rowindices2,.] = J(sum(tousei[.,2]:==0),K2,.)
		
		// set X_bar for the group
		X1_bar_gp[i,.] = mean(X1i)
		X2_bar_gp[i,.] = mean(X2i)
		
		//matlist( ( X1_bar_gp[i,.] \ X2_bar_gp[i,.] ) )
		
		// set observations for each group
		T1[i,1] = sum(tousei[.,1])
		T2[i,1] = sum(tousei[.,2])
		T12[i,1] = sum(tousei[.,1] :& tousei[.,2])
				
		//matlist((T1[i,1],T2[i,1],T12[i,1]))
		
		// demeaned data for group i
		X1ti = X1i:-mean(X1i)
		X2ti = X2i:-mean(X2i)
		
		// copy into X1t and X2t
		X1t[|info[i,1],1 \ info[i,2],K1 |] = X1ti
		X2t[|info[i,1],1 \ info[i,2],K2 |] = X2ti
						
		// sum demeaned X's for common observations
		// selectindex bug shouldn't matter here because under no circumstances
		// will we have one observation in the group with tousei[1,1]==tousei[1,2]==0
		// NOTE(review): a one-observation group that is in only one of the two
		// samples still yields a 1x1 zero argument here -- confirm that the
		// resulting empty selection is handled as intended
		rowscommon = selectindex(tousei[.,1] :& tousei[.,2])
		X1t_sum_common_gp[i,.] = colsum( X1ti[rowscommon,.] )
		X2t_sum_common_gp[i,.] = colsum( X2ti[rowscommon,.] )
		
		// demeaned data for group i, orthogonalized with respect to opposite
		// sample's fixed effects

		// construct all missing placeholders
		M2X1ti = J(rows(X1ti),K1,.)
		M1X2ti = J(rows(X2ti),K2,.)
		
		// copy in common observations
		M2X1ti[rowscommon,.] = X1ti[rowscommon,.]
		M1X2ti[rowscommon,.] = X2ti[rowscommon,.]
		
		// subtract the sum of the common observations divided by the number of
		// observations in the opposite sample
		M2X1ti[.,.] = M2X1ti :- (X1t_sum_common_gp[i,.]/T2[i,1])
		M1X2ti[.,.] = M1X2ti :- (X2t_sum_common_gp[i,.]/T1[i,1])
		
		// copy into M2X1t and M1X2t
		M2X1t[|info[i,1],1 \ info[i,2],K1 |] = M2X1ti
		M1X2t[|info[i,1],1 \ info[i,2],K2 |] = M1X2ti
		
		//matlist((M2X1ti,X1ti,M1X2ti,X2ti))
		
		//matlist( ( X1t_sum_common_gp[i,.] \ X2t_sum_common_gp[i,.] ) )

	}
	
	// number of groups in 1, 2, and 1&2
	G1 = sum(T1 :> 0)
	G2 = sum(T2 :> 0)
	G12 = sum(T12 :> 0)
	
	// create (Xt'Xt)^-1 for X1 and X2
	X1tX1t_inv=cross(X1t,X1t)
	_invsym(X1tX1t_inv)
	X2tX2t_inv=cross(X2t,X2t)
	_invsym(X2tX2t_inv)
		
	// create X2t'X1t
	X2tX1t = cross(X2t,X1t)	
	// create (X1t'X1t)^-1 * (X1t'X2t) * (X2t'X2t)^-1
	inner = cross(X1tX1t_inv, cross(X2tX1t,X2tX2t_inv) )
		
	// denominators for the sigma matrix diagonal elements
	denom11 = N1-K1-G1
	denom22 = N2-K2-G2
		
	// degree of freedom adjustment for the X1 and X2 variables
	dof_Xvars = trace(cross(M2X1t,M2X1t),X1tX1t_inv)
	dof_Xvars = dof_Xvars + trace(cross(M1X2t,M1X2t),X2tX2t_inv)
	dof_Xvars = dof_Xvars - trace(X2tX1t,inner)
	
	// degree of freedom adjustment for the fixed effects
	// NOTE(review): for groups with T1==0 or T2==0 the ratio below is a division
	// by zero; presumably sum() counts the resulting missing value as zero -- confirm
	//printf("G12,sum term\n")
	//matlist((G12,sum( ( (T1:-T12):/T1 ) :* ( (T2:-T12):/T2 ) )))
	dof_FEs = G12 - sum( ( (T1:-T12):/T1 ) :* ( (T2:-T12):/T2 ) )
	//printf("N12, dof X, dof 1\n")
	//matlist((N12,dof_Xvars,dof_FEs))
	//printf("sigma_12 numerator,denominator\n")
	//matlist((cross(e1,touse[.,1] :& touse[.,2], e2),N12-dof_Xvars-dof_FEs))
	
	// denominator for the cross term
	denom12 = N12 - dof_Xvars - dof_FEs
	
	// estimate the sigma matrix
	// (sums of squares / cross products of the fitted disturbances, weighted by the
	// 0/1 sample markers, divided by the denominators computed above)
	//printf("sse11: "+strofreal(sum(e1:^2))+"\n")
	sigma11 = cross(e1,touse[.,1],e1)/denom11
	sigma22 = cross(e2,touse[.,2],e2)/denom22
	sigma12 = cross(e1,touse[.,1] :& touse[.,2], e2)/denom12

	// the estimated sigma elements are always displayed
	printf("sigma matrix\n")
	matlist((sigma11,sigma22,sigma12))
	
	// second pass over the groups: per-group v11, v22, v12, written to the output views
	for (i=1; i<=rows(info); i++) {		
		// v11 = 1/T1 + ( X1bar * (X1t'X1t)^-1 * X1bar' )
		v11 = (1/T1[i,1]) + cross(cross(X1_bar_gp[i,.]',X1tX1t_inv)',X1_bar_gp[i,.]')
		
		// v22 = 1/T2 + ( X2bar * (X2t'X2t)^-1 * X2bar' )
		v22 = (1/T2[i,1]) + cross(cross(X2_bar_gp[i,.]',X2tX2t_inv)',X2_bar_gp[i,.]')
	
		// v12
		
		// first part: T12/(T1*T2)
		v12_one = T12[i,1]/(T1[i,1]*T2[i,1])
		
		// second part: X1bar * (X1t'X1t)^-1 * (X1t'X2t) * (X2t'X2t)^-1 * X2bar'
		v12_two = cross(cross(X1_bar_gp[i,.]',inner)',X2_bar_gp[i,.]')
		
		//printf("hi\n")
		//matlist(X1_bar_gp[i,.])
		//matlist(X1tX1t_inv')
		//matlist(X1t_sum_common_gp[i,.]')
		// third part: (1/T2) * X1bar * (X1t'X1t)^-1 * X1t_sum_common
		v12_three = (1/T2[i,1])*cross(X1_bar_gp[i,.]',cross(X1tX1t_inv,X1t_sum_common_gp[i,.]'))
		
		// fourth part: (1/T1) * X2bar * (X2t'X2t)^-1 * X2t_sum_common
		v12_four = (1/T1[i,1])*cross(X2_bar_gp[i,.]',cross(X2tX2t_inv,X2t_sum_common_gp[i,.]'))
		
		//matlist((v12_one,v12_two,v12_three,v12_four))
		v12 = v12_one + v12_two - v12_three - v12_four
		
		//matlist((v11,v22,v12))
		
		// scale each term by the matching sigma element
		vvvi = (sigma11,sigma22,sigma12):*(v11,v22,v12)
		
		//matlist((info[i,2],info[i,1],rows(sv),cols(sv)))
		
		// replicate the 1x3 result down every row of group i in the output views
		sv[| info[i,1],1 \ info[i,2],3 |] = J(info[i,2]-info[i,1]+1,1,vvvi)
		
	}

}

// matlist(): prints a real matrix as a boxed table with numbered rows and
// columns.  fmt is an optional numeric display format; when it is empty the
// format "%g" is used (and stored in fmt).  Nothing is printed for a matrix
// with no elements.
void matlist(
    real matrix X,
    | string scalar fmt
    )
{
    real scalar     r, c, nr, nc, cellw, labw
    string scalar   cellfmt, rowfmt, indent, rule

    if (fmt=="") fmt = "%g"

    // width of a value displayed with fmt, measured on a sample number
    cellw = strlen(sprintf(fmt,-1/3))

    if (length(X)==0) return

    nr = rows(X)
    nc = cols(X)

    // digits needed for the largest row number
    labw = trunc(log10(nr)) + 1

    // a cell must fit both the column number and a formatted value, plus padding
    cellw = max((trunc(log10(nc)) + 1,cellw)) + 2

    cellfmt = "%"+strofreal(cellw)+"s"
    rowfmt  = "{txt}  %"+strofreal(labw)+"s {c |}{res}"
    indent  = (2+labw+1)*" "
    rule    = "{hline " + strofreal((cellw+1)*nc+1) + "}"

    // column-number header line
    printf("{txt}"+indent+" ")
    for (c=1;c<=nc;c++) printf(cellfmt+" ", sprintf("%g", c))
    printf("  \n")

    // top border
    printf(indent + "{c TLC}" + rule + "{c TRC}\n")

    // one line per matrix row: row number, formatted cells, right border
    for (r=1;r<=nr;r++) {
        printf(rowfmt, sprintf("%g", r))
        for (c=1;c<=nc;c++) printf(cellfmt+" ",sprintf(fmt, X[r,c]))
        printf(" {txt}{c |}\n")
    }

    // bottom border
    printf(indent + "{c BLC}" + rule + "{c BRC}\n")
}

end
exit

