BUGFIX: DoF - nested-within-cluster

reghdfe was not checking if the errors were nested within the cluster variable. This means the reported DoF were wrong in those cases, and we were being too drastic when computing the variance estimates.
sergiocorreia · Apr 4, 2015 · 50130db · 50130db
1 parent 41584f8
commit 50130db
Show file tree

Hide file tree

Showing 11 changed files with 145 additions and 9 deletions.
diff --git a/misc/reghdfe.zip b/misc/reghdfe.zip
diff --git a/package/hdfe.ado b/package/hdfe.ado
@@ -1,4 +1,4 @@
-*! hdfe 2.1.6 02apr2015
+*! hdfe 2.1.8 03apr2015
 *! Sergio Correia ([email protected])
 * (built from multiple source files using build.py)
 // -------------------------------------------------------------
@@ -1020,7 +1020,7 @@ end
 // -------------------------------------------------------------
 
 program define Version, eclass
-    local version "2.1.6 02apr2015"
+    local version "2.1.8 03apr2015"
     ereturn clear
     di as text "`version'"
     ereturn local version "`version'"
@@ -1320,6 +1320,7 @@ syntax, [DOFadjustments(string) group(name) uid(varname) groupdta(string)]
 					assert inlist(_rc, 0, 498)
 					if (!_rc) {
 						Debug, level(1) msg("(categorical variable " as result "`varlabel'" as text " is nested within cluster variable " as result "`clustervar_original'" as text ", so it doesn't count towards DoF)")
+						local absvar_in_clustervar 1
 						continue, break
 					}
 				}

diff --git a/package/hdfe.pkg b/package/hdfe.pkg
@@ -26,7 +26,7 @@ d KW: within
 d
 d Requires: Stata version 11.2
 d
-d Distribution-Date: 20150402
+d Distribution-Date: 20150403
 d
 
 f hdfe.ado

diff --git a/package/reghdfe.ado b/package/reghdfe.ado
@@ -1,4 +1,4 @@
-*! reghdfe 2.1.6 02apr2015
+*! reghdfe 2.1.8 03apr2015
 *! Sergio Correia ([email protected])
 * (built from multiple source files using build.py)
 // -------------------------------------------------------------
@@ -861,7 +861,7 @@ end
 // -------------------------------------------------------------
 
 program define Version, eclass
-    local version "2.1.6 02apr2015"
+    local version "2.1.8 03apr2015"
     ereturn clear
     di as text "`version'"
     ereturn local version "`version'"
@@ -3526,6 +3526,7 @@ syntax, [DOFadjustments(string) group(name) uid(varname) groupdta(string)]
 					assert inlist(_rc, 0, 498)
 					if (!_rc) {
 						Debug, level(1) msg("(categorical variable " as result "`varlabel'" as text " is nested within cluster variable " as result "`clustervar_original'" as text ", so it doesn't count towards DoF)")
+						local absvar_in_clustervar 1
 						continue, break
 					}
 				}

diff --git a/package/reghdfe.pkg b/package/reghdfe.pkg
@@ -16,7 +16,7 @@ d KW: instrumental variables
 d
 d Requires: Stata version 11.2
 d
-d Distribution-Date: 20150402
+d Distribution-Date: 20150403
 d
 
 f reghdfe.ado

diff --git a/source/_hdfe/EstimateDoF.ado b/source/_hdfe/EstimateDoF.ado
@@ -121,6 +121,7 @@ syntax, [DOFadjustments(string) group(name) uid(varname) groupdta(string)]
 					assert inlist(_rc, 0, 498)
 					if (!_rc) {
 						Debug, level(1) msg("(categorical variable " as result "`varlabel'" as text " is nested within cluster variable " as result "`clustervar_original'" as text ", so it doesn't count towards DoF)")
+						local absvar_in_clustervar 1
 						continue, break
 					}
 				}

diff --git a/source/hdfe.pkg b/source/hdfe.pkg
@@ -26,7 +26,7 @@ d KW: within
 d
 d Requires: Stata version 11.2
 d
-d Distribution-Date: 20150402
+d Distribution-Date: 20150403
 d
 
 f hdfe.ado

diff --git a/source/reghdfe.pkg b/source/reghdfe.pkg
@@ -16,7 +16,7 @@ d KW: instrumental variables
 d
 d Requires: Stata version 11.2
 d
-d Distribution-Date: 20150402
+d Distribution-Date: 20150403
 d
 
 f reghdfe.ado

diff --git a/source/version.txt b/source/version.txt
@@ -1 +1 @@
-2.1.6 02apr2015
+2.1.8 03apr2015
diff --git a/test/super.do b/test/super.do
@@ -21,6 +21,7 @@
 	run test-cluster
 	run test-ivreg2
 	run test-cluster-same-as-absvar
+	run test-cluster-nested // Prevent regression of bug reported by Michael Wittry (see email)
 	run test-vce-complex-bw
 	run test-weights
 	run test-mwc

diff --git a/test/test-cluster-nested.do b/test/test-cluster-nested.do
@@ -0,0 +1,132 @@
+cscript "reghdfe fixed effect nested in cluster" adofile reghdfe
+
+* Setup
+	discard
+	clear all
+	set more off
+	* cls
+
+* Convenience: "TrimMatrix <size>" saves the first <size> entries of e(b) and e(V) as e(trim_b) and e(trim_V)
+	capture program drop TrimMatrix // drop any earlier definition; capture ignores the error if none exists
+	program define TrimMatrix, eclass // eclass because the trimmed matrices are posted to e() below
+	args size // size: how many leading coefficients to keep
+		assert `size'>0 // guard: the 1..size subscripts below need a positive size
+		matrix trim_b = e(b)
+		matrix trim_V = e(V)
+		matrix trim_b = trim_b[1, 1..`size'] // keep the first <size> columns of the coefficient row vector
+		matrix trim_V = trim_V[1..`size',1..`size'] // keep the matching top-left block of the variance matrix
+		ereturn matrix trim_b = trim_b // stored as e(trim_b)
+		ereturn matrix trim_V = trim_V // stored as e(trim_V)
+	end
+
+* Create fake dataset
+	sysuse auto
+	bys turn: gen t = _n // running index within each value of turn; used as the time variable in tsset below
+
+* [TEST]
+	local lhs price
+	local rhs weight length
+	local absvars turn // make it the same as panelvar
+	local clustervar `absvars' // cluster on the absorbed variable itself
+	fvunab tmp : `rhs'
+	local K : list sizeof tmp // number of regressors; passed to TrimMatrix as the size
+
+	tsset `absvars' t
+
+	* Custom adjustment: in this simple case we can compare _cons
+	drop if missing(rep) // NOTE(review): rep is presumably an abbreviation of rep78 from the auto dataset - confirm
+
+// -------------------------------------------------------------------------------------------------
+
+	* Check that _xtreg_chk_cl2 is working
+
+	reghdfe `lhs' `rhs', abs(`absvars') vce(cluster `clustervar') dof(clusters)
+	assert e(df_a)==1
+	reghdfe `lhs' `rhs', abs(`absvars') vce(cluster `clustervar')
+	assert e(df_a)==1
+
+// -------------------------------------------------------------------------------------------------
+
+	* Match against xtreg
+
+	* 1. Run benchmark
+	xtreg `lhs' `rhs', cluster(`clustervar') fe
+	TrimMatrix `K'
+	di e(df_a)
+	di e(df_m)
+	local bench_df_a = e(df_a)
+	local bench_within = e(r2_w)
+	storedresults save benchmark e()
+
+	* 2. Run reghdfe
+	reghdfe `lhs' `rhs', absorb(`absvars') vce(cluster `clustervar')
+	di e(df_a)
+	di e(df_m)
+	* NOTE: See statalist post for discussion on df_m discrepancy
+	* http://www.stata.com/statalist/archive/2010-03/msg00941.html	
+	* "So I think some explanation is necessary. I see no reason, conceptually, why xtreg,fe with small-sample statistics should not be exactly equivalent to areg"
+	TrimMatrix `K'
+
+	* 3. Compare
+	storedresults compare benchmark e(), tol(1e-12) include( ///
+		scalar: N tss rss F df_r ///  
+		matrix: trim_b trim_V ///
+		macros: wexp wtype )
+
+	* WILL NOT HOLD B/C xtreg still includes the nested ones in e(df_a)
+	* assert `bench_df_a'==e(df_a)-1
+
+	assert abs(`bench_within'-e(r2_within))<1e-6
+	storedresults drop benchmark
+
+// -------------------------------------------------------------------------------------------------
+
+	* Check that _xtreg_chk_cl2 is working
+
+	reghdfe `lhs' `rhs', abs(`absvars') vce(cluster `clustervar') dof(clusters)
+	assert e(df_a)==1
+	reghdfe `lhs' `rhs', abs(`absvars') vce(cluster `clustervar')
+	assert e(df_a)==1
+
+// -------------------------------------------------------------------------------------------------
+// Now repeat the above but with a different clustervar name
+// -------------------------------------------------------------------------------------------------
+	gen samecluster = `absvars'
+	local clustervar samecluster
+
+	* Match against xtreg
+
+	* 1. Run benchmark
+	xtreg `lhs' `rhs', cluster(`clustervar') fe
+	TrimMatrix `K'
+	di e(df_a)
+	di e(df_m)
+	local bench_df_a = e(df_a)
+	local bench_within = e(r2_w)
+	storedresults save benchmark e()
+
+	* 2. Run reghdfe
+	reghdfe `lhs' `rhs', absorb(`absvars') vce(cluster `clustervar')
+	di e(df_a)
+	di e(df_m)
+	* NOTE: See statalist post for discussion on df_m discrepancy
+	* http://www.stata.com/statalist/archive/2010-03/msg00941.html	
+	* "So I think some explanation is necessary. I see no reason, conceptually, why xtreg,fe with small-sample statistics should not be exactly equivalent to areg"
+	TrimMatrix `K'
+
+	* 3. Compare
+	storedresults compare benchmark e(), tol(1e-12) include( ///
+		scalar: N tss rss F df_r ///  
+		matrix: trim_b trim_V ///
+		macros: wexp wtype )
+
+	* WILL NOT HOLD B/C xtreg still includes the nested ones in e(df_a)
+	* assert `bench_df_a'==e(df_a)-1
+
+	assert abs(`bench_within'-e(r2_within))<1e-6
+	storedresults drop benchmark
+
+
+// -------------------------------------------------------------------------------------------------
+cd "D:/Github/reghdfe/test" // NOTE(review): hard-coded, machine-specific path; presumably fails on any other checkout location - confirm
+exit