Skip to content

Commit

Permalink
BUGFIX: DoF - nested-within-cluster
Browse files Browse the repository at this point in the history
reghdfe was not checking whether the fixed effects were nested within the
cluster variable.
This means the reported DoF were wrong in those cases, and we were being
too drastic when computing the variance estimates.
  • Loading branch information
sergiocorreia committed Apr 4, 2015
1 parent 41584f8 commit 50130db
Show file tree
Hide file tree
Showing 11 changed files with 145 additions and 9 deletions.
Binary file modified misc/reghdfe.zip
Binary file not shown.
5 changes: 3 additions & 2 deletions package/hdfe.ado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*! hdfe 2.1.6 02apr2015
*! hdfe 2.1.8 03apr2015
*! Sergio Correia ([email protected])
* (built from multiple source files using build.py)
// -------------------------------------------------------------
Expand Down Expand Up @@ -1020,7 +1020,7 @@ end
// -------------------------------------------------------------

program define Version, eclass
local version "2.1.6 02apr2015"
local version "2.1.8 03apr2015"
ereturn clear
di as text "`version'"
ereturn local version "`version'"
Expand Down Expand Up @@ -1320,6 +1320,7 @@ syntax, [DOFadjustments(string) group(name) uid(varname) groupdta(string)]
assert inlist(_rc, 0, 498)
if (!_rc) {
Debug, level(1) msg("(categorical variable " as result "`varlabel'" as text " is nested within cluster variable " as result "`clustervar_original'" as text ", so it doesn't count towards DoF)")
local absvar_in_clustervar 1
continue, break
}
}
Expand Down
2 changes: 1 addition & 1 deletion package/hdfe.pkg
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ d KW: within
d
d Requires: Stata version 11.2
d
d Distribution-Date: 20150402
d Distribution-Date: 20150403
d

f hdfe.ado
Expand Down
5 changes: 3 additions & 2 deletions package/reghdfe.ado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*! reghdfe 2.1.6 02apr2015
*! reghdfe 2.1.8 03apr2015
*! Sergio Correia ([email protected])
* (built from multiple source files using build.py)
// -------------------------------------------------------------
Expand Down Expand Up @@ -861,7 +861,7 @@ end
// -------------------------------------------------------------

program define Version, eclass
local version "2.1.6 02apr2015"
local version "2.1.8 03apr2015"
ereturn clear
di as text "`version'"
ereturn local version "`version'"
Expand Down Expand Up @@ -3526,6 +3526,7 @@ syntax, [DOFadjustments(string) group(name) uid(varname) groupdta(string)]
assert inlist(_rc, 0, 498)
if (!_rc) {
Debug, level(1) msg("(categorical variable " as result "`varlabel'" as text " is nested within cluster variable " as result "`clustervar_original'" as text ", so it doesn't count towards DoF)")
local absvar_in_clustervar 1
continue, break
}
}
Expand Down
2 changes: 1 addition & 1 deletion package/reghdfe.pkg
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ d KW: instrumental variables
d
d Requires: Stata version 11.2
d
d Distribution-Date: 20150402
d Distribution-Date: 20150403
d

f reghdfe.ado
Expand Down
1 change: 1 addition & 0 deletions source/_hdfe/EstimateDoF.ado
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ syntax, [DOFadjustments(string) group(name) uid(varname) groupdta(string)]
assert inlist(_rc, 0, 498)
if (!_rc) {
Debug, level(1) msg("(categorical variable " as result "`varlabel'" as text " is nested within cluster variable " as result "`clustervar_original'" as text ", so it doesn't count towards DoF)")
local absvar_in_clustervar 1
continue, break
}
}
Expand Down
2 changes: 1 addition & 1 deletion source/hdfe.pkg
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ d KW: within
d
d Requires: Stata version 11.2
d
d Distribution-Date: 20150402
d Distribution-Date: 20150403
d

f hdfe.ado
Expand Down
2 changes: 1 addition & 1 deletion source/reghdfe.pkg
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ d KW: instrumental variables
d
d Requires: Stata version 11.2
d
d Distribution-Date: 20150402
d Distribution-Date: 20150403
d

f reghdfe.ado
Expand Down
2 changes: 1 addition & 1 deletion source/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.1.6 02apr2015
2.1.8 03apr2015
1 change: 1 addition & 0 deletions test/super.do
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
run test-cluster
run test-ivreg2
run test-cluster-same-as-absvar
run test-cluster-nested // Prevent regression of bug reported by Michael Wittry (see email)
run test-vce-complex-bw
run test-weights
run test-mwc
Expand Down
132 changes: 132 additions & 0 deletions test/test-cluster-nested.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
* Certification script for reghdfe: absorbed fixed effect nested in the cluster variable
cscript "reghdfe fixed effect nested in cluster" adofile reghdfe

* Setup
* (drop auto-loaded programs and clear memory before the test starts)
discard
clear all
set more off
* cls

* Convenience: "TrimMatrix <size>" keeps the first <size> columns of e(b) and
* the top-left <size> x <size> block of e(V), and stores them as e(trim_b)
* and e(trim_V) next to the current estimation results.
*   <size> : positive integer, no larger than the number of columns of e(b)
* Working copies live in temporary matrices, so matrices named trim_b or
* trim_V that already exist in the session are left untouched.
capture program drop TrimMatrix
program define TrimMatrix, eclass
	args size

	* Validate the argument before touching e()
	confirm integer number `size'
	assert `size'>0

	tempname b_head V_head
	matrix `b_head' = e(b)
	matrix `V_head' = e(V)

	* Fail with a clear assertion instead of a subscript error
	assert `size'<=colsof(`b_head')

	matrix `b_head' = `b_head'[1, 1..`size']
	matrix `V_head' = `V_head'[1..`size', 1..`size']

	* -ereturn matrix- moves the temporary matrices into e()
	ereturn matrix trim_b = `b_head'
	ereturn matrix trim_V = `V_head'
end

* Create fake dataset: auto.dta plus a within-group counter t, so that
* (turn, t) can be declared as a panel
sysuse auto
bys turn: gen t = _n

* [TEST]
* Regression specification shared by every check below
local lhs price
local rhs weight length
local absvars turn // make it the same as panelvar
local clustervar `absvars'
* K = number of regressors after expanding the varlist; later passed to TrimMatrix
fvunab tmp : `rhs'
local K : list sizeof tmp

tsset `absvars' t

* Custom adjustment: in this simple case we can compare _cons
* Drop observations with missing rep78. The variable name is spelled out in
* full so the script does not depend on variable abbreviation being enabled.
drop if missing(rep78)

// -------------------------------------------------------------------------------------------------

* Check that _xtreg_chk_cl2 is working
* Same specification run twice, with and without dof(clusters);
* both runs must report e(df_a)==1.

local nested_cmd reghdfe `lhs' `rhs', abs(`absvars') vce(cluster `clustervar')

`nested_cmd' dof(clusters)
assert e(df_a)==1

`nested_cmd'
assert e(df_a)==1

// -------------------------------------------------------------------------------------------------

* Match against xtreg

* 1. Run benchmark
* (xtreg, fe with the same cluster variable; its trimmed b and V and its
*  within R2 are the reference values)
xtreg `lhs' `rhs', cluster(`clustervar') fe
TrimMatrix `K'
di e(df_a)
di e(df_m)
local bench_df_a = e(df_a)
local bench_within = e(r2_w)
storedresults save benchmark e()

* 2. Run reghdfe
reghdfe `lhs' `rhs', absorb(`absvars') vce(cluster `clustervar')
di e(df_a)
di e(df_m)
* NOTE: See statalist post for discussion on df_m discrepancy
* http://www.stata.com/statalist/archive/2010-03/msg00941.html
* "So I think some explanation is necessary. I see no reason, conceptually, why xtreg,fe with small-sample statistics should not be exactly equivalent to areg"
TrimMatrix `K'

* 3. Compare
* (only the listed scalars, matrices and macros are compared, at tolerance 1e-12)
storedresults compare benchmark e(), tol(1e-12) include( ///
scalar: N tss rss F df_r ///
matrix: trim_b trim_V ///
macros: wexp wtype )

* WILL NOT HOLD because xtreg still includes the nested ones in e(df_a)
* assert `bench_df_a'==e(df_a)-1

* Within R2 is compared separately, at a looser tolerance (1e-6)
assert abs(`bench_within'-e(r2_within))<1e-6
storedresults drop benchmark

// -------------------------------------------------------------------------------------------------

* Check that _xtreg_chk_cl2 is working
* Repeat of the df_a check, now after the xtreg comparison. The cluster
* variable is still the absorbed variable itself at this point.

local recheck_cmd reghdfe `lhs' `rhs', abs(`absvars') vce(cluster `clustervar')

`recheck_cmd' dof(clusters)
assert e(df_a)==1

`recheck_cmd'
assert e(df_a)==1

// -------------------------------------------------------------------------------------------------
// Now repeat the above but with a different clustervar name
// -------------------------------------------------------------------------------------------------
* samecluster is a copy of the absorbed variable under another name, so the
* absvar is nested within the cluster variable without being the same variable.
gen samecluster = `absvars'
local clustervar samecluster

* Check that _xtreg_chk_cl2 is working
* The df_a checks earlier in this script only run with the cluster variable
* identical to the absvar; repeat them for the renamed cluster variable.
reghdfe `lhs' `rhs', abs(`absvars') vce(cluster `clustervar') dof(clusters)
assert e(df_a)==1
reghdfe `lhs' `rhs', abs(`absvars') vce(cluster `clustervar')
assert e(df_a)==1

* Match against xtreg

* 1. Run benchmark
xtreg `lhs' `rhs', cluster(`clustervar') fe
TrimMatrix `K'
di e(df_a)
di e(df_m)
local bench_df_a = e(df_a)
local bench_within = e(r2_w)
storedresults save benchmark e()

* 2. Run reghdfe
reghdfe `lhs' `rhs', absorb(`absvars') vce(cluster `clustervar')
di e(df_a)
di e(df_m)
* NOTE: See statalist post for discussion on df_m discrepancy
* http://www.stata.com/statalist/archive/2010-03/msg00941.html
* "So I think some explanation is necessary. I see no reason, conceptually, why xtreg,fe with small-sample statistics should not be exactly equivalent to areg"
TrimMatrix `K'

* 3. Compare
storedresults compare benchmark e(), tol(1e-12) include( ///
scalar: N tss rss F df_r ///
matrix: trim_b trim_V ///
macros: wexp wtype )

* WILL NOT HOLD because xtreg still includes the nested ones in e(df_a)
* assert `bench_df_a'==e(df_a)-1

assert abs(`bench_within'-e(r2_within))<1e-6
storedresults drop benchmark


// -------------------------------------------------------------------------------------------------
* NOTE(review): hard-coded, machine-specific working directory; -cd- will
* error on a machine where this path does not exist. Presumably the calling
* script expects to end up in the test folder -- TODO confirm before changing.
cd "D:/Github/reghdfe/test"
exit

0 comments on commit 50130db

Please sign in to comment.