ihdp_matching.rmd

---
output:
    html_document:
        toc: true
        toc_float:
            collapsed: false
        theme: lumen
        highlight: zenburn
        df_print: paged
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: echo code, include output, drop
# messages and warnings, and tidy echoed code to an 80-character width.
knitr::opts_chunk$set(
    echo = TRUE,
    include = TRUE,
    message = FALSE,
    warning = FALSE,
    tidy.opts = list(width.cutoff = 80),
    tidy = TRUE
)

# Root directory used when evaluating the chunks.
knitr::opts_knit$set(root.dir = '.')
```

```{r library}
library(tidyverse)
library(rstan)
library(rstanarm)
library(survey)
source("library/matching.R")
source("library/balance.R")
source("library/estimation.R")
library(cobalt)
library(rgenoud)
library(Matching)
library(CBPS)
library(WeightIt)
library(parallel)

# Default cobalt options: 'std' for continuous covariates and 'raw' for binary
# covariates (presumably standardized vs. raw mean differences in the balance
# tables -- confirm against the cobalt documentation).
set.cobalt.options(continuous='std', binary='raw')
```

```{r data}
# Load the saved workspace; the chunks below expect it to provide `cc2`.
load("data/cc2.Rdata")

# All column names of cc2 except the row-name, treatment and outcome columns
# listed here.
covs_all <- setdiff(
    names(cc2),
    c("row.names", "row.names.1", "treat", "treat0", "ppvtr.36")
)

# Selected covariates and their display labels. The two vectors have the same
# length and are presumably index-aligned (covs[i] labelled by cov_names[i]).
covs <- c(
    "bw", "preterm", "dayskidh",
    "sex", "first", "age",
    "black", "hispanic", "white",
    "b.marr",
    "lths", "hs", "ltcoll", "college",
    "work.dur", "prenatal", "momage"
)
cov_names <- c(
    "birth weight", "weeks preterm", "days in hospital",
    "male", "first born", "age",
    "black", "hispanic", "white",
    "unmarried at birth",
    "less than high school", "high school graduate", "some college",
    "college graduate",
    "worked during pregnancy", "had no prenatal care", "age at birth"
)
```


Raw data balance
========================

```{r, include=TRUE}
# Propensity-score formula: `treat` regressed on the covariates below. It is
# also reused as the covariate specification for the balance tables.
ps_form <- formula(
    treat ~ bwg + hispanic + black + b.marr + lths + hs + ltcoll +
        work.dur + prenatal + sex + first +
        st5 + st9 + st12 + st25 + st36 + st42 + st48 + st53 +
        age + bw + preterm + momage + dayskidh
)

# Balance on the full cc2 data, before any matching.
bal.tab(
    ps_form,
    data = cc2,
    estimand = 'ATT',
    continuous = 'std',
    m.threshold = .1,
    v.threshold = 1.1
)
```


Propensity score models
==========================

```{r}
# Each model below is cached as an .rds file under models/. The logit and CBPS
# fits are estimated and saved on the first run and read back afterwards; the
# genetic-matching fits are only ever read from disk.

# Base logit model
{
    model <- 'models/ps_logit.rds'
    if (file.exists(model)) {
        ps_logit <- readRDS(model)
    } else {
        # Fixed seed so the fit is reproducible when the cache is rebuilt.
        set.seed(20)
        ps_logit <- stan_glm(ps_form, family=binomial(link='logit'), data=cc2, algorithm='optimizing')
        saveRDS(ps_logit, model)
    }
    # Column means of the linear-predictor draws (presumably one column per
    # observation, giving one score per row of cc2).
    pscores_base <- apply(posterior_linpred(ps_logit, type='link'), 2, mean)
    # Last expression of the braces, so the balance table is printed.
    bal.tab(ps_logit, m.threshold=0.1, v.threshold=1.2)
}

# CBPS
{
    model <- 'models/ps_cbps.rds'
    if (file.exists(model)) {
        ps_cbps <- readRDS(model)
    } else {
        set.seed(20)
        ps_cbps <- CBPS(ps_form, data=cc2, ATT=1, method='over')
        saveRDS(ps_cbps, model)
    }
    # Scores are the fitted values stored on the CBPS object.
    pscores_cbps <- ps_cbps$fitted.values
    bal.tab(ps_cbps, m.threshold=0.1, v.threshold=1.2)
}

# Genetic matching
# There is no fitting fallback here: both files must already exist, otherwise
# readRDS() stops with an error.
{
    # Matching w/o replacement
    model <- 'models/mgen_1.rds'
    mgen_1 <- readRDS(model)

    # Matching w/ replacement
    model <- 'models/mgen_1_wr.rds'
    mgen_1_wr <- readRDS(model)
}
```


Matching w/o replacement
==========================
```{r}
# Build one matched data set per propensity-score source, without replacement.
# matching() comes from the sourced project helpers; its `match.ind` element is
# used to index rows of cc2.

# Base logit
matches_base <- matching(
    cc2$treat,
    score = pscores_base,
    replace = FALSE
)
matched_base <- cc2[matches_base$match.ind, ]

# CBPS
matches_cbps <- matching(
    cc2$treat,
    score = pscores_cbps,
    replace = FALSE
)
matched_cbps <- cc2[matches_cbps$match.ind, ]

# GenMatch
# Stack columns 1 and 2 of the stored matches matrix into one row-index vector
# (presumably treated indices followed by their matched controls -- confirm).
matches_mgen <- c(
    mgen_1$matches[, 1],
    mgen_1$matches[, 2]
)
matched_mgen <- cc2[matches_mgen, ]
```


Matching w/ replacement
==========================
```{r}
# Same matching as the previous section, but allowing replacement
# (replace = TRUE). Rows of cc2 are selected through `match.ind`.

# Base logit
matches.wr_base <- matching(
    cc2$treat,
    score = pscores_base,
    replace = TRUE
)
matched.wr_base <- cc2[matches.wr_base$match.ind, ]

# CBPS
matches.wr_cbps <- matching(
    cc2$treat,
    score = pscores_cbps,
    replace = TRUE
)
matched.wr_cbps <- cc2[matches.wr_cbps$match.ind, ]
```


Balance
==========================
```{r, include=TRUE}
# Balance tables for every matched data set, all with the same thresholds.
# Each call stays a separate top-level expression so its table is printed.

# Base logit, without replacement
bal.tab(
    ps_form, data = matched_base,
    estimand = 'ATT', continuous = 'std',
    m.threshold = 0.1, v.threshold = 1.2
)
# Base logit, with replacement
bal.tab(
    ps_form, data = matched.wr_base,
    estimand = 'ATT', continuous = 'std',
    m.threshold = 0.1, v.threshold = 1.2
)

# CBPS, without replacement
bal.tab(
    ps_form, data = matched_cbps,
    estimand = 'ATT', continuous = 'std',
    m.threshold = 0.1, v.threshold = 1.2
)
# CBPS, with replacement
bal.tab(
    ps_form, data = matched.wr_cbps,
    estimand = 'ATT', continuous = 'std',
    m.threshold = 0.1, v.threshold = 1.2
)

# GenMatch, without replacement
bal.tab(
    ps_form, data = matched_mgen,
    estimand = 'ATT', continuous = 'std',
    m.threshold = 0.1, v.threshold = 1.2
)
```


Treatment Effect
==========================
```{r, echo=TRUE, include=TRUE}
# Outcome model: ppvtr.36 on the treatment indicator plus covariates.
# NOTE(review): `age` is in ps_form but not in this formula -- confirm that
# the omission is intentional.
te_form <- formula(
    ppvtr.36 ~ treat + bwg + hispanic + black + b.marr + lths + hs + ltcoll +
        work.dur + prenatal + sex + first +
        st5 + st9 + st12 + st25 + st36 + st42 + st48 + st53 +
        bw + preterm + momage + dayskidh
)

# For each matched data set: reseed, refit the outcome model, and print the
# first two columns of the summary row for `treat`.

# Base logit mwor
set.seed(20)
summary(
    stan_glm(te_form, data = matched_base, algorithm = 'optimizing')
)['treat', 1:2]

# CBPS mwor
set.seed(20)
summary(
    stan_glm(te_form, data = matched_cbps, algorithm = 'optimizing')
)['treat', 1:2]

# GenMatch mwor
set.seed(20)
summary(
    stan_glm(te_form, data = matched_mgen, algorithm = 'optimizing')
)['treat', 1:2]

# Base logit mwr
set.seed(20)
summary(
    stan_glm(te_form, data = matched.wr_base, algorithm = 'optimizing')
)['treat', 1:2]

# CBPS mwr
set.seed(20)
summary(
    stan_glm(te_form, data = matched.wr_cbps, algorithm = 'optimizing')
)['treat', 1:2]

```