In this appendix we provide the code to replicate the tables and figures in the article. Pressing the \(\triangleright\) Code button reveals the R code that creates the relevant figure or table, along with a description of the steps involved in the analysis. A Stata version of each analysis follows under the "Stata code" headings.
Table 1: Medicaid Expansion Adoption
The following code block produces Table 1, which shows the roll-out of Medicaid Expansion under the Affordable Care Act (ACA) across states.
## Install packages from Github if needed
# load packages
library(tidyverse)
library(kableExtra)
library(modelsummary)
library(gt)
library(did)
library(ggthemes)
library(fixest)
library(latex2exp)
library(HonestDiD)

# omit NA from tables
options(knitr.kable.NA = '')

# set baseline output for modelsummary
options(modelsummary_factory_default = 'kableExtra')

# set ggplot theme
theme_set(theme_clean() +
            theme(plot.background = element_blank(),
                  legend.background = element_rect(color = "white"),
                  strip.background = element_rect(color = "black")))

# load cleaned data
mydata <- read_csv(here::here("data", "county_mortality_data.csv")) %>%
  # make state the abbreviation
  mutate(state = str_sub(county, nchar(county) - 1, nchar(county))) %>%
  # drop District of Columbia from the data
  filter(state != "DC") %>%
  # make a second adoption variable for the table
  mutate(adopt = case_when(
    # missing is non-expansion
    is.na(yaca) ~ "Non-Expansion",
    # fix a couple pre-2014 adoptions from Miller, Johnson and Wherry
    state %in% c("DE", "MA", "NY", "VT") ~ "Pre-2014",
    TRUE ~ as.character(yaca)
  ))

# get adoption year by state
adopts <- mydata %>%
  select(state, adopt) %>%
  distinct() %>%
  arrange(state)

# first get the share of states, share of counties, and share of adults in 2013 by adoption category
# first the states and share of states
states <- adopts %>%
  group_by(adopt) %>%
  summarize(states = paste0(state, collapse = ", "),
            state_share = length(state) / 50)

# next get the county share and the population share
counties_pop <- mydata %>%
  # just for year 2013
  filter(year == 2013) %>%
  # get county and population totals
  mutate(total_counties = n(),
         total_pop = sum(population_20_64, na.rm = TRUE)) %>%
  group_by(adopt) %>%
  # make into shares
  summarize(county_share = n() / mean(total_counties),
            pop_share = sum(population_20_64, na.rm = TRUE) / mean(total_pop))

# make a table and export
states %>%
  left_join(counties_pop, by = "adopt") %>%
  slice(9, 1:8) %>%
  # format the numbers to two digits
  mutate(across(state_share:pop_share, ~ scales::number(., accuracy = 0.01))) %>%
  # format the table
  kable(col.names = c("Expansion \n Year", "States", "Share of States",
                      "Share of Counties", "Share of Adults (2013)"),
        booktabs = T,
        caption = "Medicaid Expansion Under the Affordable Care Act",
        label = "adoptions",
        align = c("c"),
        escape = FALSE,
        linesep = "") %>%
  kable_styling(latex_options = c("scale_down", "HOLD_position")) %>%
  column_spec(2, width = "20em")
Medicaid Expansion Under the Affordable Care Act

| Expansion Year | States | Share of States | Share of Counties | Share of Adults (2013) |
|:---:|:---|:---:|:---:|:---:|
| Pre-2014 | DE, MA, NY, VT | 0.08 | 0.03 | 0.09 |
| 2014 | AR, AZ, CA, CO, CT, HI, IA, IL, KY, MD, MI, MN, ND, NH, NJ, NM, NV, OH, OR, RI, WA, WV | 0.44 | 0.36 | 0.45 |
| 2015 | AK, IN, PA | 0.06 | 0.06 | 0.06 |
| 2016 | LA, MT | 0.04 | 0.04 | 0.02 |
| 2019 | ME, VA | 0.04 | 0.05 | 0.03 |
| 2020 | ID, NE, UT | 0.06 | 0.04 | 0.02 |
| 2021 | MO, OK | 0.04 | 0.06 | 0.03 |
| 2023 | NC, SD | 0.04 | 0.05 | 0.03 |
| Non-Expansion | AL, FL, GA, KS, MS, SC, TN, TX, WI, WY | 0.20 | 0.31 | 0.26 |
Stata code
clear
capture log close

*******************************************************************************
* 1. Load original data
*******************************************************************************
cd "../../data"
insheet using "county_mortality_data.csv", clear
keep state county_code county year yaca population_20_64

* Define treatment timing
gen treat_year = real(yaca)
replace treat_year = 0 if missing(treat_year)
tostring treat_year, gen(treat_str)
destring yaca, replace force
destring population_20_64, replace force

* Drop DC as in JEL Table 1
drop if state == "District of Columbia"

****************************************************
* 2. Create adoption category
****************************************************
gen adopt = ""
replace adopt = "Pre-2014" if inlist(state, "Delaware", "Massachusetts", "New York", "Vermont")
replace adopt = string(yaca) if !missing(yaca) & adopt == ""
replace adopt = "Non-Expansion" if missing(yaca)

****************************************************
* 3. Get number of states and state share
****************************************************
preserve
gen st_abr = substr(county, max(1, strlen(county) - 1), 2)
keep st_abr adopt
duplicates drop
sort adopt st_abr
by adopt: gen strL state_list = st_abr
by adopt: replace state_list = state_list[_n-1] + ", " + st_abr if adopt[_n-1] == adopt[_n]
bysort adopt: gen state_count = _N
gen state_share = state_count / 50
gen len = strlen(state_list)
bysort adopt: egen max = max(len)
keep if len == max
keep adopt state_list state_share
tempfile state_summary
save `state_summary', replace
restore

****************************************************
* 4. Get county & adult pop share (year = 2013)
****************************************************
preserve
keep if year == 2013

* Total county count and total 20–64 pop in 2013
su population_20_64
scalar total_pop = r(sum)
su county_code
scalar total_counties = r(N)
collapse (count) county_count = county_code (sum) pop = population_20_64, by(adopt)
gen county_share = county_count / total_counties
gen pop_share = pop / total_pop
tempfile county_summary
save `county_summary'
restore

****************************************************
* 5. Merge and order categories for display
****************************************************
use `state_summary', clear
merge 1:1 adopt using `county_summary', nogen

* Order like JEL Table 1
gen order = .
replace order = 1 if adopt == "Pre-2014"
replace order = 2 if adopt == "2014"
replace order = 3 if adopt == "2015"
replace order = 4 if adopt == "2016"
replace order = 5 if adopt == "2019"
replace order = 6 if adopt == "2020"
replace order = 7 if adopt == "2021"
replace order = 8 if adopt == "2023"
replace order = 9 if adopt == "Non-Expansion"
sort order

* Round to match JEL formatting
gen state_share_fmt = string(state_share, "%4.2f")
gen county_share_fmt = string(county_share, "%4.2f")
gen pop_share_fmt = string(pop_share, "%4.2f")

****************************************************
* 6. Export table
****************************************************
export excel adopt state_list state_share_fmt county_share_fmt pop_share_fmt ///
    using "../data/adoptions_table.xlsx", firstrow(variables) replace
\(2\times2\) DiD setups
The following tables and figures show our analyses within the \(2\times2\) section of the paper.
Table 2: Simple \(2\times2\) DiD
Table 2 shows how we can calculate the simple \(2\times2\) DiD comparing the change in the crude mortality rates between states that chose to expand Medicaid in 2014 and those that had not yet expanded by 2019.
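Written out, the simple \(2\times2\) estimate is just a difference of four group means (the notation here is ours, with \(\bar{y}\) indexed by group and year):

\[
\widehat{ATT} = \left(\bar{y}_{\text{Exp},\,2014} - \bar{y}_{\text{Exp},\,2013}\right) - \left(\bar{y}_{\text{NoExp},\,2014} - \bar{y}_{\text{NoExp},\,2013}\right).
\]

For example, the unweighted estimate in Table 2 is \((428.5 - 419.2) - (483.1 - 474.0) \approx 0.1\); the table computes the difference from the unrounded group means, so the displayed rows need not add up exactly.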
# set seed for reproducibility
set.seed(20240924)

# set filepath locations for tables and figures
file.loc.tab <- here::here("tables/")
file.loc.fig <- here::here("figures/")

# load cleaned data
mydata <- read_csv(here::here("data", "county_mortality_data.csv")) %>%
  # make state the abbreviation
  mutate(state = str_sub(county, nchar(county) - 1, nchar(county))) %>%
  # drop DC and pre-2014 adoption states
  filter(!(state %in% c("DC", "DE", "MA", "NY", "VT"))) %>%
  # drop states that adopt between 2014 and 2019
  filter(yaca == 2014 | is.na(yaca) | yaca > 2019)

# set the covariates that we're going to use.
covs <- c("perc_female", "perc_white", "perc_hispanic",
          "unemp_rate", "poverty_rate", "median_income")

## Clean data and add in covariates
mydata <- mydata %>%
  # make variables
  mutate(perc_white = population_20_64_white / population_20_64 * 100,
         perc_hispanic = population_20_64_hispanic / population_20_64 * 100,
         perc_female = population_20_64_female / population_20_64 * 100,
         unemp_rate = unemp_rate * 100,
         median_income = median_income / 1000) %>%
  # keep just subset of variables that we will use later
  select(state, county, county_code, year, population_20_64, yaca,
         starts_with("perc_"), crude_rate_20_64, all_of(covs))

# keep only counties with full observations for outcome and covariates in 2013 and 2014
mydata <- mydata %>%
  # allow the aca expansion variable to be missing
  drop_na(!yaca) %>%
  group_by(county_code) %>%
  # need full covariates for 2013 and 2014
  filter(length(which(year == 2013 | year == 2014)) == 2) %>%
  ungroup()

# finally, keep only counties with full mortality data for 2009 to 2019
mydata <- mydata %>%
  group_by(county_code) %>%
  drop_na(crude_rate_20_64) %>%
  filter(n() == 11)

# make a smaller dataset with only years 2013 and 2014
short_data <- mydata %>%
  # make a binary variable that identifies ACA expansion and post-years
  mutate(Treat = if_else(yaca == 2014 & !is.na(yaca), 1, 0),
         Post = if_else(year == 2014, 1, 0)) %>%
  # filter years for just 2013 and 2014
  filter(year %in% c(2013, 2014)) %>%
  # make a variable with population weight in 2013
  group_by(county_code) %>%
  mutate(set_wt = population_20_64[which(year == 2013)]) %>%
  ungroup()

# get group means for Table 2
# unweighted expansion pre-reform
T_pre <- short_data %>%
  filter(Treat == 1 & year == 2013) %>%
  summarize(mean = mean(crude_rate_20_64)) %>%
  pull(mean)

# unweighted non-expansion pre-reform
C_pre <- short_data %>%
  filter(Treat == 0 & year == 2013) %>%
  summarize(mean = mean(crude_rate_20_64)) %>%
  pull(mean)

# unweighted expansion post-reform
T_Post <- short_data %>%
  filter(Treat == 1 & year == 2014) %>%
  summarize(mean = mean(crude_rate_20_64)) %>%
  pull(mean)

# unweighted non-expansion post-reform
C_Post <- short_data %>%
  filter(Treat == 0 & year == 2014) %>%
  summarize(mean = mean(crude_rate_20_64)) %>%
  pull(mean)

# get the same group means but this time weighted
T_pre_weight <- short_data %>%
  filter(Treat == 1 & year == 2013) %>%
  summarize(mean = weighted.mean(crude_rate_20_64, w = set_wt)) %>%
  pull(mean)

C_pre_weight <- short_data %>%
  filter(Treat == 0 & year == 2013) %>%
  summarize(mean = weighted.mean(crude_rate_20_64, w = set_wt)) %>%
  pull(mean)

T_Post_weight <- short_data %>%
  filter(Treat == 1 & year == 2014) %>%
  summarize(mean = weighted.mean(crude_rate_20_64, w = set_wt)) %>%
  pull(mean)

C_Post_weight <- short_data %>%
  filter(Treat == 0 & year == 2014) %>%
  summarize(mean = weighted.mean(crude_rate_20_64, w = set_wt)) %>%
  pull(mean)

# function to do a reasonable rounding
g_round <- function(x, k) {
  format(round(x, k), nsmall = k)
}

# make table and export
tribble(
  ~"", ~"Expansion", ~"No Expansion", ~"Gap/DiD", ~"Expansion", ~"No Expansion", ~"Gap/DiD",
  "2013",
  as.character(g_round(T_pre, 1)), as.character(g_round(C_pre, 1)), as.character(g_round(T_pre - C_pre, 1)),
  as.character(g_round(T_pre_weight, 1)), as.character(g_round(C_pre_weight, 1)), as.character(g_round(T_pre_weight - C_pre_weight, 1)),
  "2014",
  as.character(g_round(T_Post, 1)), as.character(g_round(C_Post, 1)), as.character(g_round(T_Post - C_Post, 1)),
  as.character(g_round(T_Post_weight, 1)), as.character(g_round(C_Post_weight, 1)), as.character(g_round(T_Post_weight - C_Post_weight, 1)),
  "Trend/DiD",
  as.character(g_round(T_Post - T_pre, 1)), as.character(g_round(C_Post - C_pre, 1)), as.character(g_round((T_Post - T_pre) - (C_Post - C_pre), 1)),
  as.character(g_round(T_Post_weight - T_pre_weight, 1)), as.character(g_round(C_Post_weight - C_pre_weight, 1)), as.character(g_round((T_Post_weight - T_pre_weight) - (C_Post_weight - C_pre_weight), 1))
) %>%
  # format table
  kable(align = 'c', booktabs = T, escape = F,
        caption = "Simple 2 X 2 DiD") %>%
  kable_styling() %>%
  row_spec(3, color = "BrickRed") %>%
  column_spec(c(1:3, 5:6), color = "black") %>%
  row_spec(3, italic = TRUE) %>%
  column_spec(c(4, 7), italic = TRUE) %>%
  add_header_above(c(" " = 1, "Unweighted Averages" = 3, "Weighted Averages" = 3))
Simple 2 X 2 DiD (columns 1-3: unweighted averages; columns 4-6: weighted averages)

|  | Expansion | No Expansion | Gap/DiD | Expansion | No Expansion | Gap/DiD |
|:---|:---:|:---:|:---:|:---:|:---:|:---:|
| 2013 | 419.2 | 474.0 | -54.8 | 322.7 | 376.4 | -53.7 |
| 2014 | 428.5 | 483.1 | -54.7 | 326.5 | 382.7 | -56.2 |
| Trend/DiD | 9.3 | 9.1 | 0.1 | 3.7 | 6.3 | -2.6 |
Stata code
********************************
* 1. Load and prepare data
********************************
clear all
set more off
capture log close
insheet using "county_mortality_data.csv", clear

********************************
* 2. Prepare timing groups
********************************
* Drop early and mid adopters
drop if inlist(state, "District of Columbia", "Delaware", "Massachusetts", "New York", "Vermont")

* Define treatment timing
gen treat_year = real(yaca)
replace treat_year = 0 if missing(treat_year)
tostring treat_year, gen(treat_str)
destring yaca, replace force

********************************
* 3. Prepare covariates
********************************
* destring covariates
destring deaths population_20_64 crude_rate_20_64 population_total ///
    population_20_64_hispanic population_20_64_female population_20_64_white ///
    unemployed labor_force unemp_rate poverty_rate median_income, replace force
ren population_total total_population

* Drop counties with missing covariates
gen perc_white = population_20_64_white / population_20_64 * 100
gen perc_hispanic = population_20_64_hispanic / population_20_64 * 100
gen perc_female = population_20_64_female / population_20_64 * 100
gen unemp_rate_pc = unemp_rate * 100
gen median_income_k = median_income / 1000
drop if missing(crude_rate_20_64, population_20_64, ///
    perc_white, perc_hispanic, perc_female, ///
    unemp_rate_pc, poverty_rate, median_income_k)

********************************
* 4. Sample construction
********************************
* Keep counties with full 11-year panel
bys county_code (year): gen panel_n = _N
bys county_code: keep if panel_n == 11
drop panel_n
replace treat_year = 0 if treat_year > 2019

********************************
* 5. Construct population weight
********************************
bys county_code: egen set_wt = max(cond(year == 2013, population_20_64, .))

********************************
* 6. Label variables
********************************
label variable crude_rate_20_64 "Crude Mortality Rate"
save "did_jel_aca_replication_data", replace

****************************************************************
* 1. Table 2: weighted and unweighted averages for the simple 2x2 DiD
****************************************************************
* Read in data, keep 2013–2014 and 2014 expansion vs "non expansion" states
use "did_jel_aca_replication_data", clear
keep if inlist(year, 2013, 2014) & inlist(yaca, 2014, 2020, 2021, 2023, .)
gen Treat = (yaca == 2014)   // 1 = ACA in 2014
gen Post = (year == 2014)    // 1 = post-expansion year

* Means
* unweighted
sum crude_rate if Treat == 1 & year == 2014
global u12014 = r(mean)
sum crude_rate if Treat == 1 & year == 2013
global u12013 = r(mean)
sum crude_rate if Treat == 0 & year == 2014
global u02014 = r(mean)
sum crude_rate if Treat == 0 & year == 2013
global u02013 = r(mean)

* weighted
sum crude_rate if Treat == 1 & year == 2014 [aw=set_wt]
global w12014 = r(mean)
sum crude_rate if Treat == 1 & year == 2013 [aw=set_wt]
global w12013 = r(mean)
sum crude_rate if Treat == 0 & year == 2014 [aw=set_wt]
global w02014 = r(mean)
sum crude_rate if Treat == 0 & year == 2013 [aw=set_wt]
global w02013 = r(mean)

* Gaps, trends, DiDs
* Unweighted
global Gap13_u = ($u12013 - $u02013)
global Gap14_u = ($u12014 - $u02014)
global DiD_u = (($u12014 - $u12013) - ($u02014 - $u02013))
global Trend_T_u = ($u12014 - $u12013)
global Trend_C_u = ($u02014 - $u02013)

* Weighted
global Gap13_w = ($w12013 - $w02013)
global Gap14_w = ($w12014 - $w02014)
global DiD_w = ($w12014 - $w12013) - ($w02014 - $w02013)
global Trend_T_w = ($w12014 - $w12013)
global Trend_C_w = ($w02014 - $w02013)

* Rounding for the table
foreach nm in u12013 u02013 u12014 u02014 ///
    w12013 w02013 w12014 w02014 ///
    Gap13_u Gap14_u DiD_u Trend_T_u Trend_C_u ///
    Gap13_w Gap14_w DiD_w Trend_T_w Trend_C_w {
    local `nm' : display %6.1f ${`nm'}
    display("`nm'")
}
display("`u12013'")
display "$u12013"

* Brick-red DiD cells
local DiD_u_cell "\em{\em{\textcolor{BrickRed}{`DiD_u'}}}"
local DiD_w_cell "\em{\em{\textcolor{BrickRed}{`DiD_w'}}}"

/*--------------------------------------------------------------------
 8. Export Excel Table for Rendering
--------------------------------------------------------------------*/
clear
set obs 3

// row labels
gen str10 row = ""
replace row = "2013" in 1
replace row = "2014" in 2
replace row = "Trend/DiD" in 3

// columns (numeric); convert locals like `u12013' to numbers with real()
gen double Expansion_unw = .
gen double NoExpansion_unw = .
gen double Gap_DiD_unw = .
gen double Expansion_wt = .
gen double NoExpansion_wt = .
gen double Gap_DiD_wt = .

replace Expansion_unw = real("`u12013'") in 1
replace Expansion_unw = real("`u12014'") in 2
replace Expansion_unw = real("`Trend_T_u'") in 3
replace NoExpansion_unw = real("`u02013'") in 1
replace NoExpansion_unw = real("`u02014'") in 2
replace NoExpansion_unw = real("`Trend_C_u'") in 3
replace Gap_DiD_unw = real("`Gap13_u'") in 1
replace Gap_DiD_unw = real("`Gap14_u'") in 2
replace Gap_DiD_unw = real("`DiD_u'") in 3
replace Expansion_wt = real("`w12013'") in 1
replace Expansion_wt = real("`w12014'") in 2
replace Expansion_wt = real("`Trend_T_w'") in 3
replace NoExpansion_wt = real("`w02013'") in 1
replace NoExpansion_wt = real("`w02014'") in 2
replace NoExpansion_wt = real("`Trend_C_w'") in 3
replace Gap_DiD_wt = real("`Gap13_w'") in 1
replace Gap_DiD_wt = real("`Gap14_w'") in 2
replace Gap_DiD_wt = real("`DiD_w'") in 3

format Expansion_unw NoExpansion_unw Gap_DiD_unw ///
    Expansion_wt NoExpansion_wt Gap_DiD_wt %9.1f
export excel using "../data/two_by_two_table.xlsx", firstrow(variables) replace
Table 3: Regression DiD
Table 3 shows how we can recover the same \(2\times2\) estimate using any of three regression specifications (written out as equations after the list):
1) Regressing the crude mortality rate on indicators for treatment (Medicaid expansion states), post-treatment periods (2014), and the interaction of the two.
2) Regressing the crude mortality rate on the interaction of indicators for Medicaid expansion and post-expansion periods, as well as county and year fixed effects.
3) Regressing the difference between 2014 and 2013 county-level crude mortality rates on an indicator for whether the county is in a state that expanded Medicaid.
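For reference, the three specifications can be written as follows (the notation is ours: \(i\) indexes counties, \(t \in \{2013, 2014\}\), and \(\alpha_i\) and \(\lambda_t\) are county and year fixed effects):

\[
\begin{aligned}
(1)\quad y_{it} &= \alpha + \beta\,\text{Treat}_i + \gamma\,\text{Post}_t + \tau\,(\text{Treat}_i \times \text{Post}_t) + \varepsilon_{it}, \\
(2)\quad y_{it} &= \alpha_i + \lambda_t + \tau\,(\text{Treat}_i \times \text{Post}_t) + \varepsilon_{it}, \\
(3)\quad \Delta y_i &= y_{i,2014} - y_{i,2013} = \alpha + \tau\,\text{Treat}_i + \varepsilon_i.
\end{aligned}
\]

In this two-period, two-group setting all three produce the same \(\hat{\tau}\), equal to the \(2\times2\) DiD from Table 2.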
# show that you can get the same estimates with regression
# make a different short data with long differences in mortality by county and treatment indicators
short_data2 <- short_data %>%
  group_by(county_code) %>%
  summarize(state = state[1],
            set_wt = mean(set_wt),
            # long difference between 2014 and 2013 rates
            diff = crude_rate_20_64[which(year == 2014)] - crude_rate_20_64[which(year == 2013)],
            Treat = mean(Treat),
            Post = 1)

# estimate three models without weights. These are Treat*Post with no fixed effects,
# fixed effects + Treat:Post, and then the long difference model with no fixed effects.
mod1 <- feols(crude_rate_20_64 ~ Treat*Post, data = short_data, cluster = ~county_code)
mod2 <- feols(crude_rate_20_64 ~ Treat:Post | county_code + year, data = short_data, cluster = ~county_code)
mod3 <- feols(diff ~ Treat:Post, data = short_data2, cluster = ~county_code)

# estimate the same three models with weights
mod4 <- feols(crude_rate_20_64 ~ Treat*Post, data = short_data, weights = ~set_wt, cluster = ~county_code)
mod5 <- feols(crude_rate_20_64 ~ Treat:Post | county_code + year, data = short_data, weights = ~set_wt, cluster = ~county_code)
mod6 <- feols(diff ~ Treat:Post, data = short_data2, weights = ~set_wt, cluster = ~county_code)

# this dictionary maps names to labels for the table
dict <- c("(Intercept)" = "Constant",
          "Treat" = "Medicaid Expansion",
          "Post" = "Post",
          "Treat:Post" = "Medicaid Expansion X Post")

# rows to add to table
rows = tribble(~x1, ~x2, ~x3, ~x4, ~x5, ~x6, ~x7,
               "County fixed effects", "No", "Yes", "No", "No", "Yes", "No",
               "Year fixed effects", "No", "Yes", "No", "No", "Yes", "No")

# export table 3
modelsummary(list("(1)" = mod1, "(2)" = mod2, "(3)" = mod3,
                  "(4)" = mod4, "(5)" = mod5, "(6)" = mod6),
             coef_map = dict, escape = FALSE, gof_map = NA,
             fmt = 1, stars = FALSE,
             estimate = "{estimate}", add_rows = rows,
             title = "Regression 2 X 2 DiD") %>%
  add_header_above(c(" " = 1, "Crude Mortality Rate" = 2, "\\(\\Delta\\)" = 1,
                     "Crude Mortality Rate" = 2, "\\(\\Delta\\)" = 1), escape = FALSE)
Regression 2 X 2 DiD (dependent variable: crude mortality rate in columns (1), (2), (4), and (5); the 2014 \(-\) 2013 difference \(\Delta\) in columns (3) and (6). Columns (1)-(3) are unweighted and (4)-(6) are weighted. Standard errors, clustered by county, in parentheses.)

|  | (1) | (2) | (3) | (4) | (5) | (6) |
|:---|:---:|:---:|:---:|:---:|:---:|:---:|
| Constant | 474.0 |  | 9.1 | 376.4 |  | 6.3 |
|  | (4.3) |  | (2.6) | (7.6) |  | (1.1) |
| Medicaid Expansion | −54.8 |  |  | −53.7 |  |  |
|  | (6.3) |  |  | (11.5) |  |  |
| Post | 9.1 |  |  | 6.3 |  |  |
|  | (2.6) |  |  | (1.1) |  |  |
| Medicaid Expansion X Post | 0.1 | 0.1 | 0.1 | −2.6 | −2.6 | −2.6 |
|  | (3.7) | (3.7) | (3.7) | (1.5) | (1.5) | (1.5) |
| County fixed effects | No | Yes | No | No | Yes | No |
| Year fixed effects | No | Yes | No | No | Yes | No |
Stata code
****************************************************************
* 2. Table 3: regression 2x2 DiD with balanced panel, no controls
****************************************************************
* Read in data, keep 2013–2014 and 2014 expansion vs "non expansion" states
use "did_jel_aca_replication_data", clear
keep if inlist(year, 2013, 2014) & inlist(yaca, 2014, 2020, 2021, 2023, .)
gen Treat = (yaca == 2014)   // 1 = ACA in 2014
gen Post = (year == 2014)    // 1 = post-expansion year

* Panel declaration (once)
xtset county_code year

* UNWEIGHTED regressions
eststo clear

* (1) Treat × Post, no FE
reg crude_rate_20_64 c.Treat##c.Post, vce(cluster county_code)
eststo m1
estadd local countyfe "No"
estadd local yearfe "No"

* (2) Treat × Post with county & year FE (xtreg, fe → no constant)
xtreg crude_rate_20_64 c.Treat#c.Post i.year, fe vce(cluster county_code)
eststo m2
estadd local countyfe "Yes"
estadd local yearfe "Yes"

* (3) Long-difference, unweighted
preserve
keep county_code year crude_rate_20_64 Treat
reshape wide crude_rate_20_64, i(county_code) j(year)
gen diff = crude_rate_20_642014 - crude_rate_20_642013
label variable diff "\$\\Delta\$"
reg diff Treat, vce(cluster county_code)

// rename Treat -> c.Treat#c.Post and re-post BEFORE eststo
tempname b V
matrix `b' = e(b)
matrix `V' = e(V)
local N = e(N)
local df = e(df_r)
tempvar touse
gen byte `touse' = e(sample)
local cn : colnames `b'
local newcn
foreach nm of local cn {
    if ("`nm'" == "Treat") local newcn `newcn' c.Treat#c.Post
    else local newcn `newcn' `nm'
}
matrix colnames `b' = `newcn'
matrix colnames `V' = `newcn'
matrix rownames `V' = `newcn'
ereturn post `b' `V', depname(diff) esample(`touse') obs(`N') dof(`df')
eststo m3
estadd local countyfe "No"
estadd local yearfe "No"
restore

* WEIGHTED regressions
* (4) Weighted, no FE
reg crude_rate_20_64 c.Treat##c.Post [aw=set_wt], vce(cluster county_code)
eststo m4
estadd local countyfe "No"
estadd local yearfe "No"

* (5) Weighted, county & year FE
xtreg crude_rate_20_64 c.Treat#c.Post i.year [aw=set_wt], ///
    fe vce(cluster county_code)
eststo m5
estadd local countyfe "Yes"
estadd local yearfe "Yes"

* (6) Weighted long-difference
preserve
keep county_code year crude_rate_20_64 Treat set_wt
reshape wide crude_rate_20_64, i(county_code) j(year)
gen diff = crude_rate_20_642014 - crude_rate_20_642013
reg diff Treat [aw=set_wt], vce(cluster county_code)

// rename Treat -> c.Treat#c.Post and re-post BEFORE eststo
tempname b V
matrix `b' = e(b)
matrix `V' = e(V)
local N = e(N)
local df = e(df_r)
tempvar touse
gen byte `touse' = e(sample)
local cn : colnames `b'
local newcn
foreach nm of local cn {
    if ("`nm'" == "Treat") local newcn `newcn' c.Treat#c.Post
    else local newcn `newcn' `nm'
}
matrix colnames `b' = `newcn'
matrix colnames `V' = `newcn'
matrix rownames `V' = `newcn'
ereturn post `b' `V', depname(diff) esample(`touse') obs(`N') dof(`df')
eststo m6
estadd local countyfe "No"
estadd local yearfe "No"
restore

* Build the LaTeX table
local labels ///
    _cons "Constant" ///
    Treat "Medicaid Expansion" ///
    Post "Post" ///
    c.Treat#c.Post "Medicaid Expansion × Post"

* Add Treat as the interaction label in long-diff specs (m3 and m6)
estimates restore m3
estadd local intlabel "Medicaid Expansion × Post"
estimates restore m6
estadd local intlabel "Medicaid Expansion × Post"

esttab m1 m2 m3 m4 m5 m6 using "../data/regdid_2x2.csv", replace ///
    keep(_cons Treat Post c.Treat#c.Post) ///
    order(_cons Treat Post c.Treat#c.Post) ///
    varlabels(_cons "Constant" Treat "Medicaid Expansion" Post "Post" ///
        c.Treat#c.Post "Medicaid Expansion × Post") ///
    cells(b(fmt(%9.1f)) se(fmt(%9.1f))) collabels(none) ///
    stats(countyfe yearfe, fmt(%9.0g %9s %9s) ///
        labels("County fixed effects" "Year fixed effects")) ///
    nomtitles nonotes noobs numbers plain csv
Table 4: Covariate Balance
In Table 4 we show how to check for balance across the covariates, using both their 2013 levels and their changes between 2013 and 2014. We report the normalized difference as well as the (unweighted and weighted) sample means.
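As a reminder, the normalized difference for a covariate \(x\) is the difference in group means scaled by the pooled standard deviation; in the grouping used in the code below (group 1 = adopters, group 0 = non-adopters):

\[
\text{Norm. Diff.} = \frac{\bar{x}_1 - \bar{x}_0}{\sqrt{\left(s_1^2 + s_0^2\right)/2}},
\]

where \(s_g^2\) is the (weighted or unweighted) sample variance of \(x\) in group \(g\). This is exactly the `norm_diff` column constructed in the code.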
# Now, make the covariate balance table (Table 4).
# unweighted - pre
unweighted <- short_data %>%
  filter(year == 2013) %>%
  select(Treat, all_of(covs)) %>%
  group_by(Treat) %>%
  # get mean and standard deviation
  summarize_all(list(mean, var)) %>%
  # pivot the data longer
  pivot_longer(cols = !Treat, names_to = "variable", values_to = "value") %>%
  # now make separate columns for treated and untreated
  pivot_wider(names_from = "Treat", values_from = "value", names_prefix = "group") %>%
  # separate mean and standard deviation
  extract(variable, into = c("variable", "fx"), "(.*)_(.*)") %>%
  pivot_wider(id_cols = variable,
              names_from = fx,
              values_from = c(group0, group1)) %>%
  # make normalized difference
  mutate(norm_diff = (group1_fn1 - group0_fn1) / sqrt((group1_fn2 + group0_fn2) / 2)) %>%
  select(variable, group0_fn1, group1_fn1, norm_diff)

# make a weighted variance function
wtd.var <- function(x, weights = NULL, normwt = FALSE, na.rm = TRUE,
                    method = c("unbiased", "ML")) {
  method <- match.arg(method)
  if (!length(weights)) {
    if (na.rm) x <- x[!is.na(x)]
    return(var(x))
  }
  if (na.rm) {
    s <- !is.na(x + weights)
    x <- x[s]
    weights <- weights[s]
  }
  if (normwt)
    weights <- weights * length(x) / sum(weights)
  if (normwt || method == "ML")
    return(as.numeric(stats::cov.wt(cbind(x), weights, method = method)$cov))
  sw <- sum(weights)
  if (sw <= 1)
    warning("only one effective observation; variance estimate undefined")
  xbar <- sum(weights * x) / sw
  sum(weights * ((x - xbar)^2)) / (sw - 1)
}

# weighted - pre
weighted <- short_data %>%
  filter(year == 2013) %>%
  select(Treat, all_of(covs), set_wt) %>%
  group_by(Treat) %>%
  # get mean and standard deviation
  summarize(across(all_of(covs),
                   list(~ weighted.mean(x = ., w = set_wt),
                        ~ wtd.var(x = ., weights = set_wt, normwt = TRUE)))) %>%
  # pivot the data longer
  pivot_longer(cols = !Treat, names_to = "variable", values_to = "value") %>%
  # now make separate columns for treated and untreated
  pivot_wider(names_from = "Treat", values_from = "value", names_prefix = "group") %>%
  # separate mean and standard deviation
  extract(variable, into = c("variable", "fx"), "(.*)_(.*)") %>%
  pivot_wider(id_cols = variable,
              names_from = fx,
              values_from = c(group0, group1)) %>%
  # make normalized difference
  mutate(norm_diff = (group1_1 - group0_1) / sqrt((group1_2 + group0_2) / 2)) %>%
  select(variable, group0_1, group1_1, norm_diff)

# make the top panel (weighted and unweighted Pre)
top_panel <- bind_cols(unweighted, weighted %>% select(-variable))

# make the bottom panel, which is the same thing but with the difference in X between 2014 and 2013.
# unweighted
unweighted <- short_data %>%
  select(county_code, year, Treat, all_of(covs)) %>%
  arrange(county_code, year) %>%
  group_by(county_code, Treat) %>%
  summarize(across(all_of(covs), function(x) x[2] - x[1])) %>%
  ungroup() %>%
  select(-county_code) %>%
  group_by(Treat) %>%
  # get mean and standard deviation
  summarize_all(list(mean, var)) %>%
  # pivot the data longer
  pivot_longer(cols = !Treat, names_to = "variable", values_to = "value") %>%
  # now make separate columns for treated and untreated
  pivot_wider(names_from = "Treat", values_from = "value", names_prefix = "group") %>%
  # separate mean and standard deviation
  extract(variable, into = c("variable", "fx"), "(.*)_(.*)") %>%
  pivot_wider(id_cols = variable,
              names_from = fx,
              values_from = c(group0, group1)) %>%
  # make normalized difference
  mutate(norm_diff = (group1_fn1 - group0_fn1) / sqrt((group1_fn2 + group0_fn2) / 2)) %>%
  select(variable, group0_fn1, group1_fn1, norm_diff)

# weighted
weighted <- short_data %>%
  select(county_code, year, Treat, all_of(covs)) %>%
  arrange(county_code, year) %>%
  group_by(county_code, Treat) %>%
  summarize(across(all_of(covs), function(x) x[2] - x[1])) %>%
  ungroup() %>%
  left_join(short_data %>%
              filter(year == 2013) %>%
              select(county_code, set_wt),
            join_by(county_code)) %>%
  select(-county_code) %>%
  group_by(Treat) %>%
  # get mean and standard deviation
  summarize(across(all_of(covs),
                   list(~ weighted.mean(x = ., w = set_wt),
                        ~ wtd.var(x = ., weights = set_wt, normwt = TRUE)))) %>%
  # pivot the data longer
  pivot_longer(cols = !Treat, names_to = "variable", values_to = "value") %>%
  # now make separate columns for treated and untreated
  pivot_wider(names_from = "Treat", values_from = "value", names_prefix = "group") %>%
  # separate mean and standard deviation
  extract(variable, into = c("variable", "fx"), "(.*)_(.*)") %>%
  pivot_wider(id_cols = variable,
              names_from = fx,
              values_from = c(group0, group1)) %>%
  # make normalized difference
  mutate(norm_diff = (group1_1 - group0_1) / sqrt((group1_2 + group0_2) / 2)) %>%
  select(variable, group0_1, group1_1, norm_diff)

# make the bottom panel
bottom_panel <- bind_cols(unweighted, weighted %>% select(-variable))

# bind the two panels
table <- bind_rows(top_panel, bottom_panel) %>%
  # reformat all columns to two digits
  mutate(across(-variable, \(x) scales::comma(x, accuracy = 0.01)))

# format Table 4 and export
table %>%
  # change column names
  mutate(variable = case_match(variable,
                               "perc_female" ~ "% Female",
                               "perc_white" ~ "% White",
                               "perc_hispanic" ~ "% Hispanic",
                               "unemp_rate" ~ "Unemployment Rate",
                               "poverty_rate" ~ "Poverty Rate",
                               "median_income" ~ "Median Income")) %>%
  # format latex table
  kable(col.names = c("Variable", "Non-Adopt", "Adopt", "Norm. Diff.",
                      "Non-Adopt", "Adopt", "Norm. Diff."),
        align = 'lcccccc',
        escape = T,
        booktabs = T,
        label = "cov_balance",
        caption = "Covariate Balance Statistics",
        linesep = "") %>%
  kable_styling(latex_options = c("scale_down", "hold_position")) %>%
  pack_rows("2013 Covariate Levels", 1, 6) %>%
  pack_rows("2014 - 2013 Covariate Differences", 7, 12) %>%
  add_header_above(c(" " = 1, "Unweighted" = 3, "Weighted" = 3))
Covariate Balance Statistics (columns 2-4: unweighted; columns 5-7: weighted)

| Variable | Non-Adopt | Adopt | Norm. Diff. | Non-Adopt | Adopt | Norm. Diff. |
|:---|:---:|:---:|:---:|:---:|:---:|:---:|
| 2013 Covariate Levels |  |  |  |  |  |  |
| % Female | 49.43 | 49.33 | -0.03 | 50.48 | 50.07 | -0.24 |
| % White | 81.64 | 90.48 | 0.59 | 77.91 | 79.54 | 0.11 |
| % Hispanic | 9.64 | 8.23 | -0.10 | 17.01 | 18.86 | 0.11 |
| Unemployment Rate | 7.61 | 8.01 | 0.16 | 7.00 | 8.01 | 0.50 |
| Poverty Rate | 19.28 | 16.53 | -0.42 | 17.24 | 15.29 | -0.37 |
| Median Income | 43.04 | 47.97 | 0.43 | 49.31 | 57.86 | 0.68 |
| 2014 - 2013 Covariate Differences |  |  |  |  |  |  |
| % Female | -0.02 | -0.02 | 0.00 | 0.02 | 0.01 | -0.09 |
| % White | -0.21 | -0.21 | 0.01 | -0.32 | -0.33 | -0.04 |
| % Hispanic | 0.20 | 0.21 | 0.04 | 0.25 | 0.33 | 0.29 |
| Unemployment Rate | -1.16 | -1.30 | -0.21 | -1.08 | -1.36 | -0.55 |
| Poverty Rate | -0.55 | -0.28 | 0.14 | -0.41 | -0.35 | 0.05 |
| Median Income | 0.98 | 1.11 | 0.06 | 1.10 | 1.74 | 0.32 |
Stata code
****************************************************************
* 3. Table 4: Covariate-balance table
* NB: the table construction is a lot of lines here...
****************************************************************
* Read in data, keep 2013–2014 and 2014 expansion vs "non expansion" states
use "did_jel_aca_replication_data", clear
keep if inlist(year, 2013, 2014) & inlist(yaca, 2014, 2020, 2021, 2023, .)
gen Treat = (yaca == 2014)   // 1 = ACA in 2014
gen Post = (year == 2014)    // 1 = post-expansion year

* local with covariate names
local covs perc_female perc_white perc_hispanic ///
    unemp_rate_pc poverty_rate median_income_k

* 2013 levels — unweighted means, SDs, and normalized diff
preserve
keep if year == 2013

*---- means and SDs in a single collapse ------------------------
collapse (mean) m_perc_female = perc_female ///
         m_perc_white = perc_white ///
         m_perc_hispanic = perc_hispanic ///
         m_unemp_rate_pc = unemp_rate_pc ///
         m_poverty_rate = poverty_rate ///
         m_median_income_k = median_income_k ///
         (sd) sd_perc_female = perc_female ///
         sd_perc_white = perc_white ///
         sd_perc_hispanic = perc_hispanic ///
         sd_unemp_rate_pc = unemp_rate_pc ///
         sd_poverty_rate = poverty_rate ///
         sd_median_income_k = median_income_k, ///
         by(Treat)
generate byte id = 1   // same value for both rows (Treat 0/1)

*---- reshape into one wide row ---------------------------------
reshape wide m* sd_*, i(id) j(Treat)
drop id   // no longer needed

*---- normalized differences ------------------------------------
foreach x in `covs' {
    gen nd_`x' = (m_`x'1 - m_`x'0) / ///
        sqrt((sd_`x'1^2 + sd_`x'0^2) / 2)
}
keep m* nd_*   // single-row dataset
tempfile unwt_pre
save `unwt_pre'
list
restore

* 2013 levels — weighted means, SDs, and normalized diff
preserve
keep if year == 2013

*---- means and SDs in a single collapse ------------------------
collapse (mean) wm_perc_female = perc_female ///
         wm_perc_white = perc_white ///
         wm_perc_hispanic = perc_hispanic ///
         wm_unemp_rate_pc = unemp_rate_pc ///
         wm_poverty_rate = poverty_rate ///
         wm_median_income_k = median_income_k ///
         (sd) sd_perc_female = perc_female ///
         sd_perc_white = perc_white ///
         sd_perc_hispanic = perc_hispanic ///
         sd_unemp_rate_pc = unemp_rate_pc ///
         sd_poverty_rate = poverty_rate ///
         sd_median_income_k = median_income_k ///
         [aw = set_wt], ///
         by(Treat)
generate byte id = 1   // same value for both rows (Treat 0/1)

*---- reshape into one wide row ---------------------------------
reshape wide wm* sd_*, i(id) j(Treat)
drop id   // no longer needed

*---- normalized differences ------------------------------------
foreach x in `covs' {
    gen wnd_`x' = (wm_`x'1 - wm_`x'0) / ///
        sqrt((sd_`x'1^2 + sd_`x'0^2) / 2)
}
keep wm* wnd_*   // single-row dataset
tempfile wt_pre
save `wt_pre'
list
restore

* Differences - unweighted means, SDs, and normalized diff
preserve

* create differences
for var `covs': egen bX = total(X * (year == 2013)), by(county_code)
for var `covs': gen d_X = X - bX
keep if year == 2014

*---- means and SDs in a single collapse ------------------------
collapse (mean) m_d_perc_female = d_perc_female ///
         m_d_perc_white = d_perc_white ///
         m_d_perc_hispanic = d_perc_hispanic ///
         m_d_unemp_rate_pc = d_unemp_rate_pc ///
         m_d_poverty_rate = d_poverty_rate ///
         m_d_median_income_k = d_median_income_k ///
         (sd) sd_perc_female = d_perc_female ///
         sd_perc_white = d_perc_white ///
         sd_perc_hispanic = d_perc_hispanic ///
         sd_unemp_rate_pc = d_unemp_rate_pc ///
         sd_poverty_rate = d_poverty_rate ///
         sd_median_income_k = d_median_income_k, ///
         by(Treat)
generate byte id = 1   // same value for both rows (Treat 0/1)

*---- reshape into one wide row ---------------------------------
reshape wide m* sd_*, i(id) j(Treat)
drop id   // no longer needed

*---- normalized differences ------------------------------------
foreach x in `covs' {
    gen nd_d_`x' = (m_d_`x'1 - m_d_`x'0) / ///
        sqrt((sd_`x'1^2 + sd_`x'0^2) / 2)
}
keep m_d* nd_d*   // single-row dataset
tempfile unwt_diff
save `unwt_diff'
list
restore

* Differences - weighted means, SDs, and normalized diff
preserve

* create differences
for var `covs': egen bX = total(X * (year == 2013)), by(county_code)
for var `covs': gen d_X = X - bX
keep if year == 2014

*---- means and SDs in a single collapse ------------------------
collapse (mean) wm_d_perc_female = d_perc_female ///
         wm_d_perc_white = d_perc_white ///
         wm_d_perc_hispanic = d_perc_hispanic ///
         wm_d_unemp_rate_pc = d_unemp_rate_pc ///
         wm_d_poverty_rate = d_poverty_rate ///
         wm_d_median_income_k = d_median_income_k ///
         (sd) sd_perc_female = d_perc_female ///
         sd_perc_white = d_perc_white ///
         sd_perc_hispanic = d_perc_hispanic ///
         sd_unemp_rate_pc = d_unemp_rate_pc ///
         sd_poverty_rate = d_poverty_rate ///
         sd_median_income_k = d_median_income_k ///
         [aw = set_wt], ///
         by(Treat)
generate byte id = 1   // same value for both rows (Treat 0/1)

*---- reshape into one wide row ---------------------------------
reshape wide wm* sd_*, i(id) j(Treat)
drop id   // no longer needed

*---- normalized differences ------------------------------------
foreach x in `covs' {
    gen wnd_d_`x' = (wm_d_`x'1 - wm_d_`x'0) / ///
        sqrt((sd_`x'1^2 + sd_`x'0^2) / 2)
}
keep wm* wnd_*   // single-row dataset
tempfile wt_diff
save `wt_diff'
list
restore

** table creation
* Assemble the four blocks into one matrix
use `unwt_pre', clear
merge 1:1 _n using `wt_pre', nogen     // adds wm* wnd_*
merge 1:1 _n using `unwt_diff', nogen  // adds m_d* nd_d*
merge 1:1 _n using `wt_diff', nogen    // adds wm_d* wnd_d_*
de, f

* ---- row labels ------------------------------------------------
local rowlbl `""% Female" "% White" "% Hispanic" "Unemployment Rate" "Poverty Rate" "Median Income""'
local outrows
local i = 1
foreach v in `covs' {
    local outrows `"`outrows' `"`: word `i' of `rowlbl''"'"'
    local ++i
}

/* ---- build the 12 × 6 matrix ---------------------------------- */
matrix T = J(12, 6, .)
local r 0
foreach v of local covs {
    local ++r
    * rows 1–6 : 2013 levels
    matrix T[`r',1] = m_`v'0[1]    // un-wtd mean, Non-adopt
    matrix T[`r',2] = m_`v'1[1]    // un-wtd mean, Adopt
    matrix T[`r',3] = nd_`v'[1]    // un-wtd norm-diff
    matrix T[`r',4] = wm_`v'0[1]   // wtd mean, Non-adopt
    matrix T[`r',5] = wm_`v'1[1]   // wtd mean, Adopt
    matrix T[`r',6] = wnd_`v'[1]   // wtd norm-diff
}
foreach v of local covs {
    local ++r
    * rows 7–12 : 2014–2013 long-differences
    matrix T[`r',1] = m_d_`v'0[1]   // un-wtd diff, Non-adopt
    matrix T[`r',2] = m_d_`v'1[1]   // un-wtd diff, Adopt
    matrix T[`r',3] = nd_d_`v'[1]   // un-wtd norm-diff
    matrix T[`r',4] = wm_d_`v'0[1]  // wtd diff, Non-adopt
    matrix T[`r',5] = wm_d_`v'1[1]  // wtd diff, Adopt
    matrix T[`r',6] = wnd_d_`v'[1]  // wtd norm-diff
}
matrix rownames T = `outrows' `outrows'
matrix colnames T = "Non-Adopt" "Adopt" "Norm. Diff." ///
    "Non-Adopt" "Adopt" "Norm. Diff."

*----------------------------------------------------------------
* 5. Export to CSV instead of LaTeX
*----------------------------------------------------------------
* Convert matrix T to a dataset
clear
svmat T

* Rename columns to something clear
rename T1 NonAdopt_unwt
rename T2 Adopt_unwt
rename T3 NormDiff_unwt
rename T4 NonAdopt_wt
rename T5 Adopt_wt
rename T6 NormDiff_wt

* Round and format after svmat/renames
foreach v of varlist NonAdopt_unwt Adopt_unwt NormDiff_unwt ///
    NonAdopt_wt Adopt_wt NormDiff_wt {
    replace `v' = round(`v', .01)
    tostring `v', replace format(%9.2f) force
}

* Save as CSV
export delimited using "../data/cov_balance.csv", replace
Table 5: Regression 2 \(\times\) 2 DiD with Covariates
In Table 5 we show how the 2 \(\times\) 2 DiD estimates change when covariates are included in the linear regression. You can do this in two ways: either control for the 2013 levels of the covariates, or for the change in the covariates between 2013 and 2014. Even in this simple setting the results differ considerably.
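Both versions fit the same long-difference regression and differ only in which covariates \(X_i\) enter; a sketch in our notation, where \(X_i\) is either the 2013 levels or the 2014 \(-\) 2013 changes:

\[
\Delta y_i = \alpha + \tau\,\text{Treat}_i + X_i'\beta + \varepsilon_i.
\]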
# make a table that includes the model 1) without covariates, 2) long regression with
# 2013 covariates values, 3) long regression with *difference* in covariate values.
reg_data_2013 <- short_data %>%
  # make long diff in y
  group_by(county_code) %>%
  summarize(long_y = crude_rate_20_64[which(year == 2014)] - crude_rate_20_64[which(year == 2013)]) %>%
  # merge in 2013 covariates
  left_join(short_data %>%
              filter(year == 2013) %>%
              select(county_code, state, Treat, set_wt, all_of(covs)),
            by = "county_code")

# this dataset has the change in X between 2014 and 2013 as controls
reg_data_change <- short_data %>%
  # make long diff in y
  group_by(county_code) %>%
  summarize(long_y = crude_rate_20_64[which(year == 2014)] - crude_rate_20_64[which(year == 2013)]) %>%
  # merge in change in covariate values
  left_join(short_data %>%
              group_by(county_code) %>%
              mutate(set_wt = set_wt[which(year == 2013)]) %>%
              group_by(county_code, state, Treat, set_wt) %>%
              summarize(across(all_of(covs),
                               function(x) x[which(year == 2014)] - x[which(year == 2013)])),
            by = "county_code")

# run the six models
# first unweighted - long diff no covs, 2013 covs, then change in covs
mod1 <- feols(long_y ~ Treat, data = reg_data_2013, cluster = ~county_code)
mod2 <- feols(long_y ~ Treat + .[covs], data = reg_data_2013, cluster = ~county_code)
mod3 <- feols(long_y ~ Treat + .[covs], data = reg_data_change, cluster = ~county_code)

# same thing but weighted
mod4 <- feols(long_y ~ Treat, data = reg_data_2013, weights = ~set_wt, cluster = ~county_code)
mod5 <- feols(long_y ~ Treat + .[covs], data = reg_data_2013, weights = ~set_wt, cluster = ~county_code)
mod6 <- feols(long_y ~ Treat + .[covs], data = reg_data_change, weights = ~set_wt, cluster = ~county_code)

# export table 5
modelsummary(list("(1)" = mod1, "(2)" = mod2, "(3)" = mod3,
                  "(4)" = mod4, "(5)" = mod5, "(6)" = mod6),
             coef_map = dict, coef_omit = "(Intercept)", escape = FALSE, gof_map = NA,
             fmt = 2, estimate = "{estimate}",
             title = "Regression 2 X 2 DiD with Covariates") %>%
  add_header_above(c(" " = 1, "No Covs" = 1, "$$X_{i, t = 2013}$$" = 1, "$$X_{i, t}$$" = 1,
                     "No Covs" = 1, "$$X_{i, t = 2013}$$" = 1, "$$X_{i, t}$$" = 1),
                   escape = FALSE, extra_css = 'vertical-align: middle !important;', line = F) %>%
  add_header_above(c(" " = 1, "Unweighted" = 3, "Weighted" = 3), escape = FALSE)
Table 6: Outcome Regression and Propensity Score Models
We next show how to adjust for covariates in the 2 \(\times\) 2 setting in a disciplined manner. Table 6 reports the outcome regression and propensity score model results that feed into the doubly-robust estimator from Callaway and Sant’Anna (2021).
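The two nuisance models, in our notation, are an outcome regression fit on non-expansion counties only and a logit propensity-score model fit on the 2013 data:

\[
\Delta y_i = X_i'\beta + \varepsilon_i \quad \text{(non-expansion counties)}, \qquad \Pr(\text{Treat}_i = 1 \mid X_i) = \Lambda(X_i'\gamma),
\]

where \(\Lambda(\cdot)\) is the logistic CDF. These correspond to the `feols` and `feglm` calls in the code below.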
## Next we make Table 6 which shows the pscore and outcome models that feed into CS.
# These are done by regressing long y on the covariates for the untreated units (the outcome model)
# and regressing the expansion indicator on the covariates for 2013 data (the propensity model)
# We do it with and without weights
mod1 <- feols(long_y ~ .[covs], data = reg_data_2013 %>% filter(Treat == 0),
              cluster = ~county_code)
mod2 <- feglm(Treat ~ .[covs], data = short_data %>% filter(year == 2013),
              family = "binomial", vcov = "hetero")
mod3 <- feols(long_y ~ .[covs], data = reg_data_2013 %>% filter(Treat == 0),
              cluster = ~county_code, weights = ~set_wt)
mod4 <- feglm(Treat ~ .[covs], data = short_data %>% filter(year == 2013),
              family = "binomial", vcov = "hetero", weights = ~set_wt)

# This is the dictionary to map variables to data labels for the table
dict <- c("(Intercept)" = "Constant",
          "perc_female" = "% Female",
          "perc_white" = "% White",
          "perc_hispanic" = "% Hispanic",
          "crude_rate_20_64" = "Crude Mortality Rate",
          "unemp_rate" = "Unemployment Rate",
          "poverty_rate" = "Poverty Rate",
          "median_income" = "Median Income")

# export table 6
modelsummary(list("(1)" = mod1, "(2)" = mod2, "(3)" = mod3, "(4)" = mod4),
             coef_map = dict, escape = FALSE, gof_map = NA,
             fmt = 2, estimate = "{estimate}",
             title = "Outcome Regression and Propensity Score Models") %>%
  add_header_above(c(" " = 1, "Regression" = 1, "Propensity Score" = 1,
                     "Regression" = 1, "Propensity Score" = 1),
                   escape = FALSE, extra_css = 'vertical-align: middle !important;', line = F) %>%
  add_header_above(c(" " = 1, "Unweighted" = 2, "Weighted" = 2), escape = FALSE)
Table 7: Callaway and Sant'Anna (2021) DiD
Table 7 shows how to calculate the DiD estimate using the estimators in Sant'Anna and Zhao (2020). The paper describes three estimation methods: outcome regression, inverse-probability weighting (IPW), and doubly robust estimation.
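For reference, the doubly robust estimand for panel data combines the two models from Table 6; in our transcription of Sant'Anna and Zhao's expression, with \(D\) the expansion indicator, \(p(X)\) the propensity score, and \(\mu_{0,\Delta}(X) = E[\Delta Y \mid X, D = 0]\) the outcome model:

\[
\tau^{dr} = E\left[\left(\frac{D}{E[D]} - \frac{\dfrac{p(X)(1-D)}{1-p(X)}}{E\left[\dfrac{p(X)(1-D)}{1-p(X)}\right]}\right)\left(\Delta Y - \mu_{0,\Delta}(X)\right)\right].
\]

The estimator is consistent if either the outcome model or the propensity-score model is correctly specified, which is the sense in which it is doubly robust.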
# finally, get the same estimates using Sant'Anna and Zhao (2020) and Callaway and Sant'Anna (2021)
# using the three adjustment methods
# You need to reformat the group variable (untreated = 0) and the unit ID variable needs to be numeric
data_cs <- short_data %>%
  mutate(treat_year = if_else(yaca == 2014 & !is.na(yaca), 2014, 0),
         county_code = as.numeric(county_code))

# create a function to run the CS estimator allowing the
# estimation method to differ
run_cs <- function(method, wt) {
  # estimate the att_gt
  atts <- att_gt(
    yname = "crude_rate_20_64",
    tname = "year",
    idname = "county_code",
    gname = "treat_year",
    xformla = as.formula(paste("~", paste(covs, collapse = "+"))),
    data = data_cs,
    panel = TRUE,
    control_group = "nevertreated",
    bstrap = TRUE,
    cband = TRUE,
    est_method = method,
    weightsname = wt,
    # faster_mode = TRUE,
    base_period = "universal",
    biters = 25000
  )
  # aggregate estimates
  aggte(atts, na.rm = TRUE, biters = 25000) %>%
    broom::tidy() %>%
    filter(group == 2014) %>%
    mutate(type = method)
}

# run it three ways - regression adjustment, IPW, and doubly robust
# these are unweighted
ests <- map_dfr(c("reg", "ipw", "dr"), run_cs, wt = NULL)

# make a table - Panel A is with unweighted models
tablea <- ests %>%
  # reformat the data
  select(type, estimate, std.error) %>%
  # format the estimate and std error
  mutate(estimate = scales::number(estimate, accuracy = 0.01),
         std.error = paste0("(", scales::number(std.error, accuracy = 0.01), ")")) %>%
  # reshape the data
  pivot_longer(cols = -type, names_to = "statistic", values_to = "value") %>%
  pivot_wider(id_cols = "statistic", names_from = "type", values_from = "value") %>%
  mutate(statistic = if_else(statistic == "estimate", "Medicaid Expansion", " "))

# make weighted results for the three CS models.
ests_w <- map_dfr(c("reg", "ipw", "dr"), run_cs, wt = "set_wt")

# Panel B is the same thing but with weighted results
tableb <- ests_w %>%
  select(type, estimate, std.error) %>%
  mutate(estimate = scales::number(estimate, accuracy = 0.01),
         std.error = paste0("(", scales::number(std.error, accuracy = 0.01), ")")) %>%
  pivot_longer(cols = -type, names_to = "statistic", values_to = "value") %>%
  pivot_wider(id_cols = "statistic", names_from = "type", values_from = "value") %>%
  select(reg, ipw, dr)

# Combine Panels A and B together and format the table
bind_cols(tablea, tableb) %>%
  kable(col.names = c(" ", "Regression", "IPW", "Doubly Robust",
                      "Regression", "IPW", "Doubly Robust"),
        align = 'lcccccc',
        escape = F,
        booktabs = T,
        label = "2x2_csdid",
        caption = "Callaway and Sant'Anna (2021) DiD",
        linesep = "") %>%
  kable_styling(latex_options = c("hold_position")) %>%
  add_header_above(c(" " = 1, "Unweighted" = 3, "Weighted" = 3))
Callaway and Sant'Anna (2021) DiD (columns 1-3: unweighted; columns 4-6: weighted; standard errors in parentheses)

|  | Regression | IPW | Doubly Robust | Regression | IPW | Doubly Robust |
|:---|:---:|:---:|:---:|:---:|:---:|:---:|
| Medicaid Expansion | -1.62 | -0.86 | -1.23 | -3.46 | -3.84 | -3.76 |
|  | (4.68) | (4.61) | (4.94) | (2.39) | (3.39) | (3.24) |
Stata code
****************************************************************
* Table 7: 2x2 DiD with covariates (Sant'Anna and Zhao 2020)
****************************************************************
* read data in again and restrict to 2013/2014
use "did_jel_aca_replication_data", clear
keep if inlist(year, 2013, 2014) & inlist(yaca, 2014, 2020, 2021, 2023, .)
gen Treat = (yaca == 2014)   // 1 = ACA in 2014
gen Post = (year == 2014)    // 1 = post-expansion year

* local with covariate names
local covs perc_female perc_white perc_hispanic ///
    unemp_rate_pc poverty_rate median_income_k

* create baseline covariates (drdid would do this but we do it explicitly here)
* drop the time-varying version of the covariate; not used here
foreach x in `covs' {
    egen `x'2013 = total(`x' * (year == 2013)), by(county_code)
    drop `x'
}

* rename the baseline covariates just created
rename (perc_female2013 perc_white2013 perc_hispanic2013 ///
    unemp_rate_pc2013 poverty_rate2013 median_income_k2013) ///
    (perc_female perc_white perc_hispanic ///
    unemp_rate_pc poverty_rate median_income_k)

* Set panel
xtset county_code year

* Unweighted DRDID Estimation
* request all estimators in one shot (we only use reg, stdipw, and dripw)
drdid crude_rate_20_64 `covs', ///
    ivar(county_code) time(year) tr(Treat) ///
    all ///
    pscoretrim(0.995) ///
    wboot(reps(25000) rseed(20240924) wbtype(rademacher)) ///
    cluster(county_code)

* Extract point estimates
scalar reg = e(b)[1,3]
scalar ipw = e(b)[1,5]
scalar dripw = e(b)[1,1]

* Extract standard errors
scalar se_reg = sqrt(e(V)[3,3])
scalar se_ipw = sqrt(e(V)[5,5])
scalar se_dripw = sqrt(e(V)[1,1])

* Format for LaTeX export
local reg_str : display %6.2f reg
local ipw_str : display %6.2f ipw
local dripw_str : display %6.2f dripw
local se_reg_str : display %6.2f se_reg
local se_ipw_str : display %6.2f se_ipw
local se_dripw_str : display %6.2f se_dripw

* Weighted DRDID Estimation
* request all estimators in one shot (we only use reg, stdipw, and dripw)
drdid crude_rate_20_64 `covs' [iweight = set_wt], ///
    ivar(county_code) time(year) tr(Treat) ///
    all ///
    pscoretrim(0.995) ///
    wboot(reps(25000) rseed(20240924) wbtype(rademacher)) ///
    cluster(county_code)

* Extract point estimates
scalar reg_w = e(b)[1,3]
scalar ipw_w = e(b)[1,5]
scalar dripw_w = e(b)[1,1]

* Extract standard errors
scalar se_reg_w = sqrt(e(V)[3,3])
scalar se_ipw_w = sqrt(e(V)[5,5])
scalar se_dripw_w = sqrt(e(V)[1,1])

* Make matrix of the results of interest
matrix results_wide = ( ///
    reg, ipw, dripw, reg_w, ipw_w, dripw_w \ ///
    se_reg, se_ipw, se_dripw, se_reg_w, se_ipw_w, se_dripw_w )
matrix rownames results_wide = b se
matrix colnames results_wide = Unw_REG Unw_IPW Unw_DRIPW W_REG W_IPW W_DRIPW
matrix list results_wide
clear
svmat results_wide, names(col)

* add row labels b/se
gen stat = ""
replace stat = "Medicaid Expansion" in 1
replace stat = "" in 2
order stat Unw_REG Unw_IPW Unw_DRIPW W_REG W_IPW W_DRIPW
export delimited using "../data/reg_t7.csv", replace
Figure 1: Distribution of Propensity Scores
Figure 1 plots the distribution of the propensity scores from the underlying propensity model for the unweighted and weighted models.
# Finally, we make Figure 1 which reports the distribution of the propensity scores between
# expansion and non-expansion counties.
# first add in propensity scores to the data
plot_data <- bind_rows(
  short_data %>%
    filter(year == 2013) %>%
    mutate(propensity = predict(mod2, ., type = "response"),
           mod = "Unweighted",
           wt = 1),
  short_data %>%
    filter(year == 2013) %>%
    mutate(propensity = predict(mod4, ., type = "response"),
           mod = "Weighted",
           wt = set_wt)
)

# plot the propensity score distributions by group
plot_data %>%
  mutate(expand = if_else(Treat == 1, "Expansion Counties", "Non-Expansion Counties")) %>%
  ggplot(aes(x = propensity, y = after_stat(density),
             group = expand, color = expand, weight = wt)) +
  geom_histogram(fill = "white", position = "identity", linewidth = 1, alpha = 0.5) +
  facet_wrap(~mod) +
  scale_color_brewer(palette = 'Set1') +
  labs(x = "Propensity Score", y = "Density") +
  theme(legend.position = 'bottom',
        axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360),
        strip.text = element_text(size = 14),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.title = element_blank(),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 10))
Stata code
**************************************************
* Figure 1: Distribution of Propensity Scores
**************************************************
* read data in again and restrict to 2013/2014
use "did_jel_aca_replication_data", clear
keep if inlist(year, 2013, 2014) & inlist(yaca, 2014, 2020, 2021, 2023, .)
gen Treat = (yaca == 2014)   // 1 = ACA in 2014
gen Post = (year == 2014)    // 1 = post-expansion year

* local with covariate names
local covs perc_female perc_white perc_hispanic ///
    unemp_rate_pc poverty_rate median_income_k

* Keep only relevant variables
keep county_code year Treat set_wt crude_rate_20_64 `covs'

* Reshape everything (outcome + covariates) at once
reshape wide crude_rate_20_64 `covs', i(county_code) j(year)

* Generate change in covariates
foreach v of local covs {
    gen d_`v' = `v'2014 - `v'2013
}

* Generate outcome: change in crude mortality
gen long_y = crude_rate_20_642014 - crude_rate_20_642013

* Drop missing outcome
drop if missing(long_y)

* Unweighted p-score distributions
eststo ps1: logit Treat ///
    perc_female2013 perc_white2013 perc_hispanic2013 ///
    unemp_rate_pc2013 poverty_rate2013 median_income_k2013, ///
    vce(cluster county_code)

* estimates use ps1
predict pscore_unw

* Expansion counties
summ pscore_unw if inlist(Treat, 0, 1)
local min = r(min)
local max = r(max)
local w = (`max' - `min') / 30
local s = floor(`min' / `w') * `w'   // align start to a bin edge
di `w'
di `s'
twoway histogram pscore_unw if Treat == 1, width(`w') start(`s') fcolor(none) lcolor(red) || ///
    histogram pscore_unw if Treat == 0, width(`w') start(`s') fcolor(none) lcolor(blue) ///
    ytitle("Density", angle(horizontal)) xtitle("") title("Unweighted") legend(off) ///
    ylabel(0(1)4.5) xlabel(, nogrid)
graph save "pscore_uw.gph", replace

* Weighted p-score distributions
eststo ps2: logit Treat ///
    perc_female2013 perc_white2013 perc_hispanic2013 ///
    unemp_rate_pc2013 poverty_rate2013 median_income_k2013 ///
    [iw = set_wt], vce(cluster county_code)

* estimates use ps2
predict pscore_w

* Define group for legend
gen group = cond(Treat == 1, "Expansion", "Non-Expansion")

* Expansion counties
summ pscore_w if inlist(Treat, 0, 1)
local min = r(min)
local max = r(max)
local w = (`max' - `min') / 30
local s = floor(`min' / `w') * `w'   // align start to a bin edge
twoway histogram pscore_w if Treat == 1 [fw=set_wt], width(`w') start(`s') fcolor(none) lcolor(red) || ///
    histogram pscore_w if Treat == 0 [fw=set_wt], width(`w') start(`s') fcolor(none) lcolor(blue) ///
    ytitle("") xtitle("Propensity Score") title("Weighted") legend(rows(1)) ///
    ylabel(0(1)4.5) xlabel(, nogrid)
graph save "pscore_w.gph", replace

* combine graphs and use only one legend and y-axis title
** NB: differences between the Stata and R histograms are purely down to
* some arcane disagreement between histogram and ggplot; as can be seen from
* the actual p-score model estimates, the pscores themselves are identical
grc1leg2 pscore_uw.gph pscore_w.gph, ///
    title("") xtob1title xtitlefrom("pscore_w.gph") ///
    legendfrom(pscore_w.gph) position(6) ring(2) ///
    cols(2) imargin(4 4 4 4)
graph display, xsize(10) ysize(5)
2 \(\times\) T DiD setups
In the next section we show how to estimate DiD when you have more than two time periods, what we call the 2 \(\times\) T setting.
Figure 2: County Mortality Trends by Expansion Decision
In Figure 2 we plot the weighted average county mortality rates over our sample period (2009-2019) for our two groups: counties in states that expanded Medicaid in 2014 and counties in states that had not expanded by 2019.
# set seed for reproducibility
set.seed(20240924)

# load cleaned data
mydata <- read_csv(here::here("data", "county_mortality_data.csv")) %>%
  # make state the abbreviation
  mutate(state = str_sub(county, nchar(county) - 1, nchar(county))) %>%
  # drop DC and pre-2014 adoption states
  filter(!(state %in% c("DC", "DE", "MA", "NY", "VT"))) %>%
  # drop states that adopt between 2014 and 2019
  filter(yaca == 2014 | is.na(yaca) | yaca > 2019)

# set the covariates that we're going to use.
covs <- c("perc_female", "perc_white", "perc_hispanic",
          "unemp_rate", "poverty_rate", "median_income")

## Clean data and add in covariates
mydata <- mydata %>%
  # make variables
  mutate(perc_white = population_20_64_white / population_20_64 * 100,
         perc_hispanic = population_20_64_hispanic / population_20_64 * 100,
         perc_female = population_20_64_female / population_20_64 * 100,
         unemp_rate = unemp_rate * 100,
         median_income = median_income / 1000) %>%
  # keep just subset of variables that we will use later
  select(state, county, county_code, year, population_20_64, yaca,
         starts_with("perc_"), crude_rate_20_64, all_of(covs))

# keep only counties with full observations for outcome and covariates in 2013 and 2014
mydata <- mydata %>%
  # allow the aca expansion variable to be missing
  drop_na(!yaca) %>%
  group_by(county_code) %>%
  # need full covariates for 2013 and 2014
  filter(length(which(year == 2013 | year == 2014)) == 2) %>%
  ungroup()

# finally, keep only counties with full mortality data for 2009 to 2019
mydata <- mydata %>%
  group_by(county_code) %>%
  drop_na(crude_rate_20_64) %>%
  filter(n() == 11)

# make ACA and post variables, as well as the weighting variable which is the relevant county
# population in 2013
mydata <- mydata %>%
  mutate(Treat = if_else(yaca == 2014 & !is.na(yaca), 1, 0),
         Post = if_else(year >= 2014, 1, 0)) %>%
  group_by(county_code) %>%
  # make a variable with population weight in 2013
  mutate(set_wt = population_20_64[which(year == 2013)]) %>%
  ungroup()

# make time series plot
mydata %>%
  mutate(expand = if_else(Treat == 1, "Expansion Counties", "Non-Expansion Counties")) %>%
  # get the weighted averages by expansion type and year
  group_by(expand, year) %>%
  summarize(mortality = weighted.mean(crude_rate_20_64, set_wt)) %>%
  # plot
  ggplot(aes(x = year, y = mortality, group = expand, color = expand)) +
  geom_point(size = 2) +
  geom_line(linewidth = 1) +
  geom_vline(xintercept = 2014, linetype = "dashed") +
  scale_color_brewer(palette = 'Set1') +
  scale_x_continuous(breaks = 2009:2019) +
  labs(x = "", y = "Mortality (20-64) \n Per 100,000") +
  theme(legend.position = 'bottom',
        axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360),
        strip.text = element_text(size = 14),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.title = element_blank(),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 10))
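The next code block constructs the event-study estimates plotted in Figure 3. The regression analogue of the \(2 \times T\) event study is (our notation; \(e\) indexes event time relative to 2014, with \(e = -1\) omitted as the reference period):

\[
y_{it} = \alpha_i + \lambda_t + \sum_{e \neq -1} \tau_e\, \mathbb{1}\left[t - 2014 = e\right] \times \text{Treat}_i + \varepsilon_{it},
\]

which is what the `feols` call with `i(time_to_treat, Treat, ref = -1)` estimates. With a single timing group and a never-treated comparison set, the Callaway and Sant'Anna point estimates match the OLS coefficients; the standard errors differ only because of the bootstrap.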
# Make event study plots
# first add in the variables needed to run the CS model - timing group variable (treat_year)
# and county code needs to be numeric
mydata <- mydata %>%
  mutate(treat_year = if_else(yaca == 2014 & !is.na(yaca), 2014, 0),
         county_code = as.numeric(county_code),
         time_to_treat = if_else(Treat == 1, year - treat_year, 0))

# estimate CS models
# get the individual ATT(g,t) estimates
mod <- did::att_gt(
  yname = "crude_rate_20_64",
  tname = "year",
  idname = "county_code",
  gname = "treat_year",
  xformla = NULL,
  data = mydata,
  panel = TRUE,
  control_group = "nevertreated",
  bstrap = TRUE,
  cband = TRUE,
  est_method = "reg",
  weightsname = "set_wt",
  # faster_mode = TRUE,
  base_period = "universal",
  biters = 25000
)

# confirm you get the same with OLS (standard errors differ because of bootstrap only)
cs_out <- mod
ols_out <- feols(crude_rate_20_64 ~ i(time_to_treat, Treat, ref = -1) | county_code + year,
                 data = mydata, cluster = ~county_code, weights = ~set_wt)

# get the Rambachan/Roth confidence interval
# first save our aggregate event study estimates
es <- did::aggte(
  mod,
  type = "dynamic",
  min_e = -5,
  max_e = 0,
  bstrap = TRUE,
  biters = 25000
)

# source the honest_did code made to fit our estimates
source(here::here('scripts/R/5_honestdid.R'))

# get robust CI
robust_ci <- honest_did(es = es, type = "relative_magnitude")$robust_ci

# get the aggregate value for e = 0:5
agg <- mod %>%
  aggte(type = "dynamic", min_e = 0, max_e = 5, bstrap = TRUE, biters = 25000)

# make labels for the estimate, std error, and confidence intervals
label1 <- paste0("Estimate~(e %in% '{0, 5}')~'='~'",
                 scales::number(agg$overall.att, accuracy = 0.01), "'")
label2 <- paste0("Std. Error = ", scales::number(agg$overall.se, 0.01), " \n",
                 "Conf. Int = [",
                 scales::number(agg$overall.att - 1.96 * agg$overall.se, 0.01), ", ",
                 scales::number(agg$overall.att + 1.96 * agg$overall.se, 0.01), "]")

# make event study plot for Figure 3
mod %>%
  # Aggregate in event time ("dynamic")
  aggte(type = "dynamic", biters = 25000) %>%
  # get the two confidence intervals
  broom::tidy(conf.int = TRUE) %>%
  # plot
  ggplot(aes(x = event.time, y = estimate)) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high), color = "darkred") +
  geom_linerange(aes(ymin = point.conf.low, ymax = point.conf.high)) +
  geom_point() +
  geom_vline(xintercept = -1, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_x_continuous(breaks = -5:5) +
  annotate("text", x = 3, y = 11, label = label1, parse = TRUE) +
  annotate("text", x = 3, y = 9, label = label2) +
  labs(x = "Event Time", y = "Treatment Effect \n Mortality Per 100,000") +
  theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360),
        strip.text = element_text(size = 14),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.title = element_blank(),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 10))
```stata
***************************************************************
* 2. Figure 3: 2xT event-study with no covariates
***************************************************************

* read data in again and restrict the sample
use "did_jel_aca_replication_data", clear
keep if inlist(yaca, 2014, 2020, 2021, 2023, .)

* Use csdid for simple ES estimation (uniform and non-uniform CIs); use long2!!
csdid crude_rate_20_64 [iw=set_wt], ///
    ivar(county_code) time(year) gvar(treat_year) ///
    long2 ///
    wboot(reps(25000) rseed(20240924) wbtype(rademacher)) ///
    agg(event)

* you do actually have to grab the uniform CIs from the posted output
preserve
    clear
    matrix T = r(table)
    local cn : colnames T
    matrix llu = T[5, 1..colsof(T)]
    matrix ulu = T[6, 1..colsof(T)]
    matrix uci = (llu', ulu')
    matrix colnames uci = uci_lower uci_upper
    svmat uci, names(col)
    gen var = ""
    local i = 0
    foreach r of local cn {
        local ++i
        replace var = "`r'" in `i'
    }
    save "testo", replace
restore

* save the posted output to a dataset and merge back on the uniform CIs
regsave, ci
merge 1:1 var using "testo"
keep if _merge == 3
drop _merge

* get average post coefficient, standard error, and CI
qui sum coef if var == "Post_avg"
local postcoef : display %03.2f r(mean)
qui sum stderr if var == "Post_avg"
local postse : display %03.2f r(mean)
local postlci : display %03.2f `postcoef' - 1.96*`postse'
local postuci : display %03.2f `postcoef' + 1.96*`postse'

* now only keep the event-study estimates
keep if substr(var, 1, 1) == "T"
keep coef ci* uci* var

* extract timing components from event aggregation notation in csdid
gen pre = substr(var, 2, 1) == "m"
gen post = substr(var, 2, 1) == "p"
gen e = real(substr(var, 3, .))
replace e = -e if pre

* add omitted period
local obs = _N + 1
set obs `obs'
replace coef = 0 in `obs'
replace e = -1 in `obs'

* plot the ATT(g,t) for g=2014
twoway ///
    (rcap uci_lower uci_upper e, lcolor(red*1.2) msize(0)) ///
    (rcap ci_lower ci_upper e, lcolor(black) msize(0)) ///
    (scatter coef e, msym(O) mcolor(black)), ///
    legend(off) ///
    xlabel(-5(1)5, nogrid) ///
    ylabel(-5(5)10) ///
    yline(0, lpattern(dash)) ///
    xline(-1, lpattern(dash)) ///
    xtitle("Event-Time") ///
    ytitle("Treatment Effect" "Mortality Per 100,000", orientation(horizontal) size(small)) ///
    text(10.5 3 "Estimate e ∈ {0,5}=`postcoef'", size(small)) ///
    text(9 3 "Std. Error = `postse'" "Conf. Int = [`postlci',`postuci']", size(small)) ///
    xsize(10) ysize(5)
```
Rambachan & Roth (2023) Sensitivity Analysis
In the text surrounding the \(2 \times T\) event study, we also discuss how you can use the analysis developed in Rambachan and Roth (2023) to test the sensitivity of the ATT estimates to violations of the parallel trends assumption. In particular, we focus on their “relative magnitudes” analysis, in which we assume that any parallel trends violation in the post-period is no larger than the largest violation in the pre-period. In the code below we show how to estimate the robust confidence interval under this bounding approach for \(ATT(2014)\), the treatment effect in the first year of treatment for the counties that adopted Medicaid expansion in 2014.
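Formally, writing \(\delta_t\) for the violation of parallel trends in event time \(t\), the relative magnitudes class with bound \(\bar{M}\) is, in the notation of Rambachan and Roth (2023),

$$
\Delta^{RM}(\bar{M}) = \left\{ \delta : \left| \delta_{t+1} - \delta_t \right| \leq \bar{M} \cdot \max_{s < 0} \left| \delta_{s+1} - \delta_s \right| \text{ for all } t \right\},
$$

so \(\bar{M} = 1\), the value reported below, allows post-treatment violations up to the size of the largest pre-treatment change.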
```r
# get robust CI
robust_ci %>%
  # keep just relative magnitude = 1
  filter(Mbar == 1) %>%
  # drop extraneous variables
  select(lb, ub, Mbar) %>%
  # round
  mutate_at(c("lb", "ub"), scales::number, accuracy = 0.01) %>%
  # make table
  kable(col.names = c("lower_bound", "upper_bound", "relative_magnitude"),
        align = 'c', booktabs = T) %>%
  kable_styling()
```
| lower_bound | upper_bound | relative_magnitude |
|:-----------:|:-----------:|:------------------:|
| -11.13      | 5.11        | 1                  |
```stata
*************************************************
* 1. Read data in again and restrict the sample
*************************************************
use "did_jel_aca_replication_data", clear
keep if inlist(yaca, 2014, 2020, 2021, 2023, .)

*************************************************
* Estimate unconditional csdid, version 1.81 (Oct 2025)
*************************************************
csdid crude_rate_20_64 [iw=set_wt], ///
    ivar(county_code) time(year) gvar(treat_year) ///
    long2 ///
    never ///
    wboot(reps(25000) rseed(20240924) wbtype(rademacher)) ///
    agg(event)

estat event, window(-5, 0) post

*************************************************
* Honest DiD, relative magnitude only, using M = 1 for e = 0
*************************************************
honestdid, type(relative_magnitude) pre(3/6) post(7) mvec(1)
exit
```
Figure 4: \(2 \times T\) Event Study With Covariates
In Figure 4 we show the \(2 \times T\) event studies with covariates using the Callaway and Sant’Anna (2021) estimators, i.e., the outcome regression, IPW, and doubly robust estimators (a sketch of the three calls follows the setup code below).
```r
# set seed for reproducibility
set.seed(20240924)

# load cleaned data
mydata <- read_csv(here::here("data", "county_mortality_data.csv")) %>%
  # make state the abbreviation
  mutate(state = str_sub(county, nchar(county) - 1, nchar(county))) %>%
  # drop DC and pre-2014 adoption states
  filter(!(state %in% c("DC", "DE", "MA", "NY", "VT")))

# set the covariates that we're going to use
covs <- c("perc_female", "perc_white", "perc_hispanic",
          "unemp_rate", "poverty_rate", "median_income")

## Clean data and add in covariates
mydata <- mydata %>%
  # make variables
  mutate(perc_white = population_20_64_white / population_20_64 * 100,
         perc_hispanic = population_20_64_hispanic / population_20_64 * 100,
         perc_female = population_20_64_female / population_20_64 * 100,
         unemp_rate = unemp_rate * 100,
         median_income = median_income / 1000) %>%
  # keep just the subset of variables that we will use later
  select(state, county, county_code, year, population_20_64, yaca,
         starts_with("perc_"), crude_rate_20_64, all_of(covs))

# keep only counties with full observations for outcome and covariates in 2013 and 2014
mydata <- mydata %>%
  # allow the ACA expansion variable to be missing
  drop_na(!yaca) %>%
  group_by(county_code) %>%
  # need full covariates for 2013 and 2014
  filter(length(which(year == 2013 | year == 2014)) == 2) %>%
  ungroup()

# finally, keep only counties with full mortality data for 2009 to 2019
mydata <- mydata %>%
  group_by(county_code) %>%
  drop_na(crude_rate_20_64) %>%
  filter(n() == 11)

# make smaller dataset
mydata <- mydata %>%
  mutate(Treat = if_else(!is.na(yaca) & yaca <= 2019, 1, 0),
         treat_year = if_else(!is.na(yaca) & yaca <= 2019, yaca, 0),
         Post = if_else(year >= 2014, 1, 0)) %>%
  group_by(county_code) %>%
  # make a variable with the population weight in 2013
  mutate(set_wt = population_20_64[which(year == 2013)]) %>%
  ungroup()

# make time series plot by timing group (Figure 5)
mydata %>%
  # identify the groups
  mutate(treat_year = if_else(treat_year == 0, "Non-Expansion Counties",
                              as.character(treat_year))) %>%
  # get weighted average mortality by timing group and year
  group_by(treat_year, year) %>%
  summarize(mortality = weighted.mean(crude_rate_20_64, set_wt)) %>%
  ggplot(aes(x = year, y = mortality, group = as.factor(treat_year),
             color = as.factor(treat_year))) +
  geom_point(size = 2) +
  geom_line(linewidth = 1) +
  scale_color_manual(values = c("#7C7189", "#D04E59", "#BC8E7D", "#2F3D70", "#CABEE9")) +
  scale_x_continuous(breaks = 2009:2019) +
  labs(x = "", y = "Mortality (20-64) \n Per 100,000") +
  theme(legend.position = "bottom",
        axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360),
        strip.text = element_text(size = 14),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.title = element_blank(),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 10))
```
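The three covariate-adjusted models in Figure 4 differ only in the `est_method` argument of `did::att_gt()`: `"reg"` for outcome regression, `"ipw"` for inverse probability weighting, and `"dr"` for the doubly robust combination. Below is a minimal sketch of those calls, assuming `mydata`, `covs`, `treat_year`, and `set_wt` are as constructed above and that the comparison group follows the not-yet-treated convention used elsewhere in this appendix; the bootstrap options used for the published figure are set in the surrounding blocks.

```r
library(did)

# covariate formula shared by all three estimators
xform <- as.formula(paste("~", paste(covs, collapse = " + ")))

# fit the same model under the three estimation methods
mods <- lapply(c("reg", "ipw", "dr"), function(m) {
  att_gt(yname = "crude_rate_20_64", tname = "year",
         idname = "county_code",          # assumed numeric, as in the earlier blocks
         gname = "treat_year",
         xformla = xform, data = mydata, panel = TRUE,
         control_group = "notyettreated", # assumed comparison group
         est_method = m,                  # "reg", "ipw", or "dr"
         weightsname = "set_wt", base_period = "universal")
})
names(mods) <- c("reg", "ipw", "dr")
```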
With the estimator from Callaway and Sant’Anna (2021) we can estimate the group-time average treatment effects, what we call \(ATT(g, t)\), for each timing group and time period. We plot these cohort-specific treatment effects in Figure 6.
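In the notation of Callaway and Sant’Anna (2021), for timing group \(g\) (the year a county’s state expanded Medicaid) and calendar year \(t\),

$$
ATT(g, t) = \mathbb{E}\left[ Y_t(g) - Y_t(0) \mid G_g = 1 \right],
$$

the average effect in year \(t\) of having first been treated in year \(g\), among the counties in that timing group.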
```r
# Make a different plot that reports the ATT(g,t)s in relative time
# (mod_no_x is the G x T model estimated without covariates; a sketch of the
# call appears in the Figure 8 section below)
mod_no_x %>%
  # get the estimates and CIs
  broom::tidy() %>%
  # define relative time for each group
  mutate(rel_time = time - group) %>%
  # plot
  ggplot(aes(x = rel_time, y = estimate, group = as.factor(group),
             color = as.factor(group))) +
  geom_point(size = 2) +
  geom_line(linewidth = 1) +
  geom_vline(xintercept = -1, linetype = "dashed", linewidth = 1) +
  scale_color_manual(values = c("#7C7189", "#D04E59", "#BC8E7D", "#2F3D70")) +
  scale_x_continuous(breaks = -10:5) +
  labs(x = "", y = "Treatment Effect \n Mortality Per 100,000") +
  theme(legend.position = "bottom",
        axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360),
        strip.text = element_text(size = 14),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.title = element_blank(),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 10))
```
```stata
***************************************************************
* 3. Figure 7: ATT(g,t) in event-time
***************************************************************

* read data in
use "did_jel_aca_replication_data", clear

* Estimate group x time effects
csdid crude_rate [iw=set_wt], ///
    ivar(county_code) ///
    time(year) ///
    gvar(treat_year) ///
    long2 ///
    notyet ///
    wboot(reps(25000) rseed(20240924) wbtype(rademacher))

* adding the omitted category to every g is very annoying in the matrix style,
* so do the graphs using regsave
regsave

* extract timing components from csdid
gen g = real(substr(var, 2, 4))
gen t1 = real(substr(var, 9, 4))
gen t2 = real(substr(var, 14, 4))
gen t = (t1)*(t2 == g - 1) + (t2)*(t1 == g - 1)

* add omitted period
preserve
    keep g
    duplicates drop
    gen t = g - 1
    gen coef = 0
    save testo, replace
restore
append using testo

* make CIs
gen lci = coef - 1.96*stderr
gen uci = coef + 1.96*stderr

keep coef lci uci g t
sort g t
gen e = t - g
keep g e coef
keep if g < .
reshape wide coef, i(e) j(g)

twoway ///
    scatter coef2014 coef2015 coef2016 coef2019 e, ///
    msym(o o o o) mcolor(gray*1.2 red*1.5 brown navy) ///
    c(l l l l) lcolor(gray*1.2 red*1.5 brown navy) ///
    lwidth(medthick medthick medthick medthick) ///
    legend(order(1 "2014" 2 "2015" 3 "2016" 4 "2019") rows(1) pos(6) ring(2) size(small)) ///
    xlabel(-10(1)5, nogrid) ///
    ylabel(-20(10)20) ///
    xline(-1, lpattern(dash)) ///
    title("") ///
    xtitle("") ///
    ytitle("") ///
    xsize(10) ysize(5)
```
Figure 8: G \(\times\) T Event Study Without Covariates
Finally, we demonstrate how to estimate DiD in the \(G \times T\) setting by leveraging treatment timing and not-yet-treated groups for identification, using the doubly robust method from Callaway and Sant’Anna (2021). First, in Figure 8 we show the DiD event study estimates without covariates.
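The no-covariates fit itself (the `mod_no_x` object whose \(ATT(g,t)\) estimates were plotted above) is estimated in an earlier block rather than reprinted here. A minimal sketch of that call, assuming its options mirror the with-covariates block below and the Stata `csdid ..., notyet` specification, with `xformla = NULL`:

```r
library(did)

# Sketch of the G x T model without covariates; option values are assumptions
# mirroring the with-covariates block that follows
mod_no_x <- att_gt(
  yname = "crude_rate_20_64",
  tname = "year",
  idname = "county_code",
  gname = "treat_year",
  xformla = NULL,                    # no covariates
  data = mydata,
  panel = TRUE,
  control_group = "notyettreated",   # not-yet-treated comparison group
  bstrap = TRUE,
  biters = 25000,
  cband = TRUE,
  weightsname = "set_wt",
  base_period = "universal"
)
```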
```r
# Now re-estimate and plot the same GxT estimates with covariates
# set.seed(20240924)
mod_with_x <- att_gt(yname = "crude_rate_20_64",
                     tname = "year",
                     idname = "county_code",
                     gname = "treat_year",
                     xformla = as.formula(paste("~", paste(covs, collapse = "+"))),
                     data = mydata,
                     panel = TRUE,
                     control_group = "notyettreated",
                     bstrap = TRUE,
                     biters = 25000,
                     cband = TRUE,
                     est_method = "dr",
                     weightsname = "set_wt",
                     # faster_mode = TRUE,
                     base_period = "universal")

# get the aggregate value for e = 0:5
agg <- mod_with_x %>%
  aggte(type = "dynamic", min_e = 0, max_e = 5, bstrap = TRUE, biters = 25000)

# make labels for the estimate, std error, and confidence intervals
label1 <- paste0("Estimate~(e %in% '{0, 5}')~'='~'",
                 scales::number(agg$overall.att, accuracy = 0.01), "'")
label2 <- paste0("Std. Error = ", scales::number(agg$overall.se, 0.01), " \n",
                 "Conf. Int = [",
                 scales::number(agg$overall.att - 1.96 * agg$overall.se, 0.01), ", ",
                 scales::number(agg$overall.att + 1.96 * agg$overall.se, 0.01), "]")

# make event study plot for GxT with covariates
mod_with_x %>%
  # aggregate to event time (dynamic)
  aggte(type = "dynamic") %>%
  broom::tidy(conf.int = TRUE) %>%
  # filter years
  filter(event.time %>% between(-5, 5)) %>%
  # plot
  ggplot(aes(x = event.time, y = estimate)) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high), color = "darkred") +
  geom_linerange(aes(ymin = point.conf.low, ymax = point.conf.high)) +
  geom_point() +
  geom_vline(xintercept = -1, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_x_continuous(breaks = -5:5) +
  annotate("text", x = 3, y = 18, label = label1, parse = TRUE) +
  annotate("text", x = 3, y = 15, label = label2) +
  labs(x = "Event Time", y = "Treatment Effect \n Mortality Per 100,000") +
  theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360),
        strip.text = element_text(size = 14),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.title = element_blank(),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 10))
```
```stata
***************************************************************
* 5. Figure 9: Event study with covariates
***************************************************************

* read data in
use "did_jel_aca_replication_data", clear

* local with covariate names
local covs perc_female perc_white perc_hispanic ///
    unemp_rate_pc poverty_rate median_income_k

* Use csdid for simple ES estimation (uniform and non-uniform CIs); use long2!!
csdid crude_rate_20_64 `covs' [iw=set_wt], ///
    ivar(county_code) time(year) gvar(treat_year) ///
    method(dripw) ///
    long2 ///
    notyet ///
    pscoretrim(0.995) ///
    wboot(reps(25000) rseed(20240924) wbtype(rademacher)) ///
    agg(event)

* you do actually have to grab the uniform CIs from the posted output
preserve
    clear
    matrix T = r(table)
    local cn : colnames T
    matrix llu = T[5, 1..colsof(T)]
    matrix ulu = T[6, 1..colsof(T)]
    matrix uci = (llu', ulu')
    matrix colnames uci = uci_lower uci_upper
    svmat uci, names(col)
    gen var = ""
    local i = 0
    foreach r of local cn {
        local ++i
        replace var = "`r'" in `i'
    }
    save "testo", replace
restore

* save the posted output to a dataset and merge back on the uniform CIs
regsave, ci
merge 1:1 var using "testo"
keep if _merge == 3
drop _merge

* get average post coefficient, standard error, and CI
qui sum coef if var == "Post_avg"
local postcoef : display %03.2f r(mean)
qui sum stderr if var == "Post_avg"
local postse : display %03.2f r(mean)
local postlci : display %03.2f `postcoef' - 1.96*`postse'
local postuci : display %03.2f `postcoef' + 1.96*`postse'

* now only keep the event-study estimates
keep if substr(var, 1, 1) == "T"
keep coef ci* uci* var

* extract timing components from event aggregation notation in csdid
gen pre = substr(var, 2, 1) == "m"
gen post = substr(var, 2, 1) == "p"
gen e = real(substr(var, 3, .))
replace e = -e if pre

* add omitted period
local obs = _N + 1
set obs `obs'
replace coef = 0 in `obs'
replace e = -1 in `obs'

* plot only -5 to 5
keep if inrange(e, -5, 5)

* plot the aggregated event-study estimates
twoway ///
    (rcap uci_lower uci_upper e, lcolor(red*1.2) msize(0)) ///
    (rcap ci_lower ci_upper e, lcolor(black) msize(0)) ///
    (scatter coef e, msym(O) mcolor(black)), ///
    legend(off) ///
    xlabel(-5(1)5, nogrid) ///
    ylabel(-20(10)20) ///
    yline(0, lpattern(dash)) ///
    xline(-1, lpattern(dash)) ///
    xtitle("Event-Time") ///
    ytitle("Treatment Effect" "Mortality Per 100,000", orientation(horizontal) size(small)) ///
    text(19.4 3 "Estimate e ∈ {0,5}=`postcoef'", size(small)) ///
    text(15.5 3 "Std. Error = `postse'" "Conf. Int = [`postlci',`postuci']", size(small)) ///
    xsize(10) ysize(5)
```
Source Code
---title: "Code for Replication in R and Stata"format: html: page-layout: article code-fold: show code-tools: true toc: true toc-location: left toc-depth: 3 toc_float: collapsed: no smooth_scroll: yes toc-expand: 3 self-contained: true theme: zephyr grid: margin-width: 100px body-width: 1200px---# IntroductionIn this appendix we provide the code to replicate the tables and figures in the article. By pressing the $\triangleright$ code button you can see the R code that creates the relevant figure or table, with a description of the steps involved in computing the analyses.# Table 1: Medicaid Expansion AdoptionThe following code block produces **Table 1**, which shows the roll-out of Medicaid Expansion under the Affordable Care Act (ACA) across states.```{r, include = FALSE}# we need the package that integrates stata before we can run itlibrary(Statamarkdown)library(readxl)library(knitr)library(readr)library(purrr)knitr::opts_chunk$set(engine.opts = list(cleanup = TRUE))```::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE, results = 'asis'}## Install packages from Github if needed# load packageslibrary(tidyverse)library(kableExtra)library(modelsummary)library(gt)library(did)library(ggthemes)library(fixest)library(latex2exp)library(HonestDiD)# omit NA from tablesoptions(knitr.kable.NA = '')# set baseline output for modelsummaryoptions(modelsummary_factory_default = 'kableExtra')# set ggplot themetheme_set( theme_clean() + theme(plot.background = element_blank(), legend.background = element_rect(color = "white"), strip.background = element_rect(color = "black")))# load cleaned datamydata <- read_csv(here::here("data", "county_mortality_data.csv")) %>% # make state the abbreviation mutate(state = str_sub(county, nchar(county) - 1, nchar(county))) %>% # drop District of Columbia from the data filter(state != "DC") %>% # make a second adoption variable for the table mutate(adopt = case_when( # missing is non-expansion is.na(yaca) ~ "Non-Expansion", # fix a couple pre-2014 adoptions from Miller, Johnson and Wherry state %in% c("DE", "MA", "NY", "VT") ~ "Pre-2014", TRUE ~ as.character(yaca) ))# get adoption year by stateadopts <- mydata %>% select(state, adopt) %>% distinct() %>% arrange(state)# first get the share of states, share of counties, and share of adults in 2013 by adoption category# first the states and share of statesstates <- adopts %>% group_by(adopt) %>% summarize(states = paste0(state, collapse = ", "), state_share = length(state) / 50)# next get the county share and the population sharecounties_pop <- mydata %>% # just for year 2013 filter(year == 2013) %>% # get county and population totals mutate(total_counties = n(), total_pop = sum(population_20_64, na.rm = TRUE)) %>% group_by(adopt) %>% # make into shares summarize(county_share = n() / mean(total_counties), pop_share = sum(population_20_64, na.rm = TRUE) / mean(total_pop))# make a table and exportstates %>% left_join(counties_pop, by = "adopt") %>% slice(9, 1:8) %>% # format the numbers to two digits mutate(across(state_share:pop_share, ~ scales::number(., accuracy = 0.01))) %>% # format the table kable( col.names = c("Expansion \n Year", "States", "Share of States", "Share of Counties", "Share of Adults (2013)"), booktabs = T, caption = "Medicaid Expansion Under the Affordable Care Act", label = "adoptions", align = c("c"), escape = FALSE, linesep = "" ) %>% kable_styling(latex_options = c("scale_down", "HOLD_position")) %>% column_spec(2, width = "20em")```### Stata```{stata, eval = 
FALSE}clearcapture log close******************************************************************************* 1. Load original data ****************************************************************************** cd "../../data" insheet using "county_mortality_data.csv", clear keep state county_code county year yaca population_20_64 *Define treatment timing gen treat_year = real(yaca) replace treat_year = 0 if missing(treat_year) tostring treat_year, gen(treat_str) destring yaca, replace force destring population_20_64, replace force * Drop DC as in JEL Table 1 drop if state == "District of Columbia"***************************************************** 2. Create adoption category ***************************************************** gen adopt = "" replace adopt = "Pre-2014" if inlist(state, "Delaware", "Massachusetts", "New York", "Vermont") replace adopt = string(yaca) if !missing(yaca) & adopt == "" replace adopt = "Non-Expansion" if missing(yaca)***************************************************** 3. Get number of states and state share *****************************************************preservegen st_abr = substr(county, max(1, strlen(county)-1), 2)keep st_abr adoptduplicates dropsort adopt st_abrby adopt: gen strL state_list = st_abrby adopt: replace state_list = state_list[_n-1] + ", " + st_abr if adopt[_n-1] == adopt[_n]bysort adopt: gen state_count=_Ngen state_share = state_count / 50gen len= strlen(state_list)bysort adopt: egen max=max(len)keep if len==max keep adopt state_list state_sharetempfile state_summarysave `state_summary', replacerestore***************************************************** 4. Get county & adult pop share (year = 2013) ***************************************************** preserve keep if year == 2013 * Total county count and total 20–64 pop in 2013 su population_20_64 scalar total_pop = r(sum) su county_code scalar total_counties = r(N) collapse (count) county_count=county_code (sum) pop=population_20_64, by(adopt) gen county_share = county_count / total_counties gen pop_share = pop / total_pop tempfile county_summary save `county_summary' restore***************************************************** 5. Merge and order categories for display ***************************************************** use `state_summary', clear merge 1:1 adopt using `county_summary', nogen * Order like JEL Table 1 gen order = . replace order = 1 if adopt == "Pre-2014" replace order = 2 if adopt == "2014" replace order = 3 if adopt == "2015" replace order = 4 if adopt == "2016" replace order = 5 if adopt == "2019" replace order = 6 if adopt == "2020" replace order = 7 if adopt == "2021" replace order = 8 if adopt == "2023" replace order = 9 if adopt == "Non-Expansion" sort order * Round to match JEL formatting gen state_share_fmt = string(state_share, "%4.2f") gen county_share_fmt = string(county_share, "%4.2f") gen pop_share_fmt = string(pop_share, "%4.2f")***************************************************** 6. 
Export table *****************************************************export excel adopt state_list state_share_fmt county_share_fmt pop_share_fmt /// using "../data/adoptions_table.xlsx", firstrow(variables) replacerestore ```:::# $2\times2$ DiD setupsThe following tables and figures show our analyses within the $2\times2$ section of the paper.## Table 2: Simple $2\times2$ DiD**Table 2** shows how we can calculate the simple $2\times2$ DiD comparing the change in the crude mortality rates between states that chose to expand Medicaid in 2014 and those that had not yet expanded by 2019.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE}# set seed for reproducibilityset.seed(20240924)# set filepath locations for tables and figuresfile.loc.tab <- here::here("tables/")file.loc.fig <- here::here("figures/")# load cleaned datamydata <- read_csv(here::here("data", "county_mortality_data.csv")) %>% # make state the abbreviation mutate(state = str_sub(county, nchar(county) - 1, nchar(county))) %>% # drop DC and pre-2014 adoption states filter(!(state %in% c("DC", "DE", "MA", "NY", "VT"))) %>% # drop states that adopt between 2014 and 2019 filter(yaca == 2014 | is.na(yaca) | yaca > 2019)# set the covariates that we're going to use.covs <- c("perc_female","perc_white", "perc_hispanic", "unemp_rate", "poverty_rate", "median_income")## Clean data and add in covariatesmydata <- mydata %>% # make variables mutate(perc_white = population_20_64_white / population_20_64 * 100, perc_hispanic = population_20_64_hispanic / population_20_64 * 100, perc_female = population_20_64_female/ population_20_64 * 100, unemp_rate = unemp_rate * 100, median_income = median_income / 1000) %>% # keep just subset of variables that we will use later select(state, county, county_code, year, population_20_64, yaca, starts_with("perc_"), crude_rate_20_64, all_of(covs))# keep only counties with full observations for outcome and covariates in 2013 and 2014mydata <- mydata %>% # allow the aca expansion variable to be missing drop_na(!yaca) %>% group_by(county_code) %>% # need full covariates for 2013 and 2014 filter(length(which(year == 2013 | year == 2014)) == 2) %>% ungroup()# finally, keep only counties with full mortality data for 2009 to 2019mydata <- mydata %>% group_by(county_code) %>% drop_na(crude_rate_20_64) %>% filter(n() == 11)# make a smaller dataset with only years 2013 and 2014short_data <- mydata %>% # make a binary variable that identifies ACA expansion and post-years mutate(Treat = if_else(yaca == 2014 & !is.na(yaca), 1, 0), Post = if_else(year == 2014, 1, 0)) %>% # filter years for just 2013 and 2014 filter(year %in% c(2013, 2014)) %>% # make a variable with population weight in 2013 group_by(county_code) %>% mutate(set_wt = population_20_64[which(year == 2013)]) %>% ungroup()# get group means for Table 2# unweighted expansion pre-reformT_pre <- short_data %>% filter(Treat == 1 & year == 2013) %>% summarize(mean = mean(crude_rate_20_64)) %>% pull(mean)# unweighted non-expansion pre-form C_pre <- short_data %>% filter(Treat == 0 & year == 2013) %>% summarize(mean = mean(crude_rate_20_64)) %>% pull(mean)# unweighted expansion post-reformT_Post <- short_data %>% filter(Treat == 1 & year == 2014) %>% summarize(mean = mean(crude_rate_20_64)) %>% pull(mean)# unweighted non-expansion post-reformC_Post <- short_data %>% filter(Treat == 0 & year == 2014) %>% summarize(mean = mean(crude_rate_20_64)) %>% pull(mean)# get the same group means but this time weightedT_pre_weight <- short_data %>% 
filter(Treat == 1 & year == 2013) %>% summarize(mean = weighted.mean(crude_rate_20_64, w = set_wt)) %>% pull(mean)C_pre_weight <- short_data %>% filter(Treat == 0 & year == 2013) %>% summarize(mean = weighted.mean(crude_rate_20_64, w = set_wt)) %>% pull(mean)T_Post_weight <- short_data %>% filter(Treat == 1 & year == 2014) %>% summarize(mean = weighted.mean(crude_rate_20_64, w = set_wt)) %>% pull(mean)C_Post_weight <- short_data %>% filter(Treat == 0 & year == 2014) %>% summarize(mean = weighted.mean(crude_rate_20_64, w = set_wt)) %>% pull(mean)# function to do a reasonable roundingg_round <- function(x, k) {format(round(x, k), nsmall = k)}# make table and exporttribble( ~"", ~"Expansion", ~ "No Expansion", ~"Gap/DiD", ~"Expansion", ~"No Expansion", ~"Gap/DiD", "2013", as.character(g_round(T_pre, 1)), as.character(g_round(C_pre, 1)), as.character(g_round(T_pre - C_pre, 1)), as.character(g_round(T_pre_weight, 1)), as.character(g_round(C_pre_weight, 1)), as.character(g_round(T_pre_weight - C_pre_weight, 1)), "2014", as.character(g_round(T_Post, 1)), as.character(g_round(C_Post, 1)), as.character(g_round(T_Post - C_Post, 1)), as.character(g_round(T_Post_weight, 1)), as.character(g_round(C_Post_weight, 1)), as.character(g_round(T_Post_weight - C_Post_weight, 1)), "Trend/DiD", as.character(g_round(T_Post - T_pre, 1)), as.character(g_round(C_Post-C_pre, 1)), as.character(g_round((T_Post - T_pre) - (C_Post-C_pre), 1)), as.character(g_round(T_Post_weight - T_pre_weight, 1)), as.character(g_round(C_Post_weight-C_pre_weight, 1)), as.character(g_round((T_Post_weight - T_pre_weight) - (C_Post_weight-C_pre_weight), 1))) %>% # format table kable(align = 'c', booktabs = T, escape = F, caption = "Simple 2 X 2 DiD") %>% kable_styling() %>% row_spec(3, color = "BrickRed") %>% column_spec(c(1:3, 5:6), color = "black") %>% row_spec(3, italic = TRUE) %>% column_spec(c(4, 7), italic = TRUE) %>% add_header_above(c(" " = 1, "Unweighted Averages" = 3, "Weighted Averages" = 3))```### Stata```{stata, eval = FALSE}********************************* 1. Load and prepare data******************************** clear all set more off capture log close insheet using "county_mortality_data.csv", clear********************************* 2. Prepare timing groups******************************** *Drop early and mid adopters drop if inlist(state, "District of Columbia", "Delaware", "Massachusetts", "New York", "Vermont") *Define treatment timing gen treat_year = real(yaca) replace treat_year = 0 if missing(treat_year) tostring treat_year, gen(treat_str) destring yaca, replace force********************************* 3. Prepare covariates******************************** *destring covariates destring deaths population_20_64 crude_rate_20_64 population_total population_20_64_hispanic population_20_64_female population_20_64_white unemployed labor_force unemp_rate poverty_rate median_income, replace force ren population_total total_population *Drop counties with missing covariates gen perc_white = population_20_64_white / population_20_64 * 100 gen perc_hispanic = population_20_64_hispanic/ population_20_64 * 100 gen perc_female = population_20_64_female / population_20_64 * 100 gen unemp_rate_pc = unemp_rate * 100 gen median_income_k = median_income/1000 drop if missing(crude_rate_20_64, population_20_64, /// perc_white, perc_hispanic, perc_female, /// unemp_rate_pc, poverty_rate, median_income_k)********************************* 4. 
Sample construction******************************** *Keep counties with full 11-year panel bys county_code (year): gen panel_n = _N bys county_code: keep if panel_n == 11 drop panel_n replace treat_year=0 if treat_year>2019********************************* 5. Construct population weight******************************** bys county_code: egen set_wt = max(cond(year==2013, population_20_64, .))********************************* 6. Label variables******************************** label variable crude_rate_20_64 "Crude Mortality Rate"save "did_jel_aca_replication_data", replace****************************************************************1. Table 2: weighted and unweighted averages for the simple 2x2 DiD*************************************************************** *Read in data, keep 2013–2014 and 2014 expansion vs "non expansion" states use "did_jel_aca_replication_data", clear keep if inlist(year, 2013, 2014) & inlist(yaca,2014,2020,2021,2023,.) gen Treat = (yaca==2014) // 1 = ACA in 2014 gen Post = (year == 2014) // 1 = post-expansion year *Means *unweighted sum crude_rate if Treat==1 & year==2014 global u12014 = r(mean) sum crude_rate if Treat==1 & year==2013 global u12013 = r(mean) sum crude_rate if Treat==0 & year==2014 global u02014 = r(mean) sum crude_rate if Treat==0 & year==2013 global u02013 = r(mean) *weighted sum crude_rate if Treat==1 & year==2014 [aw=set_wt] global w12014 = r(mean) sum crude_rate if Treat==1 & year==2013 [aw=set_wt] global w12013 = r(mean) sum crude_rate if Treat==0 & year==2014 [aw=set_wt] global w02014 = r(mean) sum crude_rate if Treat==0 & year==2013 [aw=set_wt] global w02013 = r(mean) *Gaps, trends, DiDs * Unweighted global Gap13_u = ($u12013 - $u02013) global Gap14_u = ($u12014 - $u02014) global DiD_u = ( ($u12014 - $u12013) - ($u02014 - $u02013) ) global Trend_T_u = ($u12014 - $u12013) global Trend_C_u = ($u02014 - $u02013) * Weighted global Gap13_w = ($w12013 - $w02013) global Gap14_w = ($w12014 - $w02014) global DiD_w = ( $w12014 - $w12013 ) - ( $w02014 - $w02013 ) global Trend_T_w = ($w12014 - $w12013) global Trend_C_w = ($w02014 - $w02013) *Rounding for the table foreach nm in u12013 u02013 u12014 u02014 /// w12013 w02013 w12014 w02014 /// Gap13_u Gap14_u DiD_u Trend_T_u Trend_C_u /// Gap13_w Gap14_w DiD_w Trend_T_w Trend_C_w { local `nm' : display %6.1f ${`nm'} display("`nm'") } display("`u12013'") display "$u12013" * Brick-red DiD cells local DiD_u_cell "\em{\em{\textcolor{BrickRed}{`DiD_u'}}}" local DiD_w_cell "\em{\em{\textcolor{BrickRed}{`DiD_w'}}}"/*-------------------------------------------------------------------- 8. 
Export Excel Table for Rendering--------------------------------------------------------------------*/clearset obs 3// row labelsgen str10 row = ""replace row = "2013" in 1replace row = "2014" in 2replace row = "Trend/DiD" in 3// columns (numeric); convert locals like `u12013' to numbers with real()gen double Expansion_unw = .gen double NoExpansion_unw = .gen double Gap_DiD_unw = .gen double Expansion_wt = .gen double NoExpansion_wt = .gen double Gap_DiD_wt = .replace Expansion_unw = real("`u12013'") in 1replace Expansion_unw = real("`u12014'") in 2replace Expansion_unw = real("`Trend_T_u'") in 3replace NoExpansion_unw = real("`u02013'") in 1replace NoExpansion_unw = real("`u02014'") in 2replace NoExpansion_unw = real("`Trend_C_u'") in 3replace Gap_DiD_unw = real("`Gap13_u'") in 1replace Gap_DiD_unw = real("`Gap14_u'") in 2replace Gap_DiD_unw = real("`DiD_u'") in 3replace Expansion_wt = real("`w12013'") in 1replace Expansion_wt = real("`w12014'") in 2replace Expansion_wt = real("`Trend_T_w'") in 3replace NoExpansion_wt = real("`w02013'") in 1replace NoExpansion_wt = real("`w02014'") in 2replace NoExpansion_wt = real("`Trend_C_w'") in 3replace Gap_DiD_wt = real("`Gap13_w'") in 1replace Gap_DiD_wt = real("`Gap14_w'") in 2replace Gap_DiD_wt = real("`DiD_w'") in 3format Expansion_unw NoExpansion_unw Gap_DiD_unw /// Expansion_wt NoExpansion_wt Gap_DiD_wt %9.1fexport excel using "../data/two_by_two_table.xlsx", firstrow(variables) replace```:::## Table 3: Regression DiD**Table 3** shows how we can recover the same $2\times2$ estimate using one of three regression specifications:1\) Regressing the crude mortality rate on indicators for treatment (Medicaid expansion states), post-treatment periods (2014), and the interaction of the two.2\) Regressing the crude mortality rate on the interaction of indicators for Medicaid expansion and post-expansion periods, as well as county and year fixed effects.3\) Regressing the difference between 2014 and 2013 county-level crude mortality rates on an indicator for whether the county is in a state that expanded Medicaid.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE}# show that you can get the same estimates with regression# make a different short data with long differences in mortality by county and treatment indicatorsshort_data2 <- short_data %>% group_by(county_code) %>% summarize(state = state[1], set_wt = mean(set_wt), # long difference between 2014 and 2013 rates diff = crude_rate_20_64[which(year == 2014)] - crude_rate_20_64[which(year == 2013)], Treat = mean(Treat), Post = 1)# estimate three models without weights. 
These are Treat*Post with no fixed effects,# fixed effects + Treat:Post, and then the long difference model with no fixed effects.mod1 <- feols(crude_rate_20_64 ~ Treat*Post, data = short_data, cluster = ~county_code)mod2 <- feols(crude_rate_20_64 ~ Treat:Post | county_code + year, data = short_data, cluster = ~county_code)mod3 <- feols(diff ~ Treat:Post, data = short_data2, cluster = ~county_code)# estimate the same three three models with weightsmod4 <- feols(crude_rate_20_64 ~ Treat*Post, data = short_data, weights = ~set_wt, cluster = ~county_code)mod5 <- feols(crude_rate_20_64 ~ Treat:Post | county_code + year, data = short_data, weights = ~set_wt, cluster = ~county_code)mod6 <- feols(diff ~ Treat:Post, data = short_data2, weights = ~set_wt, cluster = ~county_code)# this dictionary maps names to labels for the tabledict <- c("(Intercept)" = "Constant", "Treat" = "Medicaid Expansion", "Post" = "Post", "Treat:Post" = "Medicaid Expansion X Post")# rows to add to tablerows = tribble( ~x1, ~x2, ~x3, ~x4, ~x5, ~x6, ~x7, "County fixed effects", "No", "Yes", "No", "No", "Yes", "No", "Year fixed effects", "No", "Yes", "No", "No", "Yes", "No")# export table 3modelsummary(list("(1)" = mod1, "(2)" = mod2, "(3)" = mod3, "(4)" = mod4, "(5)" = mod5, "(6)" = mod6), coef_map = dict, escape = FALSE, gof_map = NA, fmt = 1, stars = FALSE, estimate = "{estimate}", add_rows = rows, title = "Regression 2 X 2 DiD") %>% add_header_above(c(" " = 1, "Crude Morality Rate" = 2, "\\(\\Delta\\)" = 1, "Crude Mortality Rate" = 2, "\\(\\Delta\\)" = 1), escape = FALSE)```### Stata```{stata, eval = FALSE}****************************************************************2. Table 3: regression 2x2 DiD with balanced panel, no controls*************************************************************** *Read in data, keep 2013–2014 and 2014 expansion vs "non expansion" states use "did_jel_aca_replication_data", clear keep if inlist(year, 2013, 2014) & inlist(yaca,2014,2020,2021,2023,.) 
gen Treat = (yaca==2014) // 1 = ACA in 2014 gen Post = (year == 2014) // 1 = post-expansion year *Panel declaration (once) xtset county_code year *UNWEIGHTED regressions eststo clear * (1) Treat × Post, no FE reg crude_rate_20_64 c.Treat##c.Post, vce(cluster county_code) eststo m1 estadd local countyfe "No" estadd local yearfe "No" * (2) Treat × Post with county & year FE (xtreg, fe → no constant) xtreg crude_rate_20_64 c.Treat#c.Post i.year, fe vce(cluster county_code) eststo m2 estadd local countyfe "Yes" estadd local yearfe "Yes"* (3) Long-difference, unweightedpreserve keep county_code year crude_rate_20_64 Treat reshape wide crude_rate_20_64, i(county_code) j(year) gen diff = crude_rate_20_642014 - crude_rate_20_642013 label variable diff "\$\\Delta\$" reg diff Treat, vce(cluster county_code) // rename Treat -> c.Treat#c.Post and re-post BEFORE eststo tempname b V matrix `b' = e(b) matrix `V' = e(V) local N = e(N) local df = e(df_r) tempvar touse gen byte `touse' = e(sample) local cn : colnames `b' local newcn foreach nm of local cn { if ("`nm'" == "Treat") local newcn `newcn' c.Treat#c.Post else local newcn `newcn' `nm' } matrix colnames `b' = `newcn' matrix colnames `V' = `newcn' matrix rownames `V' = `newcn' ereturn post `b' `V', depname(diff) esample(`touse') obs(`N') dof(`df') eststo m3 estadd local countyfe "No" estadd local yearfe "No"restore *WEIGHTED regressions * (4) Weighted, no FE reg crude_rate_20_64 c.Treat##c.Post [aw=set_wt], vce(cluster county_code) eststo m4 estadd local countyfe "No" estadd local yearfe "No" * (5) Weighted, county & year FE (areg keeps _cons; we'll hide it) xtreg crude_rate_20_64 c.Treat#c.Post i.year [aw=set_wt], /// fe vce(cluster county_code) eststo m5 estadd local countyfe "Yes" estadd local yearfe "Yes"* (6) Weighted long-differencepreserve keep county_code year crude_rate_20_64 Treat set_wt reshape wide crude_rate_20_64, i(county_code) j(year) gen diff = crude_rate_20_642014 - crude_rate_20_642013 reg diff Treat [aw=set_wt], vce(cluster county_code) // rename Treat -> c.Treat#c.Post and re-post BEFORE eststo tempname b V matrix `b' = e(b) matrix `V' = e(V) local N = e(N) local df = e(df_r) tempvar touse gen byte `touse' = e(sample) local cn : colnames `b' local newcn foreach nm of local cn { if ("`nm'" == "Treat") local newcn `newcn' c.Treat#c.Post else local newcn `newcn' `nm' } matrix colnames `b' = `newcn' matrix colnames `V' = `newcn' matrix rownames `V' = `newcn' ereturn post `b' `V', depname(diff) esample(`touse') obs(`N') dof(`df') eststo m6 estadd local countyfe "No" estadd local yearfe "No"restore *Build the LaTeX table local labels /// _cons "Constant" /// Treat "Medicaid Expansion" /// Post "Post" /// c.Treat#c.Post "Medicaid Expansion × Post" * Add Treat as the interaction label in long-diff specs (m3 and m6) estimates restore m3 estadd local intlabel "Medicaid Expansion × Post" estimates restore m6 estadd local intlabel "Medicaid Expansion × Post"esttab m1 m2 m3 m4 m5 m6 using "../data/regdid_2x2.csv", replace keep(_cons Treat Post c.Treat#c.Post) order(_cons Treat Post c.Treat#c.Post) varlabels(_cons "Constant" Treat "Medicaid Expansion" Post "Post" c.Treat#c.Post "Medicaid Expansion × Post") cells(b(fmt(%9.1f) ) se(fmt(%9.1f) )) collabels(none) stats( countyfe yearfe, fmt(%9.0g %9s %9s) labels("County fixed effects" "Year fixed effects")) nomtitles nonotes noobs numbers plain csv ```:::## Table 4: Covariate BalanceIn **Table 4** we show how to check for covariate balance across the covariates, using both 2013 levels and the 
changes between 2014 and 2013. We include a measure of the normalized difference as well as the (unweighted and weighted) sample means.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE}# Now, make the covariate balance table (Table 4).# unweighted - preunweighted <- short_data %>% filter(year == 2013) %>% select(Treat, all_of(covs)) %>% group_by(Treat) %>% # get mean and standard deviation summarize_all(list(mean, var)) %>% # pivot the data longer pivot_longer(cols = !Treat, names_to = "variable", values_to = "value") %>% # now make separate columns for treated and untreated pivot_wider(names_from = "Treat", values_from = "value", names_prefix = "group") %>% # separate mean and standard deviations extract(variable, into = c("variable", "fx"), "(.*)_(.*)") %>% pivot_wider(id_cols = variable, names_from = fx, values_from = c(group0, group1)) %>% # make normalized difference mutate(norm_diff = (group1_fn1 - group0_fn1)/sqrt((group1_fn2 + group0_fn2)/2)) %>% select(variable, group0_fn1, group1_fn1, norm_diff)# make a weighted variance functionwtd.var <- function (x, weights = NULL, normwt = FALSE, na.rm = TRUE, method = c("unbiased", "ML")) { method <- match.arg(method) if (!length(weights)) { if (na.rm) x <- x[!is.na(x)] return(var(x)) } if (na.rm) { s <- !is.na(x + weights) x <- x[s] weights <- weights[s] } if (normwt) weights <- weights * length(x)/sum(weights) if (normwt || method == "ML") return(as.numeric(stats::cov.wt(cbind(x), weights, method = method)$cov)) sw <- sum(weights) if (sw <= 1) warning("only one effective observation; variance estimate undefined") xbar <- sum(weights * x)/sw sum(weights * ((x - xbar)^2))/(sw - 1)}# weighted - preweighted <- short_data %>% filter(year == 2013) %>% select(Treat, all_of(covs), set_wt) %>% group_by(Treat) %>% # get mean and standard deviation summarize(across(all_of(covs), list( ~weighted.mean(x = ., w = set_wt), ~wtd.var(x = ., weights = set_wt, normwt = TRUE)))) %>% # pivot the data longer pivot_longer(cols = !Treat, names_to = "variable", values_to = "value") %>% # now make separate columns for treated and untreated pivot_wider(names_from = "Treat", values_from = "value", names_prefix = "group") %>% # separate mean and standard deviations extract(variable, into = c("variable", "fx"), "(.*)_(.*)") %>% pivot_wider(id_cols = variable, names_from = fx, values_from = c(group0, group1)) %>% # make normalized difference mutate(norm_diff = (group1_1 - group0_1)/sqrt((group1_2 + group0_2)/2)) %>% select(variable, group0_1, group1_1, norm_diff)# make the top panel (weighted and unweighted Pre)top_panel <- bind_cols(unweighted, weighted %>% select(-variable))# make the bottom panel, which is the same thing but with the difference in X between 2014 and 2013.# unweightedunweighted <- short_data %>% select(county_code, year, Treat, all_of(covs)) %>% arrange(county_code, year) %>% group_by(county_code, Treat) %>% summarize( across(all_of(covs), function(x) x[2] - x[1]) ) %>% ungroup() %>% select(-county_code) %>% group_by(Treat) %>% # get mean and standard deviation summarize_all(list(mean, var)) %>% # pivot the data longer pivot_longer(cols = !Treat, names_to = "variable", values_to = "value") %>% # now make separate columns for treated and untreated pivot_wider(names_from = "Treat", values_from = "value", names_prefix = "group") %>% # separate mean and standard deviations extract(variable, into = c("variable", "fx"), "(.*)_(.*)") %>% pivot_wider(id_cols = variable, names_from = fx, values_from = c(group0, group1)) %>% # make 
normalized difference mutate(norm_diff = (group1_fn1 - group0_fn1)/sqrt((group1_fn2 + group0_fn2)/2)) %>% select(variable, group0_fn1, group1_fn1, norm_diff)# weightedweighted <- short_data %>% select(county_code, year, Treat, all_of(covs)) %>% arrange(county_code, year) %>% group_by(county_code, Treat) %>% summarize( across(all_of(covs), function(x) x[2] - x[1]) ) %>% ungroup() %>% left_join(short_data %>% filter(year == 2013) %>% select(county_code, set_wt), join_by(county_code)) %>% select(-county_code) %>% group_by(Treat) %>% # get mean and standard deviation summarize(across(all_of(covs), list( ~weighted.mean(x = ., w = set_wt), ~wtd.var(x = ., weights = set_wt, normwt = TRUE)))) %>% # pivot the data longer pivot_longer(cols = !Treat, names_to = "variable", values_to = "value") %>% # now make separate columns for treated and untreated pivot_wider(names_from = "Treat", values_from = "value", names_prefix = "group") %>% # separate mean and standard deviations extract(variable, into = c("variable", "fx"), "(.*)_(.*)") %>% pivot_wider(id_cols = variable, names_from = fx, values_from = c(group0, group1)) %>% # make normalized difference mutate(norm_diff = (group1_1 - group0_1)/sqrt((group1_2 + group0_2)/2)) %>% select(variable, group0_1, group1_1, norm_diff)# make the bottom panelbottom_panel <- bind_cols(unweighted, weighted %>% select(-variable))# bind the two panelstable <- bind_rows(top_panel, bottom_panel) %>% # reformat all columns to two digits mutate(across(-variable, \(x) scales::comma(x, accuracy = 0.01)))# format Table 4 and exporttable %>% # change column names mutate(variable = case_match(variable, "perc_female" ~ "% Female", "perc_white" ~ "% White", "perc_hispanic" ~ "% Hispanic", "unemp_rate" ~ "Unemployment Rate", "poverty_rate" ~ "Poverty Rate", "median_income" ~ "Median Income")) %>% # format latex table kable(col.names = c("Variable", "Non-Adopt", "Adopt", "Norm. Diff.", "Non-Adopt", "Adopt", "Norm. Diff."), align = 'lcccccc', escape = T, booktabs = T, label = "cov_balance", caption = "Covariate Balance Statistics", linesep = "") %>% kable_styling(latex_options = c("scale_down", "hold_position")) %>% pack_rows("2013 Covariate Levels", 1, 6) %>% pack_rows("2014 - 2013 Covariate Differences", 7, 12) %>% add_header_above(c(" " = 1, "Unweighted" = 3, "Weighted" = 3))```### Stata```{stata, eval = FALSE}****************************************************************3. Table 4: Covariate-balance table* NB:the table construction is a lot of lines here...*************************************************************** *Read in data, keep 2013–2014 and 2014 expansion vs "non expansion" states use "did_jel_aca_replication_data", clear keep if inlist(year, 2013, 2014) & inlist(yaca,2014,2020,2021,2023,.) 
gen Treat = (yaca==2014) // 1 = ACA in 2014 gen Post = (year == 2014) // 1 = post-expansion year *local with covariate names local covs perc_female perc_white perc_hispanic /// unemp_rate_pc poverty_rate median_income_k *2013 levels — unweighted means, SDs, and normalized diff preserve keep if year == 2013 *---- means and SDs in a single collapse ----------------------- collapse (mean) m_perc_female = perc_female /// m_perc_white = perc_white /// m_perc_hispanic = perc_hispanic /// m_unemp_rate_pc = unemp_rate_pc /// m_poverty_rate = poverty_rate /// m_median_income_k = median_income_k /// (sd) sd_perc_female = perc_female /// sd_perc_white = perc_white /// sd_perc_hispanic = perc_hispanic /// sd_unemp_rate_pc = unemp_rate_pc /// sd_poverty_rate = poverty_rate /// sd_median_income_k = median_income_k, /// by(Treat) generate byte id = 1 // same value for both rows (Treat 0/1) *---- reshape into one wide row ------------------------------------- reshape wide m* sd_*, i(id) j(Treat) drop id // no longer needed *---- normalized differences ---------------------------------- foreach x in `covs'{ gen nd_`x' = (m_`x'1 - m_`x'0) / /// sqrt( (sd_`x'1^2 + sd_`x'0^2) / 2 ) } keep m* nd_* // single-row dataset tempfile unwt_pre save `unwt_pre' list restore *2013 levels — weighted means, SDs, and normalized diff preserve keep if year == 2013 *---- means and SDs in a single collapse ----------------------- collapse (mean) wm_perc_female = perc_female /// wm_perc_white = perc_white /// wm_perc_hispanic = perc_hispanic /// wm_unemp_rate_pc = unemp_rate_pc /// wm_poverty_rate = poverty_rate /// wm_median_income_k = median_income_k /// (sd) sd_perc_female = perc_female /// sd_perc_white = perc_white /// sd_perc_hispanic = perc_hispanic /// sd_unemp_rate_pc = unemp_rate_pc /// sd_poverty_rate = poverty_rate /// sd_median_income_k = median_income_k /// [aw = set_wt], /// by(Treat) generate byte id = 1 // same value for both rows (Treat 0/1) *---- reshape into one wide row ------------------------------------- reshape wide wm* sd_*, i(id) j(Treat) drop id // no longer needed *---- normalized differences ---------------------------------- foreach x in `covs'{ gen wnd_`x' = (wm_`x'1 - wm_`x'0) / /// sqrt( (sd_`x'1^2 + sd_`x'0^2) / 2 ) } keep wm* wnd_* // single-row dataset tempfile wt_pre save `wt_pre' list restore * Differences - unweighted means, SDs, and normalized diff preserve *create differences for var `covs': egen bX = total(X*(year==2013)), by(county_code) for var `covs': gen d_X = X-bX keep if year == 2014 *---- means and SDs in a single collapse ----------------------- collapse (mean) m_d_perc_female = d_perc_female /// m_d_perc_white = d_perc_white /// m_d_perc_hispanic = d_perc_hispanic /// m_d_unemp_rate_pc = d_unemp_rate_pc /// m_d_poverty_rate = d_poverty_rate /// m_d_median_income_k = d_median_income_k /// (sd) sd_perc_female = d_perc_female /// sd_perc_white = d_perc_white /// sd_perc_hispanic = d_perc_hispanic /// sd_unemp_rate_pc = d_unemp_rate_pc /// sd_poverty_rate = d_poverty_rate /// sd_median_income_k = d_median_income_k, /// by(Treat) generate byte id = 1 // same value for both rows (Treat 0/1) *---- reshape into one wide row ------------------------------------- reshape wide m* sd_*, i(id) j(Treat) drop id // no longer needed *---- normalized differences ---------------------------------- foreach x in `covs'{ gen nd_d_`x' = (m_d_`x'1 - m_d_`x'0) / /// sqrt( (sd_`x'1^2 + sd_`x'0^2) / 2 ) } keep m_d* nd_d* // single-row dataset tempfile unwt_diff save `unwt_diff' list restore * 
Differences - weighted means, SDs, and normalized diff preserve *create differences for var `covs': egen bX = total(X*(year==2013)), by(county_code) for var `covs': gen d_X = X-bX keep if year == 2014 *---- means and SDs in a single collapse ----------------------- collapse (mean) wm_d_perc_female = d_perc_female /// wm_d_perc_white = d_perc_white /// wm_d_perc_hispanic = d_perc_hispanic /// wm_d_unemp_rate_pc = d_unemp_rate_pc /// wm_d_poverty_rate = d_poverty_rate /// wm_d_median_income_k = d_median_income_k /// (sd) sd_perc_female = d_perc_female /// sd_perc_white = d_perc_white /// sd_perc_hispanic = d_perc_hispanic /// sd_unemp_rate_pc = d_unemp_rate_pc /// sd_poverty_rate = d_poverty_rate /// sd_median_income_k = d_median_income_k /// [aw = set_wt], /// by(Treat) generate byte id = 1 // same value for both rows (Treat 0/1) *---- reshape into one wide row ------------------------------------- reshape wide wm* sd_*, i(id) j(Treat) drop id // no longer needed *---- normalized differences ---------------------------------- foreach x in `covs'{ gen wnd_d_`x' = (wm_d_`x'1 - wm_d_`x'0) / /// sqrt( (sd_`x'1^2 + sd_`x'0^2) / 2 ) } keep wm* wnd_* // single-row dataset tempfile wt_diff save `wt_diff' list restore **table creation *Assemble the four blocks into one matrix use `unwt_pre', clear merge 1:1 _n using `wt_pre', nogen // adds mw* sw* wnd_* merge 1:1 _n using `unwt_diff', nogen // adds md* nd_d* merge 1:1 _n using `wt_diff', nogen // adds mwd* swd* wnd_d_* de,f * ---- row labels ---------------------------------------------------- local rowlbl "% Female" "% White" "% Hispanic" /// "Unemployment Rate" "Poverty Rate" "Median Income" local outrows local i = 1 foreach v in `covs' { local outrows `"`outrows' `"`:word `i' of `rowlbl''"'"' local ++i } /* ---- build the 12 × 6 matrix ----------------------------------- */ matrix T = J(12,6,.) local r 0 foreach v of local covs { local ++r * rows 1–6 : 2013 levels matrix T[`r',1] = m_`v'0[1] // un-wtd mean, Non-adopt matrix T[`r',2] = m_`v'1[1] // un-wtd mean, Adopt matrix T[`r',3] = nd_`v'[1] // un-wtd norm-diff matrix T[`r',4] = wm_`v'0[1] // wtd mean, Non-adopt matrix T[`r',5] = wm_`v'1[1] // wtd mean, Adopt matrix T[`r',6] = wnd_`v'[1] // wtd norm-diff } foreach v of local covs { local ++r * rows 7–12 : 2014–2013 long-differences matrix T[`r',1] = m_d_`v'0[1] // un-wtd ∆, Non-adopt matrix T[`r',2] = m_d_`v'1[1] // un-wtd ∆, Adopt matrix T[`r',3] = nd_d_`v'[1] // un-wtd norm-diff matrix T[`r',4] = wm_d_`v'0[1] // **underscore added** matrix T[`r',5] = wm_d_`v'1[1] // **underscore added** matrix T[`r',6] = wnd_d_`v'[1] // wtd norm-diff } matrix rownames T = `outrows' `outrows' matrix colnames T = "Non-Adopt" "Adopt" "Norm. Diff." /// "Non-Adopt" "Adopt" "Norm. Diff."*--------------------------------------------------------------------* 5. 
Export to CSV instead of LaTeX*--------------------------------------------------------------------* Convert matrix T to a datasetclearsvmat T* Reorder so variable names come first* Rename columns to something clearrename T1 NonAdopt_unwtrename T2 Adopt_unwtrename T3 NormDiff_unwtrename T4 NonAdopt_wtrename T5 Adopt_wtrename T6 NormDiff_wt* After svmat/renamesforeach v of varlist NonAdopt_unwt Adopt_unwt NormDiff_unwt /// NonAdopt_wt Adopt_wt NormDiff_wt { replace `v' = round(`v', .01) tostring `v', replace format(%9.2f) force }* Save as CSVexport delimited using "../data/cov_balance.csv", replace```:::## Table 5: Regression 2 $\times$ 2 DiD with CovariatesIn **Table 5** we show how the 2 $\times$ 2 DiD estimates change with the inclusion of covariates using linear regression. You can do this in two ways - either controlling for the 2013 levels of the covariates, or the change in the covariates between 2014 and 2013. Even in this simple setting the results differ considerably.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE}# make a table that includes the model 1) without covariates, 2) long regression with # 2013 covariates values, 3) long regression with *difference* in covariate values.reg_data_2013 <- short_data %>% # make long diff in y group_by(county_code) %>% summarize(long_y = crude_rate_20_64[which(year == 2014)] - crude_rate_20_64[which(year == 2013)]) %>% # merge in 2013 covariates left_join(short_data %>% filter(year == 2013) %>% select(county_code, state, Treat, set_wt, all_of(covs)), by = "county_code")# this dataset has the change in X between 2014 and 2013 as controlsreg_data_change <- short_data %>% # make long diff in y group_by(county_code) %>% summarize(long_y = crude_rate_20_64[which(year == 2014)] - crude_rate_20_64[which(year == 2013)]) %>% # merge in change in covariate values left_join(short_data %>% group_by(county_code) %>% mutate(set_wt = set_wt[which(year == 2013)]) %>% group_by(county_code, state, Treat, set_wt) %>% summarize( across(all_of(covs), function(x) x[which(year == 2014)] - x[which(year == 2013)]) ), by = "county_code")# run the six models# first unweighted - long diff no covs, 2013 covs, then change in covsmod1 <- feols(long_y ~ Treat, data = reg_data_2013, cluster = ~county_code)mod2 <- feols(long_y ~ Treat + .[covs], data = reg_data_2013, cluster = ~county_code)mod3 <- feols(long_y ~ Treat + .[covs], data = reg_data_change, cluster = ~county_code)# same thing but weightedmod4 <- feols(long_y ~ Treat, data = reg_data_2013, weights = ~set_wt, cluster = ~county_code)mod5 <- feols(long_y ~ Treat + .[covs], data = reg_data_2013, weights = ~set_wt, cluster = ~county_code)mod6 <- feols(long_y ~ Treat + .[covs], data = reg_data_change, weights = ~set_wt, cluster = ~county_code)# export table 3modelsummary(list("(1)" = mod1, "(2)" = mod2, "(3)" = mod3, "(4)" = mod4, "(5)" = mod5, "(6)" = mod6), coef_map = dict, coef_omit = "(Intercept)", escape = FALSE, gof_map = NA, fmt = 2, estimate = "{estimate}", title = "Regression 2 X 2 DiD with Covariates") %>% add_header_above(c(" " = 1, "No Covs" = 1, "$$X_{i, t = 2013}$$" = 1, "$$X_{i, t}$$" = 1, "No Covs" = 1, "$$X_{i, t = 2013}$$" = 1, "$$X_{i, t}$$" = 1), escape = FALSE, extra_css = 'vertical-align: middle !important;', line = F) %>% add_header_above(c(" " = 1, "Unweighted" = 3, "Weighted" = 3), escape = FALSE)```### Stata```{stata, eval = FALSE}****************************************************************4. 
Table 5: Regression 2 x 2 DiD with Covariates *************************************************************** *read data in again and restrict to 2013/2014 use "did_jel_aca_replication_data", clear keep if inlist(year, 2013, 2014) & inlist(yaca,2014,2020,2021,2023,.) gen Treat = (yaca==2014) // 1 = ACA in 2014 gen Post = (year == 2014) // 1 = post-expansion year *local with covariate names local covs perc_female perc_white perc_hispanic /// unemp_rate_pc poverty_rate median_income_k * Keep only relevant variables keep county_code year Treat set_wt crude_rate_20_64 `covs' * Reshape everything (outcome + covariates) at once reshape wide crude_rate_20_64 `covs', i(county_code) j(year) * Generate change in covariates foreach v of local covs { gen d_`v' = `v'2014 - `v'2013 } * Generate outcome: change in crude mortality gen long_y = crude_rate_20_642014 - crude_rate_20_642013 * Drop missing outcome drop if missing(long_y) eststo clear * (1) Unweighted: No covariates reg long_y Treat, vce(cluster county_code) eststo m1 * (2) Unweighted: 2013 covariates reg long_y Treat /// perc_female2013 perc_white2013 perc_hispanic2013 /// unemp_rate_pc2013 poverty_rate2013 median_income_k2013, /// vce(cluster county_code) eststo m2 * (3) Unweighted: Changes in covariates reg long_y Treat /// d_perc_female d_perc_white d_perc_hispanic /// d_unemp_rate_pc d_poverty_rate d_median_income_k, /// vce(cluster county_code) eststo m3 * (4) Weighted: No covariates reg long_y Treat [aw=set_wt], vce(cluster county_code) eststo m4 * (5) Weighted: 2013 covariates reg long_y Treat /// perc_female2013 perc_white2013 perc_hispanic2013 /// unemp_rate_pc2013 poverty_rate2013 median_income_k2013 /// [aw=set_wt], vce(cluster county_code) eststo m5 * (6) Weighted: Changes in covariates reg long_y Treat /// d_perc_female d_perc_white d_perc_hispanic /// d_unemp_rate_pc d_poverty_rate d_median_income_k /// [aw=set_wt], vce(cluster county_code) eststo m6esttab m1 m2 m3 m4 m5 m6 using "../data/regdid_2x2_covs.csv", replace keep(Treat) b(%6.2f) se(%6.2f) nostar fragment plain csv label nomtitles alignment(c) collabels(none) ```:::## Table 6: Outcome Regression and Propensity Score ModelsWe next show how to adjust for covariates in the 2 $\times$ 2 setting in a disciplined manner. 
**Table 6** reports the outcome regression and propensity score model results that feed into the doubly-robust estimator from Callaway and Sant'Anna (2021).

::: panel-tabset

### R

```{r, message = FALSE, warning = FALSE, error = FALSE}
## Next we make Table 6 which shows the pscore and outcome models that feed into CS.
# These are done by regressing long y on the covariates for the untreated units (the outcome model)
# and regressing the expansion indicator on the covariates for 2013 data (the propensity model)
# We do it with and without weights
mod1 <- feols(long_y ~ .[covs], data = reg_data_2013 %>% filter(Treat == 0), cluster = ~county_code)
mod2 <- feglm(Treat ~ .[covs], data = short_data %>% filter(year == 2013), family = "binomial", vcov = "hetero")
mod3 <- feols(long_y ~ .[covs], data = reg_data_2013 %>% filter(Treat == 0), cluster = ~county_code, weights = ~set_wt)
mod4 <- feglm(Treat ~ .[covs], data = short_data %>% filter(year == 2013), family = "binomial", vcov = "hetero", weights = ~set_wt)

# This is the dictionary to map variables to data labels for the table
dict <- c("(Intercept)" = "Constant", "perc_female" = "% Female", "perc_white" = "% White", 
          "perc_hispanic" = "% Hispanic", "crude_rate_20_64" = "Crude Mortality Rate", 
          "unemp_rate" = "Unemployment Rate", "poverty_rate" = "Poverty Rate", 
          "median_income" = "Median Income")

# export table 6
modelsummary(list("(1)" = mod1, "(2)" = mod2, "(3)" = mod3, "(4)" = mod4), 
             coef_map = dict, escape = FALSE, gof_map = NA, fmt = 2, estimate = "{estimate}", 
             title = "Outcome Regression and Propensity Score Models") %>% 
  add_header_above(c(" " = 1, "Regression" = 1, "Propensity Score" = 1, 
                     "Regression" = 1, "Propensity Score" = 1), 
                   escape = FALSE, extra_css = 'vertical-align: middle !important;', line = F) %>% 
  add_header_above(c(" " = 1, "Unweighted" = 2, "Weighted" = 2), escape = FALSE)
```

### Stata

```{stata, eval = FALSE}
****************************************************************
* Table 6: Outcome Regressions + Propensity Score Models
***************************************************************
*read data in again and restrict to 2013/2014
use "did_jel_aca_replication_data", clear
keep if inlist(year, 2013, 2014) & inlist(yaca,2014,2020,2021,2023,.)
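* Note (added): this sample keeps the 2014 expansion cohort as the treated
* group and counties in states that had not expanded by 2019 (yaca of 2020,
* 2021, 2023, or missing) as the comparison group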
gen Treat = (yaca==2014) // 1 = ACA in 2014 gen Post = (year == 2014) // 1 = post-expansion year *local with covariate names local covs perc_female perc_white perc_hispanic /// unemp_rate_pc poverty_rate median_income_k * Keep only relevant variables keep county_code year Treat set_wt crude_rate_20_64 `covs' * Reshape everything (outcome + covariates) at once reshape wide crude_rate_20_64 `covs', i(county_code) j(year) * Generate change in covariates foreach v of local covs { gen d_`v' = `v'2014 - `v'2013 } * Generate outcome: change in crude mortality gen long_y = crude_rate_20_642014 - crude_rate_20_642013 * Drop missing outcome drop if missing(long_y) eststo clear ** (1) Outcome Regression — Unweighted, Untreated Only reg long_y /// perc_female2013 perc_white2013 perc_hispanic2013 /// unemp_rate_pc2013 poverty_rate2013 median_income_k2013 if Treat==0, /// vce(cluster county_code) eststo or1 ** (2) Propensity Score Model — Unweighted Logit logit Treat /// perc_female2013 perc_white2013 perc_hispanic2013 /// unemp_rate_pc2013 poverty_rate2013 median_income_k2013, /// vce(cluster county_code) eststo ps1 ** (3) Outcome Regression — Weighted, Untreated Only reg long_y /// perc_female2013 perc_white2013 perc_hispanic2013 /// unemp_rate_pc2013 poverty_rate2013 median_income_k2013 if Treat==0 /// [aw = set_wt], vce(cluster county_code) eststo or2 ** (4) Propensity Score — Weighted Logit logit Treat /// perc_female2013 perc_white2013 perc_hispanic2013 /// unemp_rate_pc2013 poverty_rate2013 median_income_k2013 /// [iw = set_wt], vce(cluster county_code) eststo ps2 esttab or1 ps1 or2 ps2 using "../data/reg_pscore.csv", replace /// cells(b(fmt(%9.2f) ) se(fmt(%9.2f) )) nostar collabels(none) /// fragment label alignment(c) order(_cons ) nomtitles nonotes noobs numbers plain csv```:::## Table 7: Sant'Anna and Zhao (2020) DiD**Table 7** shows how to calculate the DiD estimate using the estimators in Sant'Anna and Zhao (2020). 
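Their doubly-robust estimator combines the outcome model and the propensity score from Table 6. As a sketch of the panel-data form (in the notation introduced above, not the paper's):

$$
\widehat{ATT}^{\,dr} = \mathbb{E}_n\!\left[\left(\frac{D_i}{\mathbb{E}_n[D_i]} - \frac{\hat{p}(X_i)(1 - D_i)/\{1 - \hat{p}(X_i)\}}{\mathbb{E}_n\!\left[\hat{p}(X_i)(1 - D_i)/\{1 - \hat{p}(X_i)\}\right]}\right)\left(\Delta Y_i - \hat{\mu}_{0, \Delta}(X_i)\right)\right],
$$

which remains consistent if either the outcome model $\hat{\mu}_{0, \Delta}$ or the propensity score $\hat{p}$ is correctly specified.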
The paper describes three different estimation methods: outcome regression, inverse-probability weighted (IPW), and doubly robust models.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE}# finally, get the same estimates using Sant'Anna and Zhao (2020) and Callaway and Sant'Anna (2021) using the # three adjustment methods# You need to reformat the group variable (untreated = 0) and the unit ID variable needs to be numericdata_cs <- short_data %>% mutate(treat_year = if_else(yaca == 2014 & !is.na(yaca), 2014, 0), county_code = as.numeric(county_code))# create a function to run the CS estimator allowing the # estimation method to differrun_cs <- function(method, wt) { # estimate the att_gt atts <- att_gt( yname = "crude_rate_20_64", tname = "year", idname = "county_code", gname = "treat_year", xformla = as.formula(paste("~", paste(covs, collapse = "+"))), data = data_cs, panel = TRUE, control_group = "nevertreated", bstrap = TRUE, cband = TRUE, est_method = method, weightsname = wt, # faster_mode = TRUE, base_period = "universal", biters = 25000 ) # aggregate estimates aggte(atts, na.rm = TRUE, biters = 25000) %>% broom::tidy() %>% filter(group == 2014) %>% mutate(type = method)}# run it three ways - regression adjustment, IPW, and doubly robust# these are unweightedests <- map_dfr(c("reg", "ipw", "dr"), run_cs, wt = NULL)# make a table - Panel A is with unweighted modelstablea <- ests %>% # reformat the data select(type, estimate, std.error) %>% # format the estimate and std error mutate(estimate = scales::number(estimate, accuracy = 0.01), std.error = paste0("(", scales::number(std.error, accuracy = 0.01), ")")) %>% # reshape the data pivot_longer(cols = -type, names_to = "statistic", values_to = "value") %>% pivot_wider(id_cols = "statistic", names_from = "type", values_from = "value") %>% mutate(statistic = if_else(statistic == "estimate", "Medicaid Expansion", " "))# make weighted results for the three CS models.ests_w <- map_dfr(c("reg", "ipw", "dr"), run_cs, wt = "set_wt")# Panel B is the same thing but with weighted resultstableb <- ests_w %>% select(type, estimate, std.error) %>% mutate(estimate = scales::number(estimate, accuracy = 0.01), std.error = paste0("(", scales::number(std.error, accuracy = 0.01), ")")) %>% pivot_longer(cols = -type, names_to = "statistic", values_to = "value") %>% pivot_wider(id_cols = "statistic", names_from = "type", values_from = "value") %>% select(reg, ipw, dr)# Combine Panels A and B together and format the tablebind_cols(tablea, tableb) %>% kable(col.names = c(" ", "Regression", "IPW", "Doubly Robust", "Regression", "IPW", "Doubly Robust"), align = 'lcccccc', escape = F, booktabs = T, label = "2x2_csdid", caption = "Callaway and Sant'Anna (2021) DiD", linesep = "") %>% kable_styling(latex_options = c("hold_position")) %>% add_header_above(c(" " = 1, "Unweighted" = 3, "Weighted" = 3))```### Stata```{stata, eval = FALSE}**************************************************************** Table 7: 2x2 DiD with covariates (Sant'Anna and Zhao 2020)*************************************************************** *read data in again and restrict to 2013/2014 use "did_jel_aca_replication_data", clear keep if inlist(year, 2013, 2014) & inlist(yaca,2014,2020,2021,2023,.) 
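* Note (added): the "." in inlist() retains counties with missing yaca,
* i.e. states that never expanded, so never-adopters stay in the sample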
gen Treat = (yaca==2014) // 1 = ACA in 2014 gen Post = (year == 2014) // 1 = post-expansion year *local with covariate names local covs perc_female perc_white perc_hispanic /// unemp_rate_pc poverty_rate median_income_k *create baseline covariates (drdid would do this but we do it explicitly here) *drop the time-varying version of the covariate; not used here foreach x in `covs'{ egen `x'2013 =total(`x'*(year==2013)),by(county_code) drop `x' } * rename the baseline covariates just created rename (perc_female2013 perc_white2013 perc_hispanic2013 /// unemp_rate_pc2013 poverty_rate2013 median_income_k2013) /// (perc_female perc_white perc_hispanic /// unemp_rate_pc poverty_rate median_income_k) * Set panel xtset county_code year * Unweighted DRDID Estimation *request all estimators in one shot (we only use reg, stdipw, and dripw) drdid crude_rate_20_64 `covs', /// ivar(county_code) time(year) tr(Treat) /// all /// pscoretrim(0.995) /// wboot(reps(25000) rseed(20240924) wbtype(rademacher)) /// cluster(county_code) * Extract point estimates scalar reg = e(b)[1,3] scalar ipw = e(b)[1,5] scalar dripw = e(b)[1,1] * Extract standard errors scalar se_reg = sqrt(e(V)[3,3]) scalar se_ipw = sqrt(e(V)[5,5]) scalar se_dripw = sqrt(e(V)[1,1]) * Format for LaTeX export local reg_str : display %6.2f reg local ipw_str : display %6.2f ipw local dripw_str : display %6.2f dripw local se_reg_str : display %6.2f se_reg local se_ipw_str : display %6.2f se_ipw local se_dripw_str : display %6.2f se_dripw * Weighted DRDID Estimation *request all estimators in one shot (we only use reg, stdipw, and dripw) drdid crude_rate_20_64 `covs' [iweight = set_wt], /// ivar(county_code) time(year) tr(Treat) /// all /// pscoretrim(0.995) /// wboot(reps(25000) rseed(20240924) wbtype(rademacher)) /// cluster(county_code) * Extract point estimatesscalar reg_w = e(b)[1,3]scalar ipw_w = e(b)[1,5]scalar dripw_w = e(b)[1,1]* Extract standard errorsscalar se_reg_w = sqrt(e(V)[3,3])scalar se_ipw_w = sqrt(e(V)[5,5])scalar se_dripw_w = sqrt(e(V)[1,1])*Make matrix of the results of interest matrix results_wide = ( /// reg , ipw , dripw , reg_w , ipw_w , dripw_w \ /// se_reg , se_ipw , se_dripw , se_reg_w , se_ipw_w , se_dripw_w )matrix rownames results_wide = b sematrix colnames results_wide = Unw_REG Unw_IPW Unw_DRIPW W_REG W_IPW W_DRIPWmatrix list results_wideclearsvmat results_wide, names(col)* add row labels b/segen stat = ""replace stat = "Medicaid Expansion" in 1replace stat = "" in 2order stat Unw_REG Unw_IPW Unw_DRIPW W_REG W_IPW W_DRIPWexport delimited using "../data/reg_t7.csv", replace```:::## Figure 1: Distribution of Propensity Scores**Figure 1** plots the distribution of the propensity scores from the underlying propensity model for the unweighted and weighted models.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE}#| fig.align: 'center'#| fig.width: 10# Finally, we make Figure 1 which reports the distribution of the propensity scores between # expansion and non-expansion counties.# first add in propensity scores to the dataplot_data <- bind_rows( short_data %>% filter(year == 2013) %>% mutate(propensity = predict(mod2, ., type = "response"), mod = "Unweighted", wt = 1), short_data %>% filter(year == 2013) %>% mutate(propensity = predict(mod4, ., type = "response"), mod = "Weighted", wt = set_wt) )# plot the propensity score distributions by groupplot_data %>% mutate(expand = if_else(Treat == 1, "Expansion Counties", "Non-Expansion Counties")) %>% ggplot(aes(x = propensity, y = after_stat(density), 
group = expand, color = expand, weight = wt)) + 
  geom_histogram(fill = "white", position = "identity", linewidth = 1, alpha = 0.5) + 
  facet_wrap(~mod) + 
  scale_color_brewer(palette = 'Set1') + 
  labs(x = "Propensity Score", y = "Density") + 
  theme(legend.position = 'bottom', 
        axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), 
        strip.text = element_text(size = 14), 
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1), 
        legend.title = element_blank(), 
        axis.title = element_text(size = 12), 
        axis.text = element_text(size = 10))
```

### Stata

```{stata, eval = FALSE}
**************************************************
* Figure 1: Distribution of Propensity Scores
*************************************************
*read data in again and restrict to 2013/2014
use "did_jel_aca_replication_data", clear
keep if inlist(year, 2013, 2014) & inlist(yaca,2014,2020,2021,2023,.)
gen Treat = (yaca==2014) // 1 = ACA in 2014
gen Post = (year == 2014) // 1 = post-expansion year
*local with covariate names
local covs perc_female perc_white perc_hispanic ///
    unemp_rate_pc poverty_rate median_income_k
* Keep only relevant variables
keep county_code year Treat set_wt crude_rate_20_64 `covs'
* Reshape everything (outcome + covariates) at once
reshape wide crude_rate_20_64 `covs', i(county_code) j(year)
* Generate change in covariates
foreach v of local covs {
    gen d_`v' = `v'2014 - `v'2013
}
* Generate outcome: change in crude mortality
gen long_y = crude_rate_20_642014 - crude_rate_20_642013
* Drop missing outcome
drop if missing(long_y)
*Unweighted p-score distributions
eststo ps1: logit Treat ///
    perc_female2013 perc_white2013 perc_hispanic2013 ///
    unemp_rate_pc2013 poverty_rate2013 median_income_k2013, ///
    vce(cluster county_code)
*estimates use ps1
predict pscore_unw
* Expansion counties
summ pscore_unw if inlist(Treat,0,1)
local min = r(min)
local max = r(max)
local w = (`max' - `min')/30
local s = floor(`min'/`w')*`w' // align start to a bin edge
di `w'
di `s'
twoway histogram pscore_unw if Treat == 1 , width(`w') start(`s') fcolor(none) lcolor(red) || ///
    histogram pscore_unw if Treat == 0 , width(`w') start(`s') fcolor(none) lcolor(blue) ///
    ytitle("Density",angle(horizontal)) xtitle("") title("Unweighted") legend(off) ///
    ylabel(0(1)4.5) xlabel(,nogrid)
graph save "pscore_uw.gph", replace
*Weighted p-score distributions
eststo ps2: logit Treat ///
    perc_female2013 perc_white2013 perc_hispanic2013 ///
    unemp_rate_pc2013 poverty_rate2013 median_income_k2013 ///
    [iw = set_wt], vce(cluster county_code)
*estimates use ps2
predict pscore_w
* Define group for legend
gen group = cond(Treat == 1, "Expansion", "Non-Expansion")
* Expansion counties
summ pscore_w if inlist(Treat,0,1)
local min = r(min)
local max = r(max)
local w = (`max' - `min')/30
local s = floor(`min'/`w')*`w' // align start to a bin edge
twoway histogram pscore_w if Treat == 1 [fw=set_wt], width(`w') start(`s') fcolor(none) lcolor(red) || ///
    histogram pscore_w if Treat == 0 [fw=set_wt], width(`w') start(`s') fcolor(none) lcolor(blue) ///
    ytitle("") xtitle("Propensity Score") title("Weighted") legend( rows(1)) ///
    ylabel(0(1)4.5) xlabel(, nogrid)
graph save "pscore_w.gph", replace
*combine graphs and use only one legend and y-axis title
**NB: differences between the Stata and R histograms are purely down to
*some arcane disagreement between histogram and ggplot; as can be seen from
*the actual p-score model estimates, the pscores themselves are identical
grc1leg2 pscore_uw.gph pscore_w.gph, ///
    title("") xtob1title ///
xtitlefrom("pscore_w.gph") /// legendfrom(pscore_w.gph) position(6) ring(2) /// cols(2) imargin(4 4 4 4) graph display, xsize(10) ysize(5)```:::# 2 $\times$ T DiD setupsIn the next section we show how to estimate DiD when you have more than two time periods, what we call the 2 $\times$ T setting.## Figure 2: County Mortality Trends by Expansion DecisionIn **Figure 2** we plot the weighted average county mortality rates over our sample period (2009-2019) for our two groups: those who expand in 2014 and those who have not expanded by 2019.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE}#| fig.align: 'center'#| fig.width: 10# set seed for reproducibilityset.seed(20240924)# load cleaned datamydata <- read_csv(here::here("data", "county_mortality_data.csv")) %>% # make state the abbreviation mutate(state = str_sub(county, nchar(county) - 1, nchar(county))) %>% # drop DC and pre-2014 adoption states filter(!(state %in% c("DC", "DE", "MA", "NY", "VT"))) %>% # drop states that adopt between 2014 and 2019 filter(yaca == 2014 | is.na(yaca) | yaca > 2019)# set the covariates that we're going to use.covs <- c("perc_female","perc_white", "perc_hispanic", "unemp_rate", "poverty_rate", "median_income")## Clean data and add in covariatesmydata <- mydata %>% # make variables mutate(perc_white = population_20_64_white / population_20_64 * 100, perc_hispanic = population_20_64_hispanic / population_20_64 * 100, perc_female = population_20_64_female/ population_20_64 * 100, unemp_rate = unemp_rate * 100, median_income = median_income / 1000) %>% # keep just subset of variables that we will use later select(state, county, county_code, year, population_20_64, yaca, starts_with("perc_"), crude_rate_20_64, all_of(covs))# keep only counties with full observations for outcome and covariates in 2013 and 2014mydata <- mydata %>% # allow the aca expansion variable to be missing drop_na(!yaca) %>% group_by(county_code) %>% # need full covariates for 2013 and 2014 filter(length(which(year == 2013 | year == 2014)) == 2) %>% ungroup()# finally, keep only counties with full mortality data for 2009 to 2019mydata <- mydata %>% group_by(county_code) %>% drop_na(crude_rate_20_64) %>% filter(n() == 11)# make ACA and post variables, as well as the weighting variable which is the relevant county # population in 2013mydata <- mydata %>% mutate(Treat = if_else(yaca == 2014 & !is.na(yaca), 1, 0), Post = if_else(year >= 2014, 1, 0)) %>% group_by(county_code) %>% # make a variable with population weight in 2013 mutate(set_wt = population_20_64[which(year == 2013)]) %>% ungroup()# make time series plotmydata %>% mutate(expand = if_else(Treat == 1, "Expansion Counties", "Non-Expansion Counties")) %>% # get the weighted averages by expansion type and year group_by(expand, year) %>% summarize(mortality = weighted.mean(crude_rate_20_64, set_wt)) %>% # plot ggplot(aes(x = year, y = mortality, group = expand, color = expand)) + geom_point(size = 2) + geom_line(linewidth = 1) + geom_vline(xintercept = 2014, linetype = "dashed") + scale_color_brewer(palette = 'Set1') + scale_x_continuous(breaks = 2009:2019) + labs(x = "", y = "Mortality (20-64) \n Per 100,000") + theme(legend.position = 'bottom', axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), strip.text = element_text(size = 14), panel.border = element_rect(color = "black", fill = NA, linewidth = 1), legend.title = element_blank(), axis.title = element_text(size = 12), axis.text = element_text(size = 10))```### Stata```{stata, eval = 
FALSE}
****************************************************************
* 1. Figure 2: 2014 versus non-expansion mortality trends
***************************************************************
*read data in again and restrict to the relevant expansion cohorts
use "did_jel_aca_replication_data", clear
keep if inlist(yaca,2014,2020,2021,2023,.)
gen Treat = (yaca==2014) // 1 = ACA in 2014
*Collapse to means by group × year
gen expand = cond(Treat==1, "Expansion Counties", "Non-Expansion Counties")
collapse (mean) crude_rate_20_64 [iw=set_wt], by(expand year)
rename crude_rate_20_64 mortality
* Create the plot
twoway ///
    (line mortality year if expand == "Expansion Counties", lcolor(red) lpattern(solid) lwidth(medium)) ///
    (line mortality year if expand == "Non-Expansion Counties", lcolor(blue) lpattern(solid) lwidth(medium)) ///
    (scatter mortality year if expand == "Expansion Counties", msymbol(O) mcolor(red)) ///
    (scatter mortality year if expand == "Non-Expansion Counties", msymbol(Oh) mcolor(blue)) , ///
    legend(order(1 "Expansion Counties" 2 "Non-Expansion Counties") ///
        pos(6) ring(2) cols(2)) ///
    xtitle("") ///
    ytitle("Mortality (20–64)" "Per 100,000", orientation(horizontal)) ///
    ysize(5) xsize(10) ///
    xline(2013) ///
    xlabel(2009(1)2019, nogrid) ///
    graphregion(color(white)) ///
    plotregion(style(none))
```
:::

## Figure 3: 2 $\times$ T Event Study

In **Figure 3** we report the $2 \times T$ event study without covariates.

::: panel-tabset

### R

```{r, message = FALSE, warning = FALSE, error = FALSE}
#| fig.align: 'center'
#| fig.width: 10
# Make event study plots
# first add in the variables needed to run the CS model - timing group variable (treat_year)
# and county code needs to be numeric
mydata <- mydata %>% 
  mutate(treat_year = if_else(yaca == 2014 & !is.na(yaca), 2014, 0), 
         county_code = as.numeric(county_code), 
         time_to_treat = if_else(Treat == 1, year - treat_year, 0))

# estimate CS models
# get the individual ATT(g,t) estimates
mod <- did::att_gt(
  yname = "crude_rate_20_64", 
  tname = "year", 
  idname = "county_code", 
  gname = "treat_year", 
  xformla = NULL, 
  data = mydata, 
  panel = TRUE, 
  control_group = "nevertreated", 
  bstrap = TRUE, 
  cband = TRUE, 
  est_method = "reg", 
  weightsname = "set_wt", 
  # faster_mode = TRUE, 
  base_period = "universal", 
  biters = 25000)

# confirm you get the same with OLS (standard errors differ because of bootstrap only)
cs_out <- mod
ols_out <- feols(crude_rate_20_64 ~ i(time_to_treat, Treat, ref = -1) | county_code + year, 
                 data = mydata, cluster = ~county_code, weights = ~set_wt)

# get the Rambachan/Roth confidence interval
# first save our aggregate event study estimates
es <- did::aggte(
  mod, 
  type = "dynamic", 
  min_e = -5, max_e = 0, 
  bstrap = TRUE, 
  biters = 25000)

# source the honest_did code made to fit our estimates
source(here::here('scripts/R/5_honestdid.R'))

# get robust CI
robust_ci <- honest_did(es = es, type = "relative_magnitude")$robust_ci

# get the aggregate value for e = 0:5
agg <- mod %>% 
  aggte(type = "dynamic", min_e = 0, max_e = 5, bstrap = TRUE, biters = 25000)

# make labels for the estimate, std error, and confidence intervals
label1 <- paste0("Estimate~(e %in% '{0, 5}')~'='~'", scales::number(agg$overall.att, accuracy = 0.01), "'")
label2 <- paste0("Std. Error = ", scales::number(agg$overall.se, 0.01), " \n", "Conf.
Int = [", scales::number(agg$overall.att - 1.96*agg$overall.se, 0.01), ", ", scales::number(agg$overall.att + 1.96*agg$overall.se, 0.01), "]")# make event study plot for Figure 3mod %>% # Aggregate in event time ("dynamic") aggte(type = "dynamic", biters = 25000) %>% # get the two confidence intervals broom::tidy(conf.int = TRUE) %>% # plot ggplot(aes(x = event.time, y = estimate)) + geom_linerange(aes(ymin = conf.low, ymax = conf.high), color = "darkred") + geom_linerange(aes(ymin = point.conf.low, ymax = point.conf.high)) + geom_point() + geom_vline(xintercept = -1, linetype = "dashed") + geom_hline(yintercept = 0, linetype = "dashed") + scale_x_continuous(breaks = -5:5) + annotate("text", x = 3, y = 11, label = label1, parse = TRUE) + annotate("text", x = 3, y = 9, label = label2) + labs(x = "Event Time", y = "Treatment Effect \n Mortality Per 100,000") + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), strip.text = element_text(size = 14), panel.border = element_rect(color = "black", fill = NA, linewidth = 1), legend.title = element_blank(), axis.title = element_text(size = 12), axis.text = element_text(size = 10))```### Stata```{stata, eval = FALSE}****************************************************************2. Figure 3: 2xT event-study with no covariates*************************************************************** *read data in again and restrict to 2013/2014 use "did_jel_aca_replication_data", clear keep if inlist(yaca,2014,2020,2021,2023,.) *Use csdid for simple ES estimation (uniform and non-uniform CIs); use long2!! csdid crude_rate_20_64 [iw=set_wt], /// ivar(county_code) time(year) gvar(treat_year) /// long2 /// wboot(reps(25000) rseed(20240924) wbtype(rademacher)) /// agg(event) *you do actually have to grab the uniform CIs from the posted output preserve clear matrix T = r(table) local cn : colnames T matrix llu = T[5,1..colsof(T)] matrix ulu = T[6,1..colsof(T)] matrix uci = (llu', ulu') matrix colnames uci = uci_lower uci_upper svmat uci, names(col) gen var = "" local i = 0 foreach r of local cn{ local ++i replace var = "`r'" in `i' } save "testo", replace restore *save the posted output to a dataset and merge back on the uniform CIs regsave, ci merge 1:1 var using "testo" keep if _merge==3 drop _merge *get average post coefficient, standard error, and CI qui sum coef if var=="Post_avg" local postcoef : display %03.2f r(mean) qui sum stderr if var=="Post_avg" local postse : display %03.2f r(mean) local postlci : display %03.2f `postcoef'-1.96*`postse' local postuci : display %03.2f `postcoef'+1.96*`postse' *now only keep the event-study estimates keep if substr(var,1,1)=="T" keep coef ci* uci* var *extract timing components from event aggregation notation in csdid gen pre = substr(var,2,1)=="m" gen post = substr(var,2,1)=="p" gen e = real(substr(var,3,.)) replace e = -e if pre *add omitted period local obs = _N+1 set obs `obs' replace coef = 0 in `obs' replace e = -1 in `obs' *plot the ATT(g,t) for g=2014 twoway /// (rcap uci_lower uci_upper e, /// lcolor(red*1.2) msize(0)) /// (rcap ci_lower ci_upper e, /// lcolor(black) msize(0)) /// (scatter coef e, /// msym(O) mcolor(black) /// ), /// legend(off) /// xlabel(-5(1)5, nogrid) /// ylabel(-5(5)10) /// yline(0, lpattern(dash)) /// xline(-1, lpattern(dash)) /// xtitle("Event-Time") /// ytitle("Treatment Effect" "Mortality Per 100,000", orientation(horizontal) size(small)) /// text(10.5 3 "Estimate e ∈ {0,5}=`postcoef'", size(small)) /// text(9 3 "Std. Error = `postse'" "Conf. 
Int = [`postlci',`postuci']", size(small)) ///
    xsize(10) ysize(5)
```
:::

## Rambachan & Roth (2023) Sensitivity Analysis

In the text surrounding the $2 \times T$ event study, we also discuss how you can use the analysis developed in Rambachan and Roth (2023) to test the sensitivity of the ATT estimates to violations of the parallel trends assumption. In particular, we focus on their "relative magnitude" analysis, wherein we assume that any parallel trends violations in the post-period are no larger than the largest violation in the pre-period. In the code below we show how to estimate the robust confidence interval under this bounding approach for $ATT(2014)$ - or the treatment effect in the first year of treatment for those counties that adopted Medicaid expansion.

::: panel-tabset

### R

```{r, message = FALSE, warning = FALSE, error = FALSE}
#| fig.align: 'center'
#| fig.width: 10
# get robust CI
robust_ci %>% 
  # keep just relative magnitude = 1
  filter(Mbar == 1) %>% 
  # drop extraneous variables
  select(lb, ub, Mbar) %>% 
  # round
  mutate_at(c("lb", "ub"), scales::number, accuracy = 0.01) %>% 
  # make table
  kable(col.names = c("lower_bound", "upper_bound", "relative_magnitude"), 
        align = 'c', booktabs = T) %>% 
  kable_styling()
```

### Stata

```{stata, eval = FALSE}
*************************************************
* 1. read data in again and restrict to the relevant expansion cohorts
************************************************
use "did_jel_aca_replication_data", clear
keep if inlist(yaca,2014,2020,2021,2023,.)
*************************************************
* Estimate unconditional csdid version 1.81 (Oct 2025)
************************************************
csdid crude_rate_20_64 [iw=set_wt], ///
    ivar(county_code) time(year) gvar(treat_year) ///
    long2 ///
    never ///
    wboot(reps(25000) rseed(20240924) wbtype(rademacher)) ///
    agg(event)
estat event, window(-5,0) post
*************************************************
* Honest DiD, relative magnitude only using M=1 for e=0
************************************************
honestdid, type(relative_magnitude) pre(3/6) post(7) mvec(1)
exit
```
:::

## Figure 4: $2 \times T$ Event Study With Covariates

In **Figure 4** we show the $2 \times T$ event studies with covariates using the Callaway and Sant'Anna (2021) estimators - i.e.
using outcome regression, IPW, and doubly-robust estimators.

::: panel-tabset

### R

```{r, message = FALSE, warning = FALSE, error = FALSE}
#| fig.align: 'center'
#| fig.width: 10
# Estimate the CS Model 3 ways with the three estimation types
# make a function to run CS, varying the estimation type
run_cs <- function(method) {
  # get the ATT(g, t) estimates
  att_gt(
    yname = "crude_rate_20_64", 
    tname = "year", 
    idname = "county_code", 
    gname = "treat_year", 
    xformla = as.formula(paste("~", paste(covs, collapse = "+"))), 
    data = mydata, 
    panel = TRUE, 
    control_group = "nevertreated", 
    bstrap = TRUE, 
    cband = TRUE, 
    est_method = method, 
    weightsname = "set_wt", 
    # faster_mode = TRUE, 
    base_period = "universal", 
    biters = 25000
  ) %>% 
    # aggregate to event time (dynamic)
    aggte(type = "dynamic", na.rm = TRUE, biters = 25000) %>% 
    broom::tidy(conf.int = TRUE) %>% 
    mutate(type = method)
}

# estimate it for 3 methods
out <- map_dfr(c("reg", "ipw", "dr"), run_cs)

# make plot for Figure 4 - 2XT Event Study with Covariates
out %>% 
  # refactor labels for plot
  mutate(type = case_match(type, "reg" ~ "Regression", "ipw" ~ "IPW", "dr" ~ "Doubly Robust")) %>% 
  mutate(type = factor(type, levels = c("Regression", "IPW", "Doubly Robust"))) %>% 
  # plot the estimates and confidence intervals
  ggplot(aes(x = event.time, y = estimate)) + 
  geom_linerange(aes(ymin = conf.low, ymax = conf.high), color = "darkred") + 
  geom_linerange(aes(ymin = point.conf.low, ymax = point.conf.high)) + 
  geom_point() + 
  geom_vline(xintercept = -1, linetype = "dashed") + 
  geom_hline(yintercept = 0, linetype = "dashed") + 
  facet_wrap(~type) + 
  scale_x_continuous(breaks = -5:5) + 
  labs(x = "Event Time", y = "Treatment Effect \n Mortality Per 100,000") + 
  theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), 
        strip.text = element_text(size = 14), 
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1), 
        legend.title = element_blank(), 
        axis.title = element_text(size = 12), 
        axis.text = element_text(size = 10))
```

### Stata

```{stata, eval = FALSE}
****************************************************************
* 3. Figure 4: 2xT event-study with covariates
***************************************************************
*read data in again and restrict to the relevant expansion cohorts
use "did_jel_aca_replication_data", clear
keep if inlist(yaca,2014,2020,2021,2023,.)
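* Note (added): csdid's method() options reg, stdipw, and dripw used below
* correspond to est_method = "reg", "ipw", and "dr" in the R did package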
*local with covariate names local covs perc_female perc_white perc_hispanic /// unemp_rate_pc poverty_rate median_income_k *Estimate RA, IPW, and DR using csdid version 1.81 (Oct 2025) local reg_title "Regression" local stdipw_title "IPW" local dripw_title "Doubly Robust" foreach m in reg stdipw dripw{ csdid crude_rate_20_64 `covs' [iw=set_wt], /// ivar(county_code) time(year) gvar(treat_year) /// method(`m') /// long2 /// never /// wboot(reps(25000) rseed(20240924) wbtype(rademacher)) /// pscoretrim(0.995) agg(event) *you do actually have to grab the uniform CIs from the posted output preserve clear matrix T = r(table) local cn : colnames T matrix llu = T[5,1..colsof(T)] matrix ulu = T[6,1..colsof(T)] matrix uci = (llu', ulu') matrix colnames uci = uci_lower uci_upper svmat uci, names(col) gen var = "" local i = 0 foreach r of local cn{ local ++i replace var = "`r'" in `i' } save "testo", replace restore *save the posted output to a dataset and merge back on the uniform CIs preserve regsave, ci merge 1:1 var using "testo" keep if _merge==3 drop _merge *get average post coefficient, standard error, and CI qui sum coef if var=="Post_avg" local postcoef : display %03.2f r(mean) qui sum stderr if var=="Post_avg" local postse : display %03.2f r(mean) local postlci : display %03.2f `postcoef'-1.96*`postse' local postuci : display %03.2f `postcoef'+1.96*`postse' *now only keep the event-study estimates keep if substr(var,1,1)=="T" keep coef ci* uci* var *extract timing components from event aggregation notation in csdid gen pre = substr(var,2,1)=="m" gen post = substr(var,2,1)=="p" gen e = real(substr(var,3,.)) replace e = -e if pre *add omitted period local obs = _N+1 set obs `obs' replace coef = 0 in `obs' replace e = -1 in `obs' *plot the ATT(g,t) for g=2014 twoway /// (rcap uci_lower uci_upper e, /// lcolor(red*1.2) msize(0)) /// (rcap ci_lower ci_upper e, /// lcolor(black) msize(0)) /// (scatter coef e, /// msym(O) mcolor(black) /// ), /// legend(off) /// xlabel(-5(1)5, nogrid) /// ylabel(-20(10)20) /// xline(-1, lpattern(dash)) /// yline(0, lpattern(dash)) /// xtitle("Event-Time") /// ytitle("") /// title("``m'_title'") /// xsize(10) ysize(5) graph save "`m'.gph", replace restore } *combine graphs and use only one legend and y-axis title **NB: differences between the Stata and R come from the bootstrap uncertainty **analytic standard errors are identical grc1leg2 reg.gph stdipw.gph dripw.gph, /// title("") xtob1title xtitlefrom("reg.gph") /// l1title("Treatment Effect" "Mortality Per 100,000", size(small) orientation(horizontal)) /// loff /// ycommon /// position(6) ring(2) /// cols(3) imargin(4 4 4 4) graph display, xsize(12) ysize(5)exit```:::# G $\times$ T DiD setupsWe end the paper with the staggered adoption setting, with $G$ treatment groups and $T > 2$ time periods.## Figure 5: Mortality Trends by Expansion Decision with Staggered TimingIn **Figure 5** we plot the trends in weighted-average county mortality rates by treatment timing groups from 2009 to 2019.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE}#| fig.align: 'center'#| fig.width: 10# set seed for reproducibilityset.seed(20240924)# load cleaned datamydata <- read_csv(here::here("data", "county_mortality_data.csv")) %>% # make state the abbreviation mutate(state = str_sub(county, nchar(county) - 1, nchar(county))) %>% # drop DC and pre-2014 adoption states filter(!(state %in% c("DC", "DE", "MA", "NY", "VT"))) # set the covariates that we're going to use.covs <- c("perc_female","perc_white", 
"perc_hispanic", "unemp_rate", "poverty_rate", "median_income")## Clean data and add in covariatesmydata <- mydata %>% # make variables mutate(perc_white = population_20_64_white / population_20_64 * 100, perc_hispanic = population_20_64_hispanic / population_20_64 * 100, perc_female = population_20_64_female/ population_20_64 * 100, unemp_rate = unemp_rate * 100, median_income = median_income / 1000) %>% # keep just subset of variables that we will use later select(state, county, county_code, year, population_20_64, yaca, starts_with("perc_"), crude_rate_20_64, all_of(covs))# keep only counties with full observations for outcome and covariates in 2013 and 2014mydata <- mydata %>% # allow the aca expansion variable to be missing drop_na(!yaca) %>% group_by(county_code) %>% # need full covariates for 2013 and 2014 filter(length(which(year == 2013 | year == 2014)) == 2) %>% ungroup()# finally, keep only counties with full mortality data for 2009 to 2019mydata <- mydata %>% group_by(county_code) %>% drop_na(crude_rate_20_64) %>% filter(n() == 11)# make smaller datasetmydata <- mydata %>% mutate(Treat = if_else(!is.na(yaca) & yaca <= 2019, 1, 0), treat_year = if_else(!is.na(yaca) & yaca <= 2019, yaca, 0), Post = if_else(year >= 2014, 1, 0)) %>% group_by(county_code) %>% # make a variable with population weight in 2013 mutate(set_wt = population_20_64[which(year == 2013)]) %>% ungroup()# make time series plot by timing group (Figure 5)mydata %>% # identify the groups mutate(treat_year = if_else(treat_year == 0, "Non-Expansion Counties", as.character(treat_year))) %>% # get weighted average mortality by gtiming-group and year group_by(treat_year, year) %>% summarize(mortality = weighted.mean(crude_rate_20_64, set_wt)) %>% ggplot(aes(x = year, y = mortality, group = as.factor(treat_year), color = as.factor(treat_year))) + geom_point(size = 2) + geom_line(linewidth = 1) + scale_color_manual(values = c("#7C7189", "#D04E59", "#BC8E7D", "#2F3D70", "#CABEE9")) + scale_x_continuous(breaks = 2009:2019) + labs(x = "", y = "Mortality (20-64) \n Per 100,000") + theme(legend.position = 'bottom', axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), strip.text = element_text(size = 14), panel.border = element_rect(color = "black", fill = NA, linewidth = 1), legend.title = element_blank(), axis.title = element_text(size = 12), axis.text = element_text(size = 10))```### Stata```{stata, eval = FALSE}***************************************************************************1. 
Figure 5: Mortality Trends by Timing Group************************************************************************** *read data in again and relabel treatment groups, specifically the "non-expansion" group use "did_jel_aca_replication_data", clear *Define treatment timing labels replace treat_str = "Non-Expansion Counties" if inlist(treat_str, "0", "2020", "2021", "2022", "2023") collapse (mean) crude_rate [aw = set_wt], by(treat_str year) * Plot trends by Timing Group twoway /// (connected crude_rate year if treat_str == "Non-Expansion Counties", /// lcolor(purple*0.35) lwidth(medthick) mcolor(purple*0.35) msymbol(O)) /// (connected crude_rate year if treat_str == "2014", /// lcolor(gray*1.2) lwidth(medthick) mcolor(gray*1.2) msymbol(O)) /// (connected crude_rate year if treat_str == "2015", /// lcolor(red) lwidth(medthick) mcolor(red*1.5) msymbol(O)) /// (connected crude_rate year if treat_str == "2016", /// lcolor(brown) lwidth(medthick) mcolor(brown) msymbol(O)) /// (connected crude_rate year if treat_str == "2019", /// lcolor(navy) lwidth(medthick) mcolor(navy) msymbol(O)), /// /// legend(order(2 "2014" 3 "2015" 4 "2016" 5 "2019" 1 "Non-Expansion Counties") /// pos(6) ring(2) rows(1) region(lcolor(none)) size(small)) /// ytitle("Mortality (20–64)" "per 100,000", size(small) orientation(horizontal)) /// ylabel(350 400 450, angle(0) labsize(small) nogrid) /// xtitle("") xlabel(2009(1)2019, labsize(small)) /// graphregion(color(white)) /// name(fig5, replace) graph display, xsize(10) ysize(5)```:::## Figure 6: ATT(g, t)s for Each Expansion GroupWith the estimator from Callaway & Sant'Anna (2021) we can estimate the group-time average treatment effects - what we call ATT(g, t) - for each group and time period. We plot these cohort-specific treatment effects in **Figure 6**.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE}#| fig.align: 'center'#| fig.width: 10# reclassify the county code as numeric for Callaway/Sant'Annamydata <- mydata %>% mutate(county_code = as.numeric(county_code))# get the ATT(g,t)'smod_no_x <- att_gt( yname = "crude_rate_20_64", tname = "year", idname = "county_code", gname = "treat_year", xformla = NULL, data = mydata, panel = TRUE, control_group = "notyettreated", bstrap = TRUE, biters = 25000, cband = TRUE, weightsname = "set_wt", # faster_mode = TRUE, base_period = "universal")# make plots in calendar time by timing groupmod_no_x %>% # get the estimates and confidence intervals broom::tidy(conf.int = TRUE) %>% mutate(group = as.character(group)) %>% # plot the estimates and CIs ggplot(aes(x = time, y = estimate, color = group)) + geom_point(size = 2) + geom_line(linewidth = 1) + geom_linerange(aes(ymin = conf.low, ymax = conf.high)) + scale_x_continuous(breaks = seq(2009, 2019, by = 2), labels = seq(2009, 2019, by = 2)) + scale_color_manual(values = c("#7C7189", "#D04E59", "#BC8E7D", "#2F3D70")) + geom_vline(aes(xintercept = as.numeric(group) - 1), linetype = "dashed", linewidth = 1) + labs(x = "", y = "Treatment Effect \n Mortality Per 100,000") + facet_wrap(~group) + theme(legend.position = 'bottom', axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), strip.text = element_text(size = 14), panel.border = element_rect(color = "black", fill = NA, linewidth = 1), legend.title = element_blank(), axis.title = element_text(size = 12), axis.text = element_text(size = 10))```### Stata```{stata, eval = FALSE}***************************************************************************2. 
Figure 6: ATT(g,t) by Calendar Time
**************************************************************************
*read data in
use "did_jel_aca_replication_data", clear
*Estimate group × time effects
csdid crude_rate [iw=set_wt], ///
    ivar(county_code) ///
    time(year) ///
    gvar(treat_year) ///
    long2 ///
    notyet ///
    wboot(reps(25000) rseed(20240924) wbtype(rademacher))
*adding the omitted category to every g is very annoying in the matrix style, so do the graphs using regsave
regsave
*extract timing components from csdid
gen g = real(substr(var,2,4))
gen t1 = real(substr(var,9,4))
gen t2 = real(substr(var,14,4))
gen t = (t1)*(t2==g-1) + (t2)*(t1==g-1)
*add omitted period
preserve
keep g
duplicates drop
gen t = g-1
gen coef = 0
save testo, replace
restore
append using testo
*make CIs
gen lci = coef - 1.96*stderr
gen uci = coef + 1.96*stderr
keep coef lci uci g t
sort g t
*plot the ATT(g,t) for g=2014
twoway ///
    (rcap lci uci t if g==2014, ///
        lcolor(gray*1.2) msize(0)) ///
    (scatter coef t if g==2014, ///
        msym(O) mcolor(gray*1.2) ///
        c(l) lcolor(gray*1.2) ///
    ), ///
    legend(off) ///
    xlabel(2009(2)2019, nogrid) ///
    ylabel(-40(20)20) ///
    xline(2013, lpattern(dash)) ///
    title("2014") ///
    xtitle("") ///
    ytitle("", orientation(horizontal)) ///
    xsize(10) ysize(5)
graph save g2014.gph, replace
*plot the ATT(g,t) for g=2015
twoway ///
    (rcap lci uci t if g==2015, ///
        lcolor(red*1.5) msize(0)) ///
    (scatter coef t if g==2015, ///
        msym(O) mcolor(red*1.5) ///
        c(l) lcolor(red*1.5) ///
    ), ///
    legend(off) ///
    xlabel(2009(2)2019, nogrid) ///
    ylabel(-40(20)20) ///
    xline(2014, lpattern(dash)) ///
    title("2015") ///
    xtitle("") ///
    ytitle("", orientation(horizontal)) ///
    xsize(10) ysize(5)
graph save g2015.gph, replace
*plot the ATT(g,t) for g=2016
twoway ///
    (rcap lci uci t if g==2016, ///
        lcolor(brown) msize(0)) ///
    (scatter coef t if g==2016, ///
        msym(O) mcolor(brown) ///
        c(l) lcolor(brown) ///
    ), ///
    legend(off) ///
    xlabel(2009(2)2019, nogrid) ///
    ylabel(-40(20)20) ///
    xline(2015, lpattern(dash)) ///
    title("2016") ///
    xtitle("") ///
    ytitle("", orientation(horizontal)) ///
    xsize(10) ysize(5)
graph save g2016.gph, replace
*plot the ATT(g,t) for g=2019
twoway ///
    (rcap lci uci t if g==2019, ///
        lcolor(navy) msize(0)) ///
    (scatter coef t if g==2019, ///
        msym(O) mcolor(navy) ///
        c(l) lcolor(navy) ///
    ), ///
    legend(off) ///
    xlabel(2009(2)2019, nogrid) ///
    ylabel(-40(20)20) ///
    xline(2018, lpattern(dash)) ///
    title("2019") ///
    xtitle("") ///
    ytitle("") ///
    xsize(10) ysize(5)
graph save g2019.gph, replace
graph combine g2014.gph g2015.gph g2016.gph g2019.gph, ///
    title("Treatment Effect" "Mortality per 100,000", size(small) orientation(horizontal) pos(9) ring(2)) ///
    cols(2) imargin(4 4 4 4)
graph display, xsize(10) ysize(5)
```
:::

## Figure 7: ATT(g, t) in Event Time

We show how you can stack these group-time average treatment effects in relative, or event, time in **Figure 7**.

::: panel-tabset

### R

```{r, message = FALSE, warning = FALSE, error = FALSE}
#| fig.align: 'center'
#| fig.width: 10
# Make a different plot that reports the ATT(g,t)s in relative time
mod_no_x %>% 
  # get the estimates and CIs
  broom::tidy() %>% 
  # define relative time for each group
  mutate(rel_time = time - group) %>% 
  # plot
  ggplot(aes(x = rel_time, y = estimate, group = as.factor(group), color = as.factor(group))) + 
  geom_point(size = 2) + 
  geom_line(linewidth = 1) + 
  geom_vline(xintercept = -1, linetype = "dashed", linewidth = 1) + 
  scale_color_manual(values = c("#7C7189", "#D04E59", "#BC8E7D", "#2F3D70")) + 
  scale_x_continuous(breaks = -10:5) + 
  labs(x = "", y =
"Treatment Effect \n Mortality Per 100,000") + theme(legend.position = 'bottom', axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), strip.text = element_text(size = 14), panel.border = element_rect(color = "black", fill = NA, linewidth = 1), legend.title = element_blank(), axis.title = element_text(size = 12), axis.text = element_text(size = 10))```### Stata```{stata, eval = FALSE}***************************************************************************3. Figure 7: ATT(g,t) in event-time************************************************************************** *read data in use "did_jel_aca_replication_data", clear *Estimate group × time effects csdid crude_rate [iw=set_wt], /// ivar(county_code) /// time(year) /// gvar(treat_year) /// long2 /// notyet /// wboot(reps(25000) rseed(20240924) wbtype(rademacher)) *adding the omitted category to every g is very annoying ni the matrix style, so do the graphs using regsave regsave *extract timing components from csdid gen g = real(substr(var,2,4)) gen t1 = real(substr(var,9,4)) gen t2 = real(substr(var,14,4)) gen t = (t1)*(t2==g-1) + (t2)*(t1==g-1) *add omitted period preserve keep g duplicates drop gen t = g-1 gen coef = 0 save testo, replace restore append using testo *make CIs gen lci = coef - 1.96*stderr gen uci = coef + 1.96*stderr keep coef lci uci g t sort g t gen e = t-g keep g e coef keep if g<. reshape wide coef, i(e) j(g) twoway /// scatter coef2014 coef2015 coef2016 coef2019 e, /// msym(o o o o) mcolor(gray*1.2 red*1.5 brown navy) /// c(l l l l) lcolor(gray*1.2 red*1.5 brown navy) lwidth(medthick medthick medthick medthick) /// legend(order(1 "2014" 2 "2015" 3 "2016" 4 "2019") rows(1) pos(6) ring(2) size(small)) /// xlabel(-10(1)5, nogrid) /// ylabel(-20(10)20) /// xline(-1, lpattern(dash)) /// title("") /// xtitle("") /// ytitle() /// xsize(10) ysize(5) ```:::## Figure 8: G $\times$ T Event Study Without CovariatesFinally, we demonstrate how to estimate DiD in the $G \times T$ setting by leverage treatment timing and not-yet-treated groups for identification using the doubly-robust method from Callaway and Sant'Anna (2021). First, in **Figure 8** we show the DiD event study estimates without covariates.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE, collectcode=TRUE}#| fig.align: 'center'#| fig.width: 10# Make a full -5 to + 5 event study plot for no covariates # using Callaway/Sant'Anna with not-yet-treated# get the aggregate value for e = 0:5agg <- mod_no_x %>% aggte(type = "dynamic", min_e = 0, max_e = 5, bstrap = TRUE, biters = 25000)# make labels for the estimate, std error, and confidence intervalslabel1 <- paste0("Estimate~(e %in% '{0, 5}')~'='~'", scales::number(agg$overall.att, accuracy = 0.01), "'")label2 <- paste0("Std. Error = ", scales::number(agg$overall.se, 0.01), " \n", "Conf. 
Int = [", scales::number(agg$overall.att - 1.96*agg$overall.se, 0.01), ", ", scales::number(agg$overall.att + 1.96*agg$overall.se, 0.01), "]")# make G*T event study plot without covariatesmod_no_x %>% # aggregate into relative time (dynamic) aggte(type = "dynamic") %>% broom::tidy(conf.int = TRUE) %>% # keep just -5 to + 5 filter(event.time %>% between(-5, 5)) %>% # plot ggplot(aes(x = event.time, y = estimate)) + geom_linerange(aes(ymin = conf.low, ymax = conf.high), color = "darkred") + geom_linerange(aes(ymin = point.conf.low, ymax = point.conf.high)) + geom_point() + geom_vline(xintercept = -1, linetype = "dashed") + geom_hline(yintercept = 0, linetype = "dashed") + scale_x_continuous(breaks = -5:5) + annotate("text", x = 3, y = 11, label = label1, parse = TRUE) + annotate("text", x = 3, y = 9, label = label2) + labs(x = "Event Time", y = "Treatment Effect \n Mortality Per 100,000") + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), strip.text = element_text(size = 14), panel.border = element_rect(color = "black", fill = NA, linewidth = 1), legend.title = element_blank(), axis.title = element_text(size = 12), axis.text = element_text(size = 10))```### Stata```{stata, eval = FALSE}/**************************************************************************4. Figure 8: Event study without covariates**************************************************************************/ *read data in use "did_jel_aca_replication_data", clear *Use csdid for simple ES estimation (uniform and non-uniform CIs); use long2!! csdid crude_rate_20_64 [iw=set_wt], /// ivar(county_code) time(year) gvar(treat_year) /// long2 /// notyet /// wboot(reps(25000) rseed(20240924) wbtype(rademacher)) /// agg(event) *you do actually have to grab the uniform CIs from the posted output preserve clear matrix T = r(table) local cn : colnames T matrix llu = T[5,1..colsof(T)] matrix ulu = T[6,1..colsof(T)] matrix uci = (llu', ulu') matrix colnames uci = uci_lower uci_upper svmat uci, names(col) gen var = "" local i = 0 foreach r of local cn{ local ++i replace var = "`r'" in `i' } save "testo", replace restore *save the posted output to a dataset and merge back on the uniform CIs regsave, ci merge 1:1 var using "testo" keep if _merge==3 drop _merge *get average post coefficient, standard error, and CI qui sum coef if var=="Post_avg" local postcoef : display %03.2f r(mean) qui sum stderr if var=="Post_avg" local postse : display %03.2f r(mean) local postlci : display %03.2f `postcoef'-1.96*`postse' local postuci : display %03.2f `postcoef'+1.96*`postse' *now only keep the event-study estimates keep if substr(var,1,1)=="T" keep coef ci* uci* var *extract timing components from event aggregation notation in csdid gen pre = substr(var,2,1)=="m" gen post = substr(var,2,1)=="p" gen e = real(substr(var,3,.)) replace e = -e if pre *add omitted period local obs = _N+1 set obs `obs' replace coef = 0 in `obs' replace e = -1 in `obs' *plot only -5 to 5 keep if inrange(e,-5,5) *plot the ATT(g,t) for g=2014 twoway /// (rcap uci_lower uci_upper e, /// lcolor(red*1.2) msize(0)) /// (rcap ci_lower ci_upper e, /// lcolor(black) msize(0)) /// (scatter coef e, /// msym(O) mcolor(black) /// ), /// legend(off) /// xlabel(-5(1)5, nogrid) /// ylabel(-20(10)20) /// yline(0, lpattern(dash)) /// xline(-1, lpattern(dash)) /// xtitle("Event-Time") /// ytitle("Treatment Effect" "Mortality Per 100,000", orientation(horizontal) size(small)) /// text(18.5 3 "Estimate e ∈ {0,5}=`postcoef'", size(small)) /// text(15.5 3 "Std. 
Error = `postse'" "Conf. Int = [`postlci',`postuci']", size(small)) /// xsize(10) ysize(5)```:::## Figure 9: G $\times$ T Event Study With Covariates**Figure 9** presents the event study results adjusting for the covariates using the doubly-robust method.::: panel-tabset### R```{r, message = FALSE, warning = FALSE, error = FALSE, collectcode=TRUE}#| fig.align: 'center'#| fig.width: 10# Now re-estimate and plot the same GXT estimates with covariates# set.seed(20240924)mod_with_x <- att_gt( yname = "crude_rate_20_64", tname = "year", idname = "county_code", gname = "treat_year", xformla = as.formula(paste("~", paste(covs, collapse = "+"))), data = mydata, panel = TRUE, control_group = "notyettreated", bstrap = TRUE, biters = 25000, cband = TRUE, est_method = "dr", weightsname = "set_wt", # faster_mode = TRUE, base_period = "universal")# get the aggregate value for e = 0:5agg <- mod_with_x %>% aggte(type = "dynamic", min_e = 0, max_e = 5, bstrap = TRUE, biters = 25000)# make labels for the estimate, std error, and confidence intervalslabel1 <- paste0("Estimate~(e %in% '{0, 5}')~'='~'", scales::number(agg$overall.att, accuracy = 0.01), "'")label2 <- paste0("Std. Error = ", scales::number(agg$overall.se, 0.01), " \n", "Conf. Int = [", scales::number(agg$overall.att - 1.96*agg$overall.se, 0.01), ", ", scales::number(agg$overall.att + 1.96*agg$overall.se, 0.01), "]")# make event study plot for GXT with covariatesmod_with_x %>% # aggregate to event time (dynamic) aggte(type = "dynamic") %>% broom::tidy(conf.int = TRUE) %>% # filter years filter(event.time %>% between(-5, 5)) %>% # plot ggplot(aes(x = event.time, y = estimate)) + geom_linerange(aes(ymin = conf.low, ymax = conf.high), color = "darkred") + geom_linerange(aes(ymin = point.conf.low, ymax = point.conf.high)) + geom_point() + geom_vline(xintercept = -1, linetype = "dashed") + geom_hline(yintercept = 0, linetype = "dashed") + scale_x_continuous(breaks = -5:5) + annotate("text", x = 3, y = 18, label = label1, parse = TRUE) + annotate("text", x = 3, y = 15, label = label2) + labs(x = "Event Time", y = "Treatment Effect \n Mortality Per 100,000") + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), strip.text = element_text(size = 14), panel.border = element_rect(color = "black", fill = NA, linewidth = 1), legend.title = element_blank(), axis.title = element_text(size = 12), axis.text = element_text(size = 10))```### Stata```{stata, eval = FALSE}/**************************************************************************5. Figure 9: Event study with covariates**************************************************************************/ *read data in use "did_jel_aca_replication_data", clear *local with covariate names local covs perc_female perc_white perc_hispanic /// unemp_rate_pc poverty_rate median_income_k *Use csdid for simple ES estimation (uniform and non-uniform CIs); use long2!! 
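* Note (added): to our understanding, long2 fixes the base period at g-1,
* the analogue of base_period = "universal" in the R did package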
csdid crude_rate_20_64 `covs' [iw=set_wt], /// ivar(county_code) time(year) gvar(treat_year) /// method(dripw) /// long2 /// notyet /// pscoretrim(0.995) /// wboot(reps(25000) rseed(20240924) wbtype(rademacher)) /// agg(event) *you do actually have to grab the uniform CIs from the posted output preserve clear matrix T = r(table) local cn : colnames T matrix llu = T[5,1..colsof(T)] matrix ulu = T[6,1..colsof(T)] matrix uci = (llu', ulu') matrix colnames uci = uci_lower uci_upper svmat uci, names(col) gen var = "" local i = 0 foreach r of local cn{ local ++i replace var = "`r'" in `i' } save "testo", replace restore *save the posted output to a dataset and merge back on the uniform CIs regsave, ci merge 1:1 var using "testo" keep if _merge==3 drop _merge *get average post coefficient, standard error, and CI qui sum coef if var=="Post_avg" local postcoef : display %03.2f r(mean) qui sum stderr if var=="Post_avg" local postse : display %03.2f r(mean) local postlci : display %03.2f `postcoef'-1.96*`postse' local postuci : display %03.2f `postcoef'+1.96*`postse' *now only keep the event-study estimates keep if substr(var,1,1)=="T" keep coef ci* uci* var *extract timing components from event aggregation notation in csdid gen pre = substr(var,2,1)=="m" gen post = substr(var,2,1)=="p" gen e = real(substr(var,3,.)) replace e = -e if pre *add omitted period local obs = _N+1 set obs `obs' replace coef = 0 in `obs' replace e = -1 in `obs' *plot only -5 to 5 keep if inrange(e,-5,5) *plot the ATT(g,t) for g=2014 twoway /// (rcap uci_lower uci_upper e, /// lcolor(red*1.2) msize(0)) /// (rcap ci_lower ci_upper e, /// lcolor(black) msize(0)) /// (scatter coef e, /// msym(O) mcolor(black) /// ), /// legend(off) /// xlabel(-5(1)5, nogrid) /// ylabel(-20(10)20) /// yline(0, lpattern(dash)) /// xline(-1, lpattern(dash)) /// xtitle("Event-Time") /// ytitle("Treatment Effect" "Mortality Per 100,000", orientation(horizontal) size(small)) /// text(19.4 3 "Estimate e ∈ {0,5}=`postcoef'", size(small)) /// text(15.5 3 "Std. Error = `postse'" "Conf. Int = [`postlci',`postuci']", size(small)) /// xsize(10) ysize(5)```:::```{stata, include = FALSE}*delete all the extra stata files created *erase "../data/reg_data_2013.dta"*erase "../data/reg_data_change.dta"*erase "../data/csdid_input.dta"*erase "../data/county_mortality_data_cl.dta"*erase "../data/data_fig_4.dta"*erase "../data/figure8.dta"*erase "../data/precollapse.dta"*erase "../data/fig6_combined.gph"*erase "../data/grp2014.gph"*erase "../data/grp2015.gph"*erase "../data/grp2016.gph"*erase "../data/grp2019.gph"*erase "../data/fig4_dripw.gph"*erase "../data/fig4_stdipw.gph"*erase "../data/fig4_reg.gph"``````{r, include = FALSE}#delete all the extra R files we created # unlink(xlsx_path)# unlink(xlsx_path2)# unlink(xlsx_path3)# unlink(xlsx_path4)# unlink(xlsx_path5)# unlink(xlsx_path6)# unlink(xlsx_path7)#unlink(fig_path1)#unlink(fig_path2)#unlink(fig_path3)#unlink(fig_path4)#unlink(fig_path5)#unlink(fig_path6)#unlink(fig_path7)#unlink(fig_path8)#unlink(fig_path9)```