Code Cell 1
# Load packages for paths, data cleaning, modeling, and tidy model output.
if (!require(pacman)) {
install.packages("pacman")
}
library(pacman)
p_load(tidyverse, here, readxl, fixest, broom)
library(modelsummary)
# Save generated figures to the top-level plots folder.
dir.create("plots", showWarnings = FALSE)
# Increase plot and table text size for readability.
plot_base_size <- 15
table_title_cex <- 1.4
table_header_cex <- 1.05
table_body_cex <- 1.02
table_note_cex <- 0.92
if (requireNamespace("IRdisplay", quietly = TRUE)) {
IRdisplay::display_html("<style>.dataframe, table { font-family: 'Times New Roman', Times, serif; font-size: 16px; } .dataframe th, .dataframe td, table th, table td { padding: 6px 9px; }</style>")
}
index_group_outcome <- function(data, outcome_col, group_col = "treatment_status", year_col = "year", baseline_n = 2) {
baseline_df <- data %>%
transmute(
baseline_group = .data[[group_col]],
baseline_year = .data[[year_col]],
baseline_value = .data[[outcome_col]]
) %>%
filter(!is.na(baseline_value), !is.na(baseline_group), !is.na(baseline_year)) %>%
group_by(baseline_group) %>%
arrange(baseline_year, .by_group = TRUE) %>%
slice_head(n = baseline_n) %>%
summarise(
baseline_outcome = mean(baseline_value, na.rm = TRUE),
baseline_years = paste(range(baseline_year, na.rm = TRUE), collapse = "-"),
.groups = "drop"
) %>%
rename(!!group_col := baseline_group)
data %>%
left_join(baseline_df, by = group_col) %>%
mutate(outcome_change_from_baseline = .data[[outcome_col]] - baseline_outcome)
}
# Keep treatment-group colors consistent across all comparison plots.
treatment_colors <- c(
"Upward Treated" = "#0072B2",
"Control" = "#7F7F7F",
"Low-Cert Control" = "#009E73"
)
star_label <- function(p_value) {
case_when(
is.na(p_value) ~ "",
p_value < 0.01 ~ "***",
p_value < 0.05 ~ "**",
p_value < 0.10 ~ "*",
TRUE ~ ""
)
}
save_regression_table_image <- function(models, terms, term_labels, file, title, r2_digits = 3) {
model_names <- names(models)
body_rows <- list()
for (i in seq_along(terms)) {
coef_values <- map_chr(models, function(model) {
term_row <- tidy(model) %>% filter(term == terms[i])
if (nrow(term_row) == 0) return("")
paste0(sprintf("%.3f", term_row$estimate), star_label(term_row$p.value))
})
se_values <- map_chr(models, function(model) {
term_row <- tidy(model) %>% filter(term == terms[i])
if (nrow(term_row) == 0) return("")
paste0("(", sprintf("%.3f", term_row$std.error), ")")
})
body_rows <- append(body_rows, list(c(term_labels[i], coef_values)))
body_rows <- append(body_rows, list(c("", se_values)))
}
body_rows <- append(body_rows, list(c("Observations", map_chr(models, ~ format(nobs(.x), big.mark = ",")))))
body_rows <- append(body_rows, list(c("Adjusted R²", map_chr(models, ~ sprintf(paste0("%.", r2_digits, "f"), glance(.x)$adj.r.squared)))))
body_matrix <- do.call(rbind, body_rows)
n_body_rows <- nrow(body_matrix)
image_height <- max(1200, 420 + 95 * n_body_rows)
png(file, width = 2200, height = image_height, res = 220)
on.exit(dev.off(), add = TRUE)
par(mar = c(0, 0, 0, 0), family = "Times New Roman")
plot.new()
plot.window(xlim = c(0, 1), ylim = c(0, 1))
left <- 0.04
right <- 0.96
top <- 0.93
bottom <- 0.17
first_col_width <- 0.31
model_col_width <- (right - left - first_col_width) / length(models)
x_edges <- c(left, left + first_col_width, left + first_col_width + model_col_width * seq_len(length(models)))
row_count <- n_body_rows + 2
y_edges <- seq(top, bottom, length.out = row_count + 1)
rect(left, y_edges[row_count + 1], right, y_edges[1], border = "black", lwd = 2)
for (x in x_edges[-c(1, length(x_edges))]) segments(x, y_edges[2], x, y_edges[row_count + 1], lwd = 1.2)
for (y in y_edges) segments(left, y, right, y, lwd = 1.2)
text((left + right) / 2, mean(y_edges[1:2]), title, font = 2, cex = table_title_cex)
text((x_edges[-1] + x_edges[-length(x_edges)]) / 2, mean(y_edges[2:3]), c("", model_names), font = 2, cex = table_header_cex)
for (row_i in seq_len(n_body_rows)) {
y <- mean(y_edges[(row_i + 2):(row_i + 3)])
text(x_edges[1] + 0.012, y, body_matrix[row_i, 1], adj = 0, cex = table_body_cex)
for (col_i in seq_along(models)) {
text(mean(x_edges[(col_i + 1):(col_i + 2)]), y, body_matrix[row_i, col_i + 1], cex = table_body_cex)
}
}
text(left, 0.10, "* p < 0.10, ** p < 0.05, *** p < 0.01. Standard errors clustered by state in parentheses.", adj = 0, cex = table_note_cex)
invisible(file)
}Loading required package: pacman `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing backend. Learn more at: https://vincentarelbundock.github.io/tinytable/ Revert to `kableExtra` for one session: options(modelsummary_factory_default = 'kableExtra') options(modelsummary_factory_latex = 'kableExtra') options(modelsummary_factory_html = 'kableExtra') Silence this message forever: config_modelsummary(startup_message = FALSE)
Data Loading
Code Cell 2
# Load all data files from the top-level project folder.
policy_raw <- readRDS("policy_raw.rds")
snap_part <- read_excel("SNAP_P_Rate.xlsx") %>%
rename(participation = participation_rate)
seda_raw <- read_csv("seda_state_annual_cs_6.0.csv")Warning message in gzfile(file, "rb"): “cannot open compressed file 'policy_raw.rds', probable reason 'No such file or directory'”
Error in gzfile(file, "rb"): cannot open the connection
Traceback:
1. gzfile(file, "rb")
2. .handleSimpleError(function (cnd)
. {
. watcher$capture_plot_and_output()
. cnd <- sanitize_call(cnd)
. watcher$push(cnd)
. switch(on_error, continue = invokeRestart("eval_continue"),
. stop = invokeRestart("eval_stop"), error = NULL)
. }, "cannot open the connection", base::quote(gzfile(file, "rb")))Certification length data
Treatment is defined as the first year in which average certification length exceeded or fell below baseline by more than four months, reflecting sustained administrative regime change.
Code Cell 3
# Keep the certification-length measure and standardize the year variable name.
# Treatment direction is calculated from year-to-year Delta_Cert using a 3-month threshold.
certavg_raw <- policy_raw %>%
transmute(
stateabb,
year = as.integer(Year),
certavg = `Average of certearnavg`,
lag_cert = Lag_Cert,
delta_cert = Delta_Cert,
direction_raw = str_to_lower(Direction),
direction = case_when(
is.na(Delta_Cert) ~ "none",
Delta_Cert >= 3 ~ "up",
Delta_Cert <= -3 ~ "down",
TRUE ~ "none"
)
)
# Optional audit: the raw Direction label may use a different threshold than this analysis.
direction_audit <- certavg_raw %>%
count(direction_raw, direction)
# Define treatment using calculated policy-change direction.
# cert_change remains a descriptive comparison to each state's 2010 baseline.
# direction_raw is retained only as the original label from the policy file.
cert_trt_df <- certavg_raw %>%
arrange(stateabb, year) %>%
group_by(stateabb) %>%
mutate(
cert_baseline_2010 = certavg[year == 2010][1],
cert_change = certavg - cert_baseline_2010,
trt = case_when(
direction == "up" ~ "trt_up",
direction == "down" ~ "trt_down",
TRUE ~ "0"
),
trt_up = as.integer(trt == "trt_up"),
trt_down = as.integer(trt == "trt_down"),
first_up_year = if (any(trt_up == 1, na.rm = TRUE)) {
min(year[trt_up == 1], na.rm = TRUE)
} else {
NA_integer_
},
first_down_year = if (any(trt_down == 1, na.rm = TRUE)) {
min(year[trt_down == 1], na.rm = TRUE)
} else {
NA_integer_
},
ever_up = as.integer(any(trt_up == 1, na.rm = TRUE)),
ever_down = as.integer(any(trt_down == 1, na.rm = TRUE)),
treated = as.integer(ever_up == 1 & ever_down == 0),
control = as.integer(ever_up == 0 & ever_down == 0),
treated_post = as.integer(treated == 1 & !is.na(first_up_year) & year >= first_up_year),
avg_certavg_state = mean(certavg, na.rm = TRUE),
never_up_low = as.integer(control == 1 & avg_certavg_state < 7.5)
) %>%
ungroup() %>%
select(-avg_certavg_state)Code Cell 4
# Plot certification-length trends after dropping states with any downward treatment.
cert_trt_plot_df <- cert_trt_df %>%
filter(is.na(ever_down) | ever_down == 0)
upward_treated_cert_trends <- cert_trt_df %>%
filter((is.na(ever_down) | ever_down == 0), treated == 1)
plot_cert_upward_states <- ggplot(
upward_treated_cert_trends,
aes(x = year, y = certavg, group = stateabb, color = stateabb)
) +
geom_line(alpha = 0.75, linewidth = 0.8) +
geom_point(alpha = 0.75, size = 1.5) +
labs(
title = "Certification Length Trends Among Upward-Treated States",
x = "Year",
y = "Average Certification Length",
color = "State"
) +
scale_x_continuous(
breaks = seq(min(cert_trt_plot_df$year, na.rm = TRUE),
max(cert_trt_plot_df$year, na.rm = TRUE),
by = 1)
) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "right",
panel.grid.minor = element_blank()
)
print(plot_cert_upward_states)
ggsave("plots/certification_length_upward_treated_states.png", plot_cert_upward_states, width = 9, height = 6, dpi = 300)
avg_cert_treated_control <- cert_trt_plot_df %>%
filter(treated == 1 | control == 1) %>%
mutate(treatment_status = factor(
treated,
levels = c(1, 0),
labels = c("Upward Treated", "Control")
)) %>%
group_by(year, treatment_status) %>%
summarise(
avg_certavg = mean(certavg, na.rm = TRUE),
n_states = n_distinct(stateabb),
.groups = "drop"
)
avg_cert_low_control <- cert_trt_plot_df %>%
filter(control == 1 & never_up_low == 1) %>%
group_by(year) %>%
summarise(
avg_certavg = mean(certavg, na.rm = TRUE),
n_states = n_distinct(stateabb),
.groups = "drop"
) %>%
mutate(treatment_status = "Low-Cert Control")
avg_cert_trends <- bind_rows(avg_cert_treated_control, avg_cert_low_control) %>%
mutate(treatment_status = factor(
treatment_status,
levels = c("Upward Treated", "Control", "Low-Cert Control")
))
plot_avg_cert_treated_control <- ggplot(
avg_cert_trends,
aes(x = year, y = avg_certavg, color = treatment_status)
) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
labs(
title = "Average Certification Length:",
subtitle = "Upward-Treated, Control, and Low-Cert Control States",
x = "Year",
y = "Average Certification Length",
color = NULL
) +
scale_color_manual(values = treatment_colors, drop = FALSE) +
scale_x_continuous(
breaks = seq(min(avg_cert_trends$year, na.rm = TRUE),
max(avg_cert_trends$year, na.rm = TRUE),
by = 1)
) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(plot_avg_cert_treated_control)
ggsave("plots/certification_length_treated_control_low_control.png", plot_avg_cert_treated_control, width = 9, height = 6, dpi = 300)SNAP Participation
Participation pre-treatment trend analysis
Code Cell 5
# Create panel merging certification treatment info with participation for regression
cert_part_df <- left_join(cert_trt_df, snap_part, by = c("stateabb", "year")) %>%
select(stateabb, year, participation, certavg, cert_change, trt, trt_up, trt_down,
ever_up, treated, control, never_up_low, first_up_year, treated_post, ever_down)Code Cell 6
# Participation trends: all upward-treated states vs. all controls.
treatment_lookup <- cert_trt_df %>%
select(stateabb, treated, first_up_year, control, never_up_low, ever_down) %>%
distinct()
part_plot_all <- snap_part %>%
left_join(treatment_lookup, by = "stateabb") %>%
filter(treated == 1 | control == 1) %>%
mutate(treatment_status = factor(
treated,
levels = c(1, 0),
labels = c("Upward Treated", "Control")
))
avg_treatment_year_part_all <- part_plot_all %>%
filter(treated == 1) %>%
distinct(stateabb, first_up_year) %>%
summarise(avg_year = mean(first_up_year, na.rm = TRUE)) %>%
pull(avg_year)
avg_part_all <- part_plot_all %>%
group_by(treatment_status, year) %>%
summarise(
avg_participation = mean(participation, na.rm = TRUE),
n_states = n_distinct(stateabb),
.groups = "drop"
)
plot_part_all_controls <- ggplot(avg_part_all, aes(year, avg_participation, color = treatment_status)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(xintercept = avg_treatment_year_part_all, linetype = "dashed", color = "darkgrey") +
annotate(
"text",
x = avg_treatment_year_part_all + 0.3,
y = min(avg_part_all$avg_participation, na.rm = TRUE) +
(max(avg_part_all$avg_participation, na.rm = TRUE) -
min(avg_part_all$avg_participation, na.rm = TRUE)) / 3,
label = paste0("Avg. treatment year"),
hjust = 0, size = 3.5
) +
labs(
title = "Average SNAP Participation: Upward Treated vs. All Controls",
x = "Year",
y = "Average Participation Rate (%)",
color = NULL
) +
scale_color_manual(values = treatment_colors, drop = FALSE) +
scale_x_continuous(breaks = seq(min(avg_part_all$year, na.rm = TRUE), max(avg_part_all$year, na.rm = TRUE), by = 2)) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(plot_part_all_controls)
ggsave("plots/participation_treated_vs_all_controls.png", plot_part_all_controls, width = 9, height = 6, dpi = 300)Participation pre-treatment trends with low-certification controls
Code Cell 7
# Participation trends: all upward-treated states vs. low-certification controls.
part_plot_low_control <- snap_part %>%
left_join(treatment_lookup, by = "stateabb") %>%
filter(treated == 1 | (control == 1 & never_up_low == 1)) %>%
mutate(treatment_status = case_when(
treated == 1 ~ "Upward Treated",
control == 1 & never_up_low == 1 ~ "Low-Cert Control",
TRUE ~ NA_character_
))
avg_treatment_year_part_low <- part_plot_low_control %>%
filter(treated == 1) %>%
distinct(stateabb, first_up_year) %>%
summarise(avg_year = mean(first_up_year, na.rm = TRUE)) %>%
pull(avg_year)
avg_part_low_control <- part_plot_low_control %>%
filter(!is.na(treatment_status)) %>%
group_by(treatment_status, year) %>%
summarise(
avg_participation = mean(participation, na.rm = TRUE),
n_states = n_distinct(stateabb),
.groups = "drop"
)
plot_part_low_controls <- ggplot(avg_part_low_control, aes(year, avg_participation, color = treatment_status)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(xintercept = avg_treatment_year_part_low, linetype = "dashed", color = "darkgrey") +
annotate(
"text",
x = avg_treatment_year_part_low + 0.3,
y = min(avg_part_low_control$avg_participation, na.rm = TRUE) +
(max(avg_part_low_control$avg_participation, na.rm = TRUE) -
min(avg_part_low_control$avg_participation, na.rm = TRUE)) / 3,
label = paste0("Avg. treatment year"),
hjust = 0, size = 3.5
) +
labs(
title = "Average SNAP Participation: Upward Treated vs. Low-Cert Controls",
x = "Year",
y = "Average Participation Rate (%)",
color = NULL
) +
scale_color_manual(values = treatment_colors, drop = FALSE) +
scale_x_continuous(breaks = seq(min(avg_part_low_control$year, na.rm = TRUE), max(avg_part_low_control$year, na.rm = TRUE), by = 2)) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(plot_part_low_controls)
ggsave("plots/participation_treated_vs_low_controls.png", plot_part_low_controls, width = 9, height = 6, dpi = 300)Code Cell 8
# Indexed pre-treatment participation trends using each group's first two available years as baseline.
indexed_avg_part_pretrend <- bind_rows(
avg_part_all,
avg_part_low_control %>% filter(treatment_status == "Low-Cert Control")
) %>%
mutate(treatment_status = factor(
as.character(treatment_status),
levels = c("Upward Treated", "Control", "Low-Cert Control")
)) %>%
index_group_outcome("avg_participation")
plot_part_indexed_pretrend <- ggplot(
indexed_avg_part_pretrend,
aes(year, outcome_change_from_baseline, color = treatment_status)
) +
geom_hline(yintercept = 0, color = "grey70", linewidth = 0.5) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(xintercept = avg_treatment_year_part_all, linetype = "dashed", color = "darkgrey") +
labs(
title = "Change in SNAP Participation from Baseline Average",
subtitle = "Baseline is each group's average over its first two available years",
x = "Year",
y = "Change in Average Participation Rate (percentage points)",
color = NULL
) +
scale_color_manual(values = treatment_colors, drop = FALSE) +
scale_x_continuous(breaks = seq(min(indexed_avg_part_pretrend$year, na.rm = TRUE), max(indexed_avg_part_pretrend$year, na.rm = TRUE), by = 2)) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(plot_part_indexed_pretrend)
ggsave("plots/participation_indexed_pretrend.png", plot_part_indexed_pretrend, width = 9, height = 6, dpi = 300)
Fixed Effects Regression: certification length and SNAP participation
Code Cell 9
# Estimate the within-state relationship between certification length and participation,
# absorbing state and year fixed effects and clustering by state.
# Downward-treatment states are included in this specification.
p_cert_length_model <- feols(
participation ~ certavg | stateabb + year,
data = cert_part_df,
cluster = "stateabb"
)
tidy(p_cert_length_model)| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| certavg | 0.7291608 | 0.3706508 | 1.967245 | 0.05471571 |
DiD FE model with certification treatment as the key independent variable:
Code Cell 10
# Estimate participation DiD with state and year fixed effects.
# Ever-down states remain in the participation sample, but treated_post only turns on
# for states that are ever-up and never-down under the current treatment definition.
participation_did_model <- feols(
participation ~ treated_post | stateabb + year,
data = cert_part_df,
cluster = "stateabb"
)
tidy(participation_did_model)| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| treated_post | 2.195018 | 2.838332 | 0.7733478 | 0.4429575 |
Code Cell 11
# Participation regression table with fixed effects and state-clustered standard errors.
participation_models <- list(
"(1) Certification Length" = p_cert_length_model,
"(2) Upward Treatment DiD" = participation_did_model
)
modelsummary(
participation_models,
coef_map = c(
"certavg" = "Certification length",
"treated_post" = "Post upward treatment"
),
statistic = "({std.error})",
stars = c("***" = 0.01, "**" = 0.05, "*" = 0.10),
gof_map = data.frame(
raw = c("nobs", "adj.r.squared"),
clean = c("Observations", "Adjusted R²"),
fmt = c(0, 3)
),
title = "Regression Estimates for SNAP Participation",
notes = "State and year fixed effects included. Standard errors clustered by state in parentheses. * p < 0.10, ** p < 0.05, *** p < 0.01."
)
save_regression_table_image(
participation_models,
terms = c("certavg", "treated_post"),
term_labels = c("Certification length", "Post upward treatment"),
file = "plots/participation_regression_table.png",
title = "Regression Estimates for SNAP Participation",
r2_digits = 3
)+-----------------------+--------------------------+--------------------------+ | | (1) Certification Length | (2) Upward Treatment DiD | +=======================+==========================+==========================+ | Certification length | 0.729* | | +-----------------------+--------------------------+--------------------------+ | | (0.371) | | +-----------------------+--------------------------+--------------------------+ | Post upward treatment | | 2.195 | +-----------------------+--------------------------+--------------------------+ | | | (2.838) | +-----------------------+--------------------------+--------------------------+ | Observations | 510 | 510 | +-----------------------+--------------------------+--------------------------+ | Adjusted R² | 0.770 | 0.765 | +=======================+==========================+==========================+ | * p < 0.1, ** p < 0.05, *** p < 0.01 | +=======================+==========================+==========================+ | State and year fixed effects included. Standard errors clustered by state | | in parentheses. * p < 0.10, ** p < 0.05, *** p < 0.01. | +=======================+==========================+==========================+ Table: Regression Estimates for SNAP Participation
SEDA Data
Code Cell 12
# Load SEDA scores and keep economically disadvantaged student averages
seda_ecd_scores <- seda_raw %>%
filter(gap == 0, subcat == "ecd", subgroup == "ecd") %>%
select(stateabb, year, cs_mn_avg_eb)
# Fix: Specifically select the ECD achievement gap (economically disadvantaged vs. others)
seda_gap_scores <- seda_raw %>%
filter(gap == 1, subcat == "ecd", subgroup == "neg") %>%
select(stateabb, year, gap_score = cs_mn_avg_eb)
# Standardize state abbreviations and year to match the certification data.
cert_trt_df <- cert_trt_df %>%
mutate(
stateabb = as.character(stateabb),
year = as.integer(year)
)
seda_ecd_scores <- seda_ecd_scores %>%
mutate(
stateabb = as.character(stateabb),
year = as.integer(year)
)
seda_gap_scores <- seda_gap_scores %>%
mutate(
stateabb = as.character(stateabb),
year = as.integer(year)
)Combine SEDA and Cert data:
Code Cell 13
# Create a stable mapping of states to their treatment status.
state_status_map <- cert_trt_df %>%
group_by(stateabb) %>%
summarise(
ever_up = first(ever_up),
treated = first(treated),
first_up_year = first(first_up_year),
ever_down = first(ever_down),
control = first(control),
never_up_low = first(never_up_low),
.groups = "drop"
)
# Keep year-varying treatment variables for SEDA regressions.
cert_seda_treatment_vars <- cert_trt_df %>%
select(stateabb, year, certavg, cert_change, trt, trt_up, trt_down)
# Combine ECD scores and Gap scores (already filtered in cell 9HElYXN3Fkj8)
seda_combined <- full_join(seda_ecd_scores, seda_gap_scores, by = c("stateabb", "year"))
# Construct the all-states SEDA dataframe, including downward-treatment states.
cert_seda_df_all_states <- seda_combined %>%
left_join(state_status_map, by = "stateabb") %>%
left_join(cert_seda_treatment_vars, by = c("stateabb", "year")) %>%
mutate(
treated_post = case_when(
treated == 1 & !is.na(first_up_year) & year >= first_up_year ~ 1L,
treated == 0 ~ 0L,
TRUE ~ 0L
)
) %>%
mutate(
treated_post = as.integer(replace_na(treated_post, 0L)),
trt_up = as.integer(replace_na(trt_up, 0L)),
trt_down = as.integer(replace_na(trt_down, 0L)),
trt = replace_na(trt, "0")
)
# Keep the original cleaner analysis dataframe for regressions that exclude downward-treatment states.
cert_seda_df_extended <- cert_seda_df_all_states %>%
filter(is.na(ever_down) | ever_down == 0)
# Verify that we now have 1 row per state-year (no duplicates)
duplicate_check <- cert_seda_df_extended %>%
group_by(stateabb, year) %>%
summarise(count = n(), .groups = 'drop') %>%
filter(count > 1)
cat("Total rows in cert_seda_df_extended:", nrow(cert_seda_df_extended), "\n")
cat("Number of state-year duplicates:", nrow(duplicate_check), "\n")Total rows in cert_seda_df_extended: 472 Number of state-year duplicates: 0
Code Cell 14
# Identify states that ever experience an upward certification-length treatment.
treated_up_states <- cert_seda_df_extended %>%
group_by(stateabb) %>%
summarise(treated = first(treated), .groups = "drop") %>%
filter(treated == 1) %>%
pull(stateabb)
# Average treated-state scores by calendar year.
up_avg_score_realt <- cert_seda_df_extended %>%
filter(stateabb %in% treated_up_states) %>%
group_by(year) %>%
summarise(
avg_score = mean(cs_mn_avg_eb, na.rm = TRUE),
.groups = "drop"
)
# Average SEDA scores for never-upward-treated states by year.
never_up_avg_score <- cert_seda_df_extended %>%
filter(control == 1) %>%
group_by(year) %>%
summarise(
avg_score = mean(cs_mn_avg_eb, na.rm = TRUE),
n_states = n_distinct(stateabb),
.groups = "drop"
)
# Average scores for never-upward states with relatively low certification lengths.
never_up_low_avg_score <- cert_seda_df_extended %>%
filter(never_up_low == 1) %>%
group_by(year) %>%
summarise(
avg_score = mean(cs_mn_avg_eb, na.rm = TRUE),
n_states = n_distinct(stateabb),
.groups = "drop"
)Distribution of first treated year
Code Cell 15
# Summarize the distribution of first upward-treatment years among treated states.
ever_up_states <- cert_seda_df_extended %>%
group_by(stateabb) %>%
summarise(
ever_up = first(ever_up),
treated = first(treated),
first_up_year = first(first_up_year),
.groups = "drop"
) %>%
filter(treated == 1)
# Plot when states first cross the upward-treatment threshold.
first_up_year_plot <- ggplot(ever_up_states, aes(x = first_up_year)) +
geom_histogram(binwidth = 1, boundary = 2010.5, color = "white") +
scale_x_continuous(
breaks = seq(
from = min(ever_up_states$first_up_year, na.rm = TRUE),
to = max(ever_up_states$first_up_year, na.rm = TRUE),
by = 1
)
) +
labs(
title = "First Treated Year for Upward Treatment States",
x = "First Treated Year",
y = "Number of States"
) +
theme_minimal(base_size = plot_base_size)
print(first_up_year_plot)
ggsave("plots/first_treated_year_upward_states.png", first_up_year_plot, width = 8, height = 5, dpi = 300)Pre-treatment education score trends in treated and control states
Code Cell 16
# SEDA ECD score trends: all upward-treated states vs. all controls.
ecd_plot_all <- cert_seda_df_extended %>%
filter(treated == 1 | control == 1) %>%
mutate(treatment_status = factor(
treated,
levels = c(1, 0),
labels = c("Upward Treated", "Control")
))
avg_treatment_year_ecd_all <- ecd_plot_all %>%
filter(treated == 1) %>%
distinct(stateabb, first_up_year) %>%
summarise(avg_year = mean(first_up_year, na.rm = TRUE)) %>%
pull(avg_year)
ecd_avg_all <- ecd_plot_all %>%
group_by(treatment_status, year) %>%
summarise(avg_score = mean(cs_mn_avg_eb, na.rm = TRUE), .groups = "drop")
plot_ecd_all_controls <- ggplot(ecd_avg_all, aes(x = year, y = avg_score, color = treatment_status)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(xintercept = avg_treatment_year_ecd_all, linetype = "dashed", color = "darkgrey") +
annotate(
"text", x = avg_treatment_year_ecd_all + 0.1,
y = min(ecd_avg_all$avg_score, na.rm = TRUE) +
(max(ecd_avg_all$avg_score, na.rm = TRUE) -
min(ecd_avg_all$avg_score, na.rm = TRUE)) / 3,
label = paste0("Avg. treatment year: ", round(avg_treatment_year_ecd_all, 1)),
hjust = 0, size = 3.5
) +
labs(
title = "Average SEDA ECD Scores: Upward Treated vs. All Controls",
x = "Year",
y = "Average Achievement Score",
color = NULL
) +
scale_color_manual(values = treatment_colors, drop = FALSE) +
scale_x_continuous(breaks = seq(min(ecd_avg_all$year, na.rm = TRUE), max(ecd_avg_all$year, na.rm = TRUE), by = 1)) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(plot_ecd_all_controls)
ggsave("plots/education_scores_treated_vs_all_controls.png", plot_ecd_all_controls, width = 9, height = 6, dpi = 300)Education score pre-treatment trends with low-certification controls
Code Cell 17
# SEDA ECD score trends: all upward-treated states vs. low-certification controls.
ecd_plot_low_control <- cert_seda_df_extended %>%
filter(treated == 1 | (control == 1 & never_up_low == 1)) %>%
mutate(treatment_status = case_when(
treated == 1 ~ "Upward Treated",
control == 1 & never_up_low == 1 ~ "Low-Cert Control",
TRUE ~ NA_character_
))
avg_treatment_year_ecd_low <- ecd_plot_low_control %>%
filter(treated == 1) %>%
distinct(stateabb, first_up_year) %>%
summarise(avg_year = mean(first_up_year, na.rm = TRUE)) %>%
pull(avg_year)
ecd_avg_low_control <- ecd_plot_low_control %>%
filter(!is.na(treatment_status)) %>%
group_by(treatment_status, year) %>%
summarise(avg_score = mean(cs_mn_avg_eb, na.rm = TRUE), .groups = "drop")
plot_ecd_low_controls <- ggplot(ecd_avg_low_control, aes(x = year, y = avg_score, color = treatment_status)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(xintercept = avg_treatment_year_ecd_low, linetype = "dashed", color = "darkgrey") +
annotate(
"text", x = avg_treatment_year_ecd_low + 0.1,
y = min(ecd_avg_low_control$avg_score, na.rm = TRUE) +
(max(ecd_avg_low_control$avg_score, na.rm = TRUE) -
min(ecd_avg_low_control$avg_score, na.rm = TRUE)) / 3,
label = paste0("Avg. treatment year: ", round(avg_treatment_year_ecd_low, 1)),
hjust = 0, size = 3.5
) +
labs(
title = "Average SEDA ECD Scores: Upward Treated vs. Low-Cert Controls",
x = "Year",
y = "Average Achievement Score",
color = NULL
) +
scale_color_manual(values = treatment_colors, drop = FALSE) +
scale_x_continuous(breaks = seq(min(ecd_avg_low_control$year, na.rm = TRUE), max(ecd_avg_low_control$year, na.rm = TRUE), by = 1)) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(plot_ecd_low_controls)
ggsave("plots/education_scores_treated_vs_low_controls.png", plot_ecd_low_controls, width = 9, height = 6, dpi = 300)Code Cell 18
# Indexed ECD score pre-treatment trends using each group's first two available years as baseline.
indexed_ecd_pretrend <- bind_rows(
ecd_avg_all,
ecd_avg_low_control %>% filter(treatment_status == "Low-Cert Control")
) %>%
mutate(treatment_status = factor(
as.character(treatment_status),
levels = c("Upward Treated", "Control", "Low-Cert Control")
)) %>%
index_group_outcome("avg_score")
plot_ecd_indexed_pretrend <- ggplot(
indexed_ecd_pretrend,
aes(year, outcome_change_from_baseline, color = treatment_status)
) +
geom_hline(yintercept = 0, color = "grey70", linewidth = 0.5) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(xintercept = avg_treatment_year_ecd_all, linetype = "dashed", color = "darkgrey") +
labs(
title = "Change in SEDA ECD Scores from Baseline Average",
subtitle = "Baseline is each group's average over its first two available years",
x = "Year",
y = "Change in Average Score",
color = NULL
) +
scale_color_manual(values = treatment_colors, drop = FALSE) +
scale_x_continuous(breaks = seq(min(indexed_ecd_pretrend$year, na.rm = TRUE), max(indexed_ecd_pretrend$year, na.rm = TRUE), by = 1)) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(plot_ecd_indexed_pretrend)
ggsave("plots/education_scores_indexed_pretrend.png", plot_ecd_indexed_pretrend, width = 9, height = 6, dpi = 300)
Differences in differences regression of education score on treatment
Code Cell 19
# Estimate DiD with state and year fixed effects, excluding downward-treatment states.
seda_fe_no_down_df <- cert_seda_df_extended %>%
filter(year >= 2010) %>%
select(stateabb, year, cs_mn_avg_eb, treated_post, ever_down) %>%
filter(!is.na(cs_mn_avg_eb))
fe_did_model <- feols(
cs_mn_avg_eb ~ treated_post | stateabb + year,
data = seda_fe_no_down_df,
cluster = "stateabb"
)
tidy(fe_did_model)| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| treated_post | 0.04219966 | 0.04012438 | 1.051721 | 0.2984213 |
Achievement Gap analysis
Achievement gap pre-treatment trends
Code Cell 20
# Achievement gap trends: all upward-treated states vs. all controls.
gap_plot_all <- cert_seda_df_extended %>%
filter(treated == 1 | control == 1) %>%
mutate(treatment_status = factor(
treated,
levels = c(1, 0),
labels = c("Upward Treated", "Control")
))
avg_treatment_year_gap_all <- gap_plot_all %>%
filter(treated == 1) %>%
distinct(stateabb, first_up_year) %>%
summarise(avg_year = mean(first_up_year, na.rm = TRUE)) %>%
pull(avg_year)
gap_avg_all <- gap_plot_all %>%
group_by(treatment_status, year) %>%
summarise(avg_gap = mean(gap_score, na.rm = TRUE), .groups = "drop")
plot_gap_all_controls <- ggplot(gap_avg_all, aes(x = year, y = avg_gap, color = treatment_status)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(xintercept = avg_treatment_year_gap_all, linetype = "dashed", color = "darkgrey") +
annotate(
"text", x = avg_treatment_year_gap_all + 0.1,
y = max(gap_avg_all$avg_gap, na.rm = TRUE),
label = paste0("Avg. treatment year: ", round(avg_treatment_year_gap_all, 1)),
hjust = 0, size = 3.5
) +
labs(
title = "Average ECD Achievement Gap: Upward Treated vs. All Controls",
x = "Year",
y = "Average Gap Score",
color = NULL
) +
scale_color_manual(values = treatment_colors, drop = FALSE) +
scale_x_continuous(breaks = seq(min(gap_avg_all$year, na.rm = TRUE), max(gap_avg_all$year, na.rm = TRUE), by = 1), labels = scales::number_format(accuracy = 1)) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(plot_gap_all_controls)
ggsave("plots/achievement_gap_treated_vs_all_controls.png", plot_gap_all_controls, width = 9, height = 6, dpi = 300)Achievement gap pre-treatment trends with low-certification controls
Code Cell 21
# Achievement gap trends: all upward-treated states vs. low-certification controls.
gap_plot_low_control <- cert_seda_df_extended %>%
filter(treated == 1 | (control == 1 & never_up_low == 1)) %>%
mutate(treatment_status = case_when(
treated == 1 ~ "Upward Treated",
control == 1 & never_up_low == 1 ~ "Low-Cert Control",
TRUE ~ NA_character_
))
avg_treatment_year_gap_low <- gap_plot_low_control %>%
filter(treated == 1) %>%
distinct(stateabb, first_up_year) %>%
summarise(avg_year = mean(first_up_year, na.rm = TRUE)) %>%
pull(avg_year)
gap_avg_low_control <- gap_plot_low_control %>%
filter(!is.na(treatment_status)) %>%
group_by(treatment_status, year) %>%
summarise(avg_gap = mean(gap_score, na.rm = TRUE), .groups = "drop")
plot_gap_low_controls <- ggplot(gap_avg_low_control, aes(x = year, y = avg_gap, color = treatment_status)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_vline(xintercept = avg_treatment_year_gap_low, linetype = "dashed", color = "darkgrey") +
annotate(
"text", x = avg_treatment_year_gap_low + 0.1,
y = max(gap_avg_low_control$avg_gap, na.rm = TRUE),
label = paste0("Avg. treatment year: ", round(avg_treatment_year_gap_low, 1)),
hjust = 0, size = 3.5
) +
labs(
title = "Average ECD Achievement Gap: Upward Treated vs. Low-Cert Controls",
x = "Year",
y = "Average Gap Score",
color = NULL
) +
scale_color_manual(values = treatment_colors, drop = FALSE) +
scale_x_continuous(breaks = seq(min(gap_avg_low_control$year, na.rm = TRUE), max(gap_avg_low_control$year, na.rm = TRUE), by = 1), labels = scales::number_format(accuracy = 1)) +
theme_minimal(base_size = plot_base_size) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(plot_gap_low_controls)
ggsave("plots/achievement_gap_treated_vs_low_controls.png", plot_gap_low_controls, width = 9, height = 6, dpi = 300)DiD regression of achievement gap on certification-length treatment
Code Cell 22
fe_gap_model_final <- feols(
gap_score ~ treated_post | stateabb + year,
data = cert_seda_df_extended %>%
filter(year >= 2010),
cluster = "stateabb"
)
tidy(fe_gap_model_final)NOTE: 8 observations removed because of NA values (LHS: 8).
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| treated_post | -0.04522895 | 0.0181902 | -2.486446 | 0.01659461 |
Final Sensitivity Analysis: All Treated vs. Low-Certification Controls
This model expands the treated group to include all states that implemented an upward certification policy change (regardless of timing) but maintains the strict control group of states that never upward-treated and had low baseline certification lengths.
Code Cell 23
# Final DiD model: All Treated vs. Low-Cert Control
fe_gap_model_all_vs_low <- feols(
gap_score ~ treated_post | stateabb + year,
data = cert_seda_df_extended %>%
filter(year >= 2010) %>%
filter(treated == 1 | (control == 1 & never_up_low == 1)),
cluster = "stateabb"
)
tidy(fe_gap_model_all_vs_low)NOTE: 6 observations removed because of NA values (LHS: 6).
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| treated_post | -0.03841955 | 0.01745975 | -2.200464 | 0.03969103 |
Code Cell 24
# Achievement gap regression table with fixed effects and state-clustered standard errors.
achievement_gap_models <- list(
"(1) Main DiD" = fe_gap_model_final,
"(2) Low-Cert Control" = fe_gap_model_all_vs_low
)
modelsummary(
achievement_gap_models,
coef_map = c(
"treated_post" = "Post upward treatment"
),
statistic = "({std.error})",
stars = c("***" = 0.01, "**" = 0.05, "*" = 0.10),
gof_map = data.frame(
raw = c("nobs", "adj.r.squared"),
clean = c("Observations", "Adjusted R²"),
fmt = c(0, 2)
),
title = "Regression Estimates for SEDA ECD Achievement Gap",
notes = "State and year fixed effects included. Standard errors clustered by state in parentheses. * p < 0.10, ** p < 0.05, *** p < 0.01."
)
save_regression_table_image(
achievement_gap_models,
terms = c("treated_post"),
term_labels = c("Post upward treatment"),
file = "plots/achievement_gap_regression_table.png",
title = "Regression Estimates for SEDA ECD Achievement Gap",
r2_digits = 2
)+-----------------------+--------------+----------------------+ | | (1) Main DiD | (2) Low-Cert Control | +=======================+==============+======================+ | Post upward treatment | -0.045** | -0.038** | +-----------------------+--------------+----------------------+ | | (0.018) | (0.017) | +-----------------------+--------------+----------------------+ | Observations | 419 | 182 | +-----------------------+--------------+----------------------+ | Adjusted R² | 0.84 | 0.89 | +=======================+==============+======================+ | * p < 0.1, ** p < 0.05, *** p < 0.01 | +=======================+==============+======================+ | State and year fixed effects included. Standard errors | | clustered by state in parentheses. * p < 0.10, ** p < | | 0.05, *** p < 0.01. | +=======================+==============+======================+ Table: Regression Estimates for SEDA ECD Achievement Gap