# Remember the working directory and make sure the output folder exists.
# showWarnings = FALSE keeps dir.create() quiet when the folder is already there.
wdir <- getwd()
dir.create(file.path(wdir, "output"), showWarnings = FALSE)
if (!require("pacman")) install.packages("pacman"); library("pacman")
p_load(data.table)
p_load(stringr)
p_load(lubridate)
p_load(countrycode)
p_load(purrr)
p_load(ggplot2)
p_load(scales)
p_load(fixest)
p_load(etable)
# definitions
# ISO3 codes used as the EU27 destination set in the filters below.
EU27 = c("DEU", "AUT", "BEL", "DNK",
         "FIN", "FRA", "GRC", "IRL",
         "ITA", "LUX", "NLD", "PRT",
         "ESP", "SWE", "MLT", "CYP",
         "EST", "LTU", "LVA", "CZE",
         "HUN", "BGR", "ROU", "POL",
         "SVK", "SVN", "HRV")
# list of files
files = list.files("input/monthly_hs2", full.names = TRUE)

# Read the first file to get a feel for the raw data.
data = fread(files[1])
# shQuote() keeps the shell command intact when the path contains spaces.
data = fread(cmd = str_c("gunzip -c ", shQuote(files[1]))) # this command is equivalent
# data = fread(cmd = str_c("unzip -p ", files[1])) # automatically unzips file and then reads into data.table
# first look at the data
if (interactive()) View(data) # the data viewer is only useful in an interactive session
head(data)
data[, uniqueN(Reporter)]
data[, uniqueN(Partner)]
data[, uniqueN(Commodity)]
data[, unique(`Trade Flow`)]

# total number of rows
data[, .N]

# number of rows with Trade Flow Code 2
data[`Trade Flow Code` == 2, .N]

# check trade zeros: size of the full reporter x partner x commodity grid ...
data[, uniqueN(Reporter)] * data[, uniqueN(Partner)] * data[, uniqueN(Commodity)]
# ... and the share of that grid that is observed for Trade Flow Code 1
# NOTE(review): the count above uses code 2 while this ratio uses code 1 -- confirm this is intended
data[`Trade Flow Code` == 1, .N] / (data[, uniqueN(Reporter)] * data[, uniqueN(Partner)] * data[, uniqueN(Commodity)])
# Remember the working directory and make sure the temp folder exists.
wdir <- getwd()
dir.create(file.path(wdir, "temp"), showWarnings = FALSE)
# read all files
files = list.files("input/monthly_hs2", full.names = TRUE)
# f = files[1]

# Start from a clean output file: fwrite() below appends one chunk per input
# file, so a leftover file from an earlier run would duplicate every row.
unlink("temp/monthly_1921.csv.gz")

# Clean each raw file and append it to one compressed csv.
for (f in files) {
  print(f)

  # load
  data = fread(f)

  # format and clean data: keep and rename the columns used later on
  data = data[, .(date = Period,
                  origin = Reporter,
                  destination = Partner,
                  hs = `Commodity Code`,
                  flow = `Trade Flow Code`,
                  value = `Trade Value (US$)`)]

  # make dates pretty: parse as first day of the month, then move to the last day
  data[, date := ymd(str_c(date, "01"))]
  data[, date := date + months(1) - days(1)]
  # str(data)

  # use country codes
  data[, origin := countryname(origin, "iso3c")]
  data[, destination := countryname(destination, "iso3c")]
  # countrycode("Germany", "country.name", "iso3c")
  # countrycode("Deutschland", "country.name.de", "iso3c")
  # countryname(c("Deutschland", "Alemania", "Allemagne", "Duitsland"), "iso3c")

  # transform value from integer to numeric
  data[, value := as.numeric(value)]

  # only take obs with non-missing variable
  data = data[complete.cases(data)]

  fwrite(data,
         "temp/monthly_1921.csv.gz",
         compress = "gzip",
         append = TRUE)
}
# # alternative
# extract_data = function (...) {}
# map(files, ~ extract_data(.x), .progress = T)
# load full dataset
data = fread("temp/monthly_1921.csv.gz")
# keep only rows with flow code 2 (treated as exports in the rest of the script)
data = data[flow == 2]
gc()
# number of rows per origin
data[, .N, by = origin] # greater or equal than 125

# number of distinct dates per origin: count per (year, origin), then sum over years
data[, .(date, origin)] %>%
  unique() %>%
  .[, .N, by = .(year(date), origin)] %>%
  .[, .(N = sum(N)), by = origin]
# brexit impact
data[, unique(hs)]
# UK rows of the "TOTAL" commodity; the now-constant columns are dropped
exports_uk = data[origin == "GBR" & hs == "TOTAL", -c("origin", "hs", "flow")]
# plot: aggregate over time
plot_data = exports_uk[, .(value = sum(value)), by = date]

# Monthly UK export value in millions; the red lines mark 2020-01-31 and 2020-12-31.
ggplot(plot_data) +
  theme_minimal() +
  geom_line(aes(x = date, y = value / 1000000)) +
  geom_vline(aes(xintercept = ymd("2020-01-31")), color = "red") +
  geom_vline(aes(xintercept = ymd("2020-12-31")), color = "red") +
  scale_x_date(name = NULL) +
  scale_y_continuous(name = "Total exports in mn USD",
                     labels = scales::dollar) +
  ggtitle(label = "Total value of UK exports",
          subtitle = "UN Comtrade Data, 2019 – 2021")
# plot: compare to other countries
# NOTE(review): this sums over every hs code; if the data holds hs == "TOTAL" rows
# next to the individual codes, values are counted twice -- confirm.
plot_data = data[origin %in% c("GBR", "IRL", "ISL", "SWE"),
                 .(value = sum(value)),
                 by = .(date, origin)]

# normalise each origin by its own 2019 monthly average
plot_data[, value_norm := value / mean(value[year(date) == 2019]), by = origin]

plot = ggplot(plot_data) +
  theme_minimal() +
  geom_line(aes(x = date, y = value_norm, group = origin, color = origin)) +
  geom_vline(aes(xintercept = ymd("2020-01-31")), color = "red") +
  geom_vline(aes(xintercept = ymd("2020-12-31")), color = "red") +
  scale_x_date(name = NULL) +
  scale_y_continuous(name = "Total exports compared to average of 2019") +
  ggtitle(label = "Impact of Brexit",
          subtitle = "UN Comtrade Data, 2019 – 2021")

ggsave(filename = str_c(wdir, "/output/exp_to_all_dest.png"),
       plot = plot,
       width = 20,
       height = 20,
       units = "cm")
# plot: compare to other countries the exports only to EU27
# NOTE(review): this sums over every hs code; if the data holds hs == "TOTAL" rows
# next to the individual codes, values are counted twice -- confirm.
plot_data = data[origin %in% c("GBR", "IRL", "ISL", "SWE") & destination %in% EU27,
                 .(value = sum(value)),
                 by = .(date, origin)]

# normalise each origin by its own 2019 monthly average
plot_data[, value_norm := value / mean(value[year(date) == 2019]), by = origin]

# readable country names for the legend
plot_data[, Country := countrycode(origin, "iso3c", "country.name")]

plot = ggplot(plot_data) +
  theme_minimal() +
  geom_line(aes(x = date, y = value_norm, group = Country, color = Country)) +
  geom_vline(aes(xintercept = ymd("2020-01-31")), color = "red") +
  geom_vline(aes(xintercept = ymd("2020-12-31")), color = "red") +
  scale_x_date(name = NULL) +
  scale_y_continuous(name = "Exports to EU compared to average of 2019") +
  ggtitle(label = "Impact of Brexit",
          subtitle = "UN Comtrade Data, 2019 – 2021")

ggsave(filename = str_c(wdir, "/output/exp_to_EU27.png"),
       plot = plot,
       width = 20,
       height = 20,
       units = "cm")
# regressions instead of eyeball econometrics:all
# 1. regression export value from the 4 exporters toward EU destinations on Brexit treatment
# Identification: use variation within country and controlling for (monthly) time trends common to all 4 exporters.
# 2. the same as 1 but for exports towards all destinations? What do you expect?
# 3. the same but including zeros and estimating via PPML
# NOTE(review): this sums over every hs code; if the data holds hs == "TOTAL" rows
# next to the individual codes, values are counted twice -- confirm.
reg_data = data[origin %in% c("GBR", "IRL", "ISL", "SWE") & destination %in% EU27,
                .(value = sum(value)),
                by = .(date, origin)]

# treatment = 1 for UK observations dated after 2020-12-31, 0 otherwise
reg_data[, treatment := (origin == "GBR") * (date > ymd("2020-12-31"))]

# reg1 was referenced below but never estimated; this uses the specification
# described in point 1 (origin and date fixed effects), matching reg2 further down.
reg1 = feols(log(value) ~ treatment | date + origin, data = reg_data)

fixest::etable(reg1)
# all countries
# NOTE(review): this sums over every hs code; if the data holds hs == "TOTAL" rows
# next to the individual codes, values are counted twice -- confirm.
reg_data = data[, .(value = sum(value)), by = .(date, origin)]

# treatment = 1 for UK observations dated after 2020-12-31, 0 otherwise
reg_data[, treatment := (origin == "GBR") * (date > ymd("2020-12-31"))]

# 162 zeros
# log-linear OLS with date and origin fixed effects
reg2 = feols(log(value) ~ treatment | date + origin, data = reg_data)

# same specification estimated by Poisson (PPML)
reg3 = fepois(value ~ treatment | date + origin, data = reg_data)
# etable(reg1, reg2, reg3)
# gravity
reg_data = data[hs == "TOTAL", -c("hs", "flow")]

# identifiers for the three sets of fixed effects
reg_data[, origin_date := str_c(origin, date)]
reg_data[, destination_date := str_c(destination, date)]
reg_data[, origin_destination := str_c(origin, destination)]

# treatment = 1 for UK exports to EU27 destinations dated after 2020-12-31
reg_data[, treatment := (origin == "GBR") * (destination %in% EU27) * (date > ymd("2020-12-31"))]

# log-linear OLS
reg4 = feols(log(value) ~ treatment | origin_date + destination_date + origin_destination, data = reg_data)

# Poisson (PPML)
reg5 = fepois(value ~ treatment | origin_date + destination_date + origin_destination, data = reg_data)
# how to create zeros (for one month only)
data_zeros = data[date == ymd("2020-12-31") & hs == "TOTAL", -c("hs", "flow")]

# size of the full origin x destination grid vs. number of observed rows
data_zeros[, uniqueN(origin)] * data_zeros[, uniqueN(destination)]
data_zeros[, .N]

# every origin-destination-date combination, without self-pairs
data_grid = CJ(origin = data_zeros[, unique(origin)],
               destination = data_zeros[, unique(destination)],
               date = data_zeros[, unique(date)])
data_grid = data_grid[origin != destination]

# keep every grid row (all.x); pairs without an observed flow get NA, then 0
data_zeros = merge(data_grid,
                   data_zeros,
                   by = c("date", "origin", "destination"),
                   all.x = TRUE)
data_zeros[is.na(value), value := 0]
05 — Event data
Things are being recorded automatically.
Slack channel: #05-event-data
In this class, we explore event data to uncover the causal impact of an event on a specific variable of interest. We aim to develop data analysis skills to understand how events influence the chosen variable, such as trade agreements on trade flows or earning calls on stock prices. Through these analyses, we seek meaningful insights into event-variable relationships.
Lecture slides
Morning session slides
Code
Here’s the code we wrote during the afternoon session:
We get these two wonderful plots:
To be added.
:::