05 — Event data

Things are being recorded automatically.
Lecturer
Date
May 10, 2023
In this class, we explore event data to uncover the causal impact of an event on a specific variable of interest. We aim to develop data analysis skills to understand how events influence the chosen variable, such as trade agreements on trade flows or earning calls on stock prices. Through these analyses, we seek meaningful insights into event-variable relationships
Lecture slides

Morning session slides

View full screen Download as pdf
Code

Here’s the code we wrote during the afternoon session:
wdir <- getwd()
dir.create(file.path(wdir, "output"), showWarnings = FALSE)


if (!require("pacman")) install.packages("pacman"); library("pacman")
p_load(data.table)
p_load(stringr)
p_load(lubridate)
p_load(countrycode)
p_load(purrr)
p_load(ggplot2)
p_load(scales)
p_load(fixest)
p_load(etable)


# definitions
EU27 = c("DEU", "AUT", "BEL", "DNK",
         "FIN", "FRA", "GRC", "IRL",
         "ITA", "LUX", "NLD", "PRT",
         "ESP", "SWE", "MLT", "CYP",
         "EST", "LTU", "LVA", "CZE", 
         "HUN", "BGR", "ROU", "POL",
         "SVK", "SVN", "HRV")

# list of files
files = list.files("input/monthly_hs2", full.names = T)
data = fread(files[1])
data = fread(cmd = str_c("gunzip -c ", files[1])) # this command is equivalent
# data = fread(cmd = str_c("unzip -p ", files[1]))  # automatically unzips file and then reads into data.table

# first look at the data
View(data)
head(data)
data[, uniqueN(Reporter)]
data[, uniqueN(Partner)]
data[, uniqueN(Commodity)]
data[, unique(`Trade Flow`)] 
data[, .N]

# check trade zeros
data[`Trade Flow Code` == 2, .N]

# check trade zeros
data[, uniqueN(Reporter)] * data[, uniqueN(Partner)] * data[, uniqueN(Commodity)] 
data[`Trade Flow Code` == 1, .N] / (data[, uniqueN(Reporter)] * data[, uniqueN(Partner)] * data[, uniqueN(Commodity)])

wdir <- getwd()
dir.create(file.path(wdir, "temp"), showWarnings = FALSE)

# read all files
files = list.files("input/monthly_hs2", full.names = T)

# f = files[1]
for (f in files) {
    print(f)

    # load
    data = fread(f)

    # format and clean data
    data = data[, .(date = Period,
                    origin = Reporter,
                    destination = Partner,
                    hs = `Commodity Code`,
                    flow = `Trade Flow Code`,
                    value = `Trade Value (US$)`)]

    # make dates pretty
    data[, date := ymd(str_c(date, "01"))]
    data[, date := date + months(1) - days(1)]
    # str(data)

    # use country codes
    data[, origin := countryname(origin, "iso3c")]
    data[, destination := countryname(destination, "iso3c")]
    # countrycode("Germany", "country.name", "iso3c")
    # countrycode("Deutschland", "country.name.de", "iso3c")
    # countryname(c("Deutschland", "Alemania", "Allemagne", "Duitsland"), "iso3c")

    # transform value from integer to numeric
    data[, value := as.numeric(value)]

    # only take obs with non-missing variable
    data = data[complete.cases(data)]

    fwrite(data,
           "temp/monthly_1921.csv.gz",
           compress = "gzip",
           append = T)

}
 
# # alternativ
# extract_data = function (...) {}
# map(files, ~ extract_data(.x), .progress = T)

# load full dataset
data = fread("temp/monthly_1921.csv.gz")
data = data[flow == 2]
gc()


data[, .N, by = origin] # greater or equal than 125
data[, .(date, origin)] %>%
  unique() %>%
  .[, .N, by = .(year(date), origin)] %>%
  .[, .(N = sum(N)), by = origin]


# brexit impact
data[, unique(hs)]
exports_uk = data[origin == "GBR" & hs == "TOTAL", -c("origin", "hs", "flow")]

# plot: aggregate over time
plot_data = exports_uk[, .(value = sum(value)), by = date]

ggplot(plot_data) +
    theme_minimal() +
    geom_line(aes(x = date, y = value / 1000000)) +
    geom_vline(aes(xintercept = ymd("2020-01-31")), color = "red") +
    geom_vline(aes(xintercept = ymd("2020-12-31")), color = "red") +
    scale_x_date(name = NULL) +
    scale_y_continuous(name = "Total exports in mn USD",
                       labels = scales::dollar) +
    ggtitle(label = "Total value of UK exports",
            subtitle = "UN Comtrade Data, 2019 – 2021")


# plot: compare to other countries
plot_data = data[origin %in% c("GBR", "IRL", "ISL", "SWE"), .(value = sum(value)), by = .(date, origin)]

plot_data[, value_norm := value / mean(value[year(date) == "2019"]), by = origin]

plot= ggplot(plot_data) +
    theme_minimal() +
    geom_line(aes(x = date, y = value_norm, group = origin, color = origin)) +
    geom_vline(aes(xintercept = ymd("2020-01-31")), color = "red") +
    geom_vline(aes(xintercept = ymd("2020-12-31")), color = "red") +
    scale_x_date(name = NULL) +
    scale_y_continuous(name = "Total exports compared to average of 2019") +
    ggtitle(label = "Impact of Brexit",
            subtitle = "UN Comtrade Data, 2019 – 2021")

ggsave(plot,
       filename = str_c(wdir, "/output/exp_to_all_dest.png"),
       width = 20,
       height = 20,
       units = "cm")


# plot: compare to other countries the exports only to EU27
plot_data = data[origin %in% c("GBR", "IRL", "ISL", "SWE") & destination %in% EU27, .(value = sum(value)), by = .(date, origin)]

plot_data[, value_norm := value / mean(value[year(date) == "2019"]), by = origin]

plot_data[, Country := countrycode(origin, "iso3c", "country.name")]

plot = ggplot(plot_data) +
    theme_minimal() +
    geom_line(aes(x = date, y = value_norm, group = Country, color = Country)) +
    geom_vline(aes(xintercept = ymd("2020-01-31")), color = "red") +
    geom_vline(aes(xintercept = ymd("2020-12-31")), color = "red") +
    scale_x_date(name = NULL) +
    scale_y_continuous(name = "Exports to EU compared to average of 2019") +
    ggtitle(label = "Impact of Brexit",
            subtitle = "UN Comtrade Data, 2019 – 2021")

ggsave(plot,
       filename = str_c(wdir, "/output/exp_to_EU27.png"),
       width = 20,
       height = 20,
       units = "cm")


# regressions instead of eyeball econometrics:all
  # 1. regression export value from the 4 exporters toward EU destinations on Brexit treatment
        # Identification: use variation within country and controlling for (monthly) time trends common to all 4 exporters.
  # 2. the same as 1 but for exports towards all destinations? What do you expect? 
  # 3. the same but including zeros and estimating via PPML



reg_data = data[origin %in% c("GBR", "IRL", "ISL", "SWE") & destination %in% EU27, .(value = sum(value)), by = .(date, origin)]

reg_data[, treatment := (origin == "GBR") * (date > ymd("2020-12-31"))]

fixest::etable(reg1)

# all countries
reg_data = data[, .(value = sum(value)), by = .(date, origin)]

reg_data[, treatment := (origin == "GBR") * (date > ymd("2020-12-31"))]

# 162 zeros
reg2 = feols(log(value) ~ treatment | date + origin, data = reg_data)

reg3 = fepois(value ~ treatment | date + origin, data = reg_data)
# etable(reg1, reg2, reg3)

# gravity
reg_data = data[hs == "TOTAL", -c("hs", "flow")]
reg_data[, origin_date := str_c(origin, date)]
reg_data[, destination_date := str_c(destination, date)]
reg_data[, origin_destination := str_c(origin, destination)]

reg_data[, treatment := (origin == "GBR") * (destination %in% EU27)* (date > ymd("2020-12-31"))]

reg4 = feols(log(value) ~ treatment | origin_date + destination_date + origin_destination, data = reg_data)

reg5 = fepois(value ~ treatment | origin_date + destination_date + origin_destination, data = reg_data)


# hwo to create zeros (for on month only)
data_zeros = data[date == ymd("2020-12-31") & hs == "TOTAL", -c("hs", "flow")]

data_zeros[, uniqueN(origin)] * data_zeros[, uniqueN(destination)]
data_zeros[, .N]

data_grid = CJ(origin = data_zeros[, unique(origin)],
               destination = data_zeros[, unique(destination)],
               date = data_zeros[, unique(date)])
data_grid = data_grid[origin != destination]

data_zeros = merge(data_grid,
                   data_zeros,
                   by = c("date", "origin", "destination"),
                   all.x = T)
data_zeros[is.na(value), value := 0]
We get these two wonderful plot:
To be added.
:::