Chapter 4 Appendix: Importing the original datasets

4.1 Software

I followed the standards and conventions of the Tidyverse, and I converted all the original datasets, which are provided in Stata format, using this software:

R version 3.6.3 (2020-02-29)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=es_CL.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=es_CL.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=es_CL.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=es_CL.UTF-8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: ggplot2(v.3.3.2), duckdb(v.0.2.1-2), DBI(v.1.1.0), msm(v.1.6.8), broom(v.0.7.1), lmtest(v.0.9-38), zoo(v.1.8-8), sandwich(v.3.0-0), multiwayvcov(v.1.2.3), tidyr(v.1.1.2), dplyr(v.1.0.2) and yotover(v.0.0.0.9000)

loaded via a namespace (and not attached): tidyselect(v.1.1.0), xfun(v.0.18), pander(v.0.6.3), purrr(v.0.3.4), splines(v.3.6.3), lattice(v.0.20-41), colorspace(v.1.4-1), vctrs(v.0.3.4), generics(v.0.0.2), expm(v.0.999-5), htmltools(v.0.5.0), yaml(v.2.2.1), utf8(v.1.1.4), survival(v.3.1-12), rlang(v.0.4.8), R.oo(v.1.24.0), pillar(v.1.4.6), withr(v.2.3.0), glue(v.1.4.2), R.utils(v.2.10.1), rappdirs(v.0.3.1), lifecycle(v.0.2.0), stringr(v.1.4.0), munsell(v.0.5.0), gtable(v.0.3.0), R.methodsS3(v.1.8.1), mvtnorm(v.1.1-1), codetools(v.0.2-16), evaluate(v.0.14), labeling(v.0.3), knitr(v.1.30), parallel(v.3.6.3), fansi(v.0.4.1), Rcpp(v.1.0.5), backports(v.1.1.10), scales(v.1.1.1), farver(v.2.0.3), digest(v.0.6.26), stringi(v.1.5.3), bookdown(v.0.21), grid(v.3.6.3), cli(v.2.1.0), tools(v.3.6.3), magrittr(v.1.5), tibble(v.3.0.4), crayon(v.1.3.4), pkgconfig(v.2.0.3), ellipsis(v.0.3.1), Matrix(v.1.2-18), assertthat(v.0.2.1), rmarkdown(v.2.4), httr(v.1.4.2), rstudioapi(v.0.11), R6(v.2.4.1), boot(v.1.3-25) and compiler(v.3.6.3)

4.2 Downloading the original datasets

# Remote location of the zipped application files, plus the local names used
# for the downloaded archive and for the extracted folder.
appfiles_url <- "https://vi.unctad.org/tpa/web/zips/vol2/Advanced%20Guide%20to%20TPA.zip"
appfiles_zip <- "00-application-files.zip"
appfiles_dir <- "00-application-files"

# Download the archive only if it is not already on disk. mode = "wb" asks for
# a binary transfer explicitly, so the zip is never rewritten as text.
if (!file.exists(appfiles_zip)) {
  download.file(appfiles_url, appfiles_zip, mode = "wb")
}

# Extract only if the target folder is missing. The archive is expected to
# unpack into "Advanced Guide to TPA", which is renamed to a name without
# spaces. Stop if the rename fails: every later step reads from appfiles_dir.
if (!dir.exists(appfiles_dir)) {
  unzip(appfiles_zip)
  if (!file.rename("Advanced Guide to TPA", appfiles_dir)) {
    stop(
      "Could not rename 'Advanced Guide to TPA' to '", appfiles_dir, "'",
      call. = FALSE
    )
  }
}

4.3 Converting the original datasets

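The following code chunk can be a bit obscure; it is shown only to make all of my steps transparent. It reads each Stata file, cleans the file and column names, and writes the result as a tab-separated file in the data-tsv folder.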

# these packages are only used to import the data
library(haven)
library(stringr)
library(janitor)
library(purrr)

# Create the output folder for the converted files. showWarnings = FALSE
# silences the warning dir.create() gives when the folder already exists,
# so no try() wrapper is needed.
dir.create("data-tsv", showWarnings = FALSE)

# Collect every Stata file below the extracted folder. The pattern is a regular
# expression, so it is anchored to the ".dta" extension instead of matching any
# file name that merely contains "dta".
dta_files <- list.files(appfiles_dir,
                        pattern = "\\.dta$",
                        full.names = TRUE,
                        recursive = TRUE)

# Convert one Stata file to a tab-separated file inside "data-tsv".
#
# finp: path to a .dta file located under appfiles_dir (a variable defined
#       outside this function).
#
# The output file name is derived from the input path by a fixed sequence of
# string replacements; the order of the steps matters. If the output file
# already exists, nothing is read or written. The function is called for its
# side effects (a message and, possibly, a new file).
read_and_clean <- function(finp) {
  # show which file is being processed
  message(finp)

  # fragments that are removed from the cleaned name to keep it short
  long_names <- c(
    "datasets_",
    "applications_",
    "exercises_",
    "1_trade_without_border_results_1",
    "2_rt_as_effects_results_2_"
  )
  long_names_regex <- paste(long_names, collapse = "|")

  # 1. drop the root folder, shorten the first "Chapter" to "ch", then remove
  #    any remaining "Chapter<digit>" and the ".dta" extension
  stem <- str_replace(finp, appfiles_dir, "")
  stem <- str_replace(stem, "Chapter", "ch")
  stem <- str_replace_all(stem, "Chapter[0-9]|\\.dta", "")

  # 2. flatten the path into a single name and normalise it with janitor
  stem <- str_replace_all(stem, "(/)", "_")
  stem <- make_clean_names(stem)

  # 3. strip the long fragments listed above
  stem <- str_replace_all(stem, long_names_regex, "")

  # 4. collapse the first "_<digit>_" or double underscore into one underscore
  stem <- str_replace(stem, "_([0-9])_|__", "_")

  tsv_path <- sprintf("data-tsv/%s.tsv", stem)

  # nothing to do when the file was converted in a previous run
  if (file.exists(tsv_path)) {
    return(invisible(NULL))
  }

  # read the Stata file, clean its column names and write it as TSV
  d <- clean_names(read_dta(finp))
  data.table::fwrite(d, tsv_path, sep = "\t")
}

# Convert every Stata file. read_and_clean() is called only for its side
# effects, so walk() is used instead of map(), which would build and print a
# list of NULL values.
walk(dta_files, read_and_clean)