Chapter 4 Appendix: Importing the original datasets
4.1 Software
I followed the standards and conventions of the Tidyverse, and I converted all the original Stata datasets using the following software:
R version 3.6.3 (2020-02-29)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=es_CL.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=es_CL.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=es_CL.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=es_CL.UTF-8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: ggplot2(v.3.3.2), duckdb(v.0.2.1-2), DBI(v.1.1.0), msm(v.1.6.8), broom(v.0.7.1), lmtest(v.0.9-38), zoo(v.1.8-8), sandwich(v.3.0-0), multiwayvcov(v.1.2.3), tidyr(v.1.1.2), dplyr(v.1.0.2) and yotover(v.0.0.0.9000)
loaded via a namespace (and not attached): tidyselect(v.1.1.0), xfun(v.0.18), pander(v.0.6.3), purrr(v.0.3.4), splines(v.3.6.3), lattice(v.0.20-41), colorspace(v.1.4-1), vctrs(v.0.3.4), generics(v.0.0.2), expm(v.0.999-5), htmltools(v.0.5.0), yaml(v.2.2.1), utf8(v.1.1.4), survival(v.3.1-12), rlang(v.0.4.8), R.oo(v.1.24.0), pillar(v.1.4.6), withr(v.2.3.0), glue(v.1.4.2), R.utils(v.2.10.1), rappdirs(v.0.3.1), lifecycle(v.0.2.0), stringr(v.1.4.0), munsell(v.0.5.0), gtable(v.0.3.0), R.methodsS3(v.1.8.1), mvtnorm(v.1.1-1), codetools(v.0.2-16), evaluate(v.0.14), labeling(v.0.3), knitr(v.1.30), parallel(v.3.6.3), fansi(v.0.4.1), Rcpp(v.1.0.5), backports(v.1.1.10), scales(v.1.1.1), farver(v.2.0.3), digest(v.0.6.26), stringi(v.1.5.3), bookdown(v.0.21), grid(v.3.6.3), cli(v.2.1.0), tools(v.3.6.3), magrittr(v.1.5), tibble(v.3.0.4), crayon(v.1.3.4), pkgconfig(v.2.0.3), ellipsis(v.0.3.1), Matrix(v.1.2-18), assertthat(v.0.2.1), rmarkdown(v.2.4), httr(v.1.4.2), rstudioapi(v.0.11), R6(v.2.4.1), boot(v.1.3-25) and compiler(v.3.6.3)
4.2 Downloading the original datasets
# Remote location of the zipped application files (hosted at vi.unctad.org),
# plus the local names used for the downloaded archive and the extracted folder.
appfiles_url <- "https://vi.unctad.org/tpa/web/zips/vol2/Advanced%20Guide%20to%20TPA.zip"
appfiles_zip <- "00-application-files.zip"
appfiles_dir <- "00-application-files"

# Download the archive only if it is not already on disk. The zip is a binary
# file, so it is written in binary mode explicitly instead of relying on the
# platform-dependent default.
if (!file.exists(appfiles_zip)) {
  download.file(appfiles_url, appfiles_zip, mode = "wb")
}

# Extract only once. The archive unpacks into a folder named
# "Advanced Guide to TPA", which is renamed to the shorter `appfiles_dir`.
if (!dir.exists(appfiles_dir)) {
  unzip(appfiles_zip)
  # file.rename() signals failure through its return value, so check it:
  # otherwise later steps would silently find no files to convert.
  renamed <- file.rename("Advanced Guide to TPA", appfiles_dir)
  if (!isTRUE(renamed)) {
    stop(
      "Could not rename 'Advanced Guide to TPA' to '", appfiles_dir, "'.",
      call. = FALSE
    )
  }
}
4.3 Converting the original datasets
This code chunk can be a bit obscure. It is only shown to make all of my steps transparent.
# these packages are only used to import the data
library(haven)
library(stringr)
library(janitor)
library(purrr)
# Create the output folder for the converted TSV files. dir.create() reports
# failure (e.g. the folder already exists) through its return value and a
# warning rather than an error, so silencing the warning is enough to make this
# step safe to re-run.
dir.create("data-tsv", showWarnings = FALSE)

# Collect every Stata file under the extracted application files, searching
# subfolders too. `pattern` is a regular expression matched against file names,
# so it is anchored to the ".dta" extension instead of matching "dta" anywhere.
dta_files <- list.files(
  appfiles_dir,
  pattern = "\\.dta$",
  full.names = TRUE,
  recursive = TRUE
)
# Convert one Stata file into a tab-separated file under "data-tsv/".
#
# finp: path to a .dta file, as returned by list.files() on the application
#   files folder.
#
# The output file name is derived from the input path by a fixed sequence of
# text substitutions (the order matters, since each step works on the result of
# the previous one). The function reads the global `appfiles_dir` and is called
# for its side effects: it prints the input path and writes the TSV file. An
# existing output file is left untouched.
read_and_clean <- function(finp) {
  message(finp)

  # Drop the root folder, shorten the first "Chapter" to "ch", then remove any
  # remaining "Chapter<digit>" fragments and the ".dta" extension.
  stem <- str_replace(finp, appfiles_dir, "")
  stem <- str_replace(stem, "Chapter", "ch")
  stem <- str_replace_all(stem, "Chapter[0-9]|\\.dta", "")

  # Flatten the folder structure into a single name and normalise it.
  stem <- make_clean_names(str_replace_all(stem, "(/)", "_"))

  # Remove verbose folder-derived fragments from the name.
  long_names <- c(
    "datasets_",
    "applications_",
    "exercises_",
    "1_trade_without_border_results_1",
    "2_rt_as_effects_results_2_"
  )
  long_names_regex <- paste(long_names, collapse = "|")
  stem <- str_replace_all(stem, long_names_regex, "")

  # Collapse the first "_<digit>_" or "__" occurrence into a single underscore.
  stem <- str_replace(stem, "_([0-9])_|__", "_")

  tsv_path <- sprintf("data-tsv/%s.tsv", stem)

  # Only read and write when the TSV file has not been created yet.
  if (!file.exists(tsv_path)) {
    d <- clean_names(read_dta(finp))
    data.table::fwrite(d, tsv_path, sep = "\t")
  }
}
# Convert every Stata file. walk() is used instead of map() because
# read_and_clean() is called only for its side effect of writing TSV files;
# this avoids building and printing a list of unused return values.
walk(dta_files, read_and_clean)