TBA
# Attach the packages used by the notebook. Warnings and messages emitted
# while attaching are suppressed.
suppressWarnings(suppressMessages({
library(knitr)
library(kableExtra)
library(htmltools)
library(tidyverse)
library(scales)
library(ExPanDaR)
}))
# Set the knitr chunk option `fig.align` to 'center' for the chunks below.
knitr::opts_chunk$set(fig.align = 'center')
This step reads the raw data provided by ExPanD()
and generates the sample for the analysis.
# Build the analysis sample from the raw data and its variable definitions.
#
# Args:
#   df:     data frame holding the raw data.
#   df_def: data frame describing the variables of `df`, one row per
#           variable, with columns `var_name`, `type` and `can_be_na`.
#
# Returns:
#   A list with two elements: `df`, the cleaned sample, and `df_def`, the
#   definition rows of the variables that were kept.
create_sample <- function(df, df_def) {
  # Set infinite numerical variables to NA. List-style indexing keeps a
  # single numeric variable as a one-column data frame, so lapply() always
  # iterates over columns and never over the values of one column.
  numeric_vars <- as.character(
    df_def$var_name[which(df_def$type == "numeric")]
  )
  if (length(numeric_vars) > 0) {
    df[numeric_vars] <- lapply(
      df[numeric_vars],
      function(x) ifelse(is.finite(x), x, NA)
    )
  }

  # Delete variables that only contain NAs. The check runs on the columns
  # named in df_def, in df_def order, so every flag lines up with its
  # definition row even if df has extra or differently ordered columns.
  df <- df[as.character(df_def$var_name)]
  all_na_vars <- vapply(
    df,
    function(x) all(is.na(x)),
    logical(1),
    USE.NAMES = FALSE
  )
  df_def <- df_def[!all_na_vars, , drop = FALSE]
  df <- df[as.character(df_def$var_name)]

  # Drop observations that are NA in variables that are not allowed to be.
  # complete.cases() cannot handle a zero-column selection, so the filter
  # is skipped when no variable is required to be non-missing.
  required_vars <- as.character(
    df_def$var_name[which(df_def$can_be_na == FALSE)]
  )
  if (length(required_vars) > 0) {
    df <- df[complete.cases(df[required_vars]), , drop = FALSE]
  }

  df <- droplevels(df)
  list(df = df, df_def = df_def)
}
# Load the stored notebook data (presumably provides `nb_df` and
# `nb_df_def`, which are used right below) and derive the analysis sample.
load("ExPanD_nb_data.Rdata")
smp_list <- create_sample(df = nb_df, df_def = nb_df_def)
smp <- smp_list[["df"]]
smp_def <- smp_list[["df_def"]]
# Bar chart over `year`, with the axis breaks thinned out via pretty().
df <- smp
df$year <- as.factor(df$year)
p <- ggplot(df, aes(x = year)) +
  geom_bar() +
  labs(x = "year")
p <- p +
  scale_x_discrete(
    breaks = pretty(as.numeric(as.character(df$year)), n = 10)
  )
p

# Missing-values graph by `year`, based on the untouched sample.
df <- smp
prepare_missing_values_graph(df, "year")
# Descriptive statistics table for the sample, rendered as a styled kable.
df <- smp
t <- prepare_descriptive_table(smp)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
t$kable_ret %>%
  kable_styling("condensed", full_width = FALSE, position = "center")
| | N | Mean | Std. dev. | Min. | 25 % | Median | 75 % | Max. |
---|---|---|---|---|---|---|---|---|
Y | 650 | 1,314,992.485 | 2,597,230.332 | 55,661.000 | 221,501.250 | 367,013.500 | 1,398,438.750 | 16,490,883.000 |
K | 650 | 4,360,577.048 | 8,057,613.445 | 163,185.000 | 777,063.500 | 1,410,611.000 | 4,428,251.750 | 52,849,892.000 |
pop | 650 | 35.609 | 58.351 | 3.016 | 5.930 | 10.752 | 41.669 | 319.449 |
L | 650 | 16.576 | 28.160 | 1.184 | 2.842 | 4.784 | 18.223 | 148.463 |
s | 650 | 10.272 | 1.556 | 5.844 | 9.299 | 10.474 | 11.389 | 14.996 |
alpha_it | 650 | 0.429 | 0.081 | 0.298 | 0.372 | 0.407 | 0.471 | 0.801 |
GDPpc | 650 | 34,429.230 | 10,778.468 | 14,193.678 | 26,931.717 | 33,616.328 | 39,840.247 | 81,879.516 |
lp | 650 | 74,238.609 | 19,239.144 | 31,440.531 | 61,726.696 | 73,649.012 | 83,727.193 | 153,111.469 |
h | 650 | 3.680 | 0.450 | 2.484 | 3.386 | 3.724 | 3.999 | 5.202 |
kl | 650 | 257,507.503 | 88,933.005 | 67,559.000 | 195,197.680 | 237,460.523 | 306,806.281 | 552,395.188 |
kp | 650 | 0.305 | 0.077 | 0.134 | 0.257 | 0.303 | 0.340 | 0.693 |
ky | 650 | 3.482 | 0.881 | 1.444 | 2.945 | 3.304 | 3.884 | 7.479 |
TFP | 650 | 1,722.405 | 890.059 | 583.144 | 1,095.234 | 1,419.952 | 2,133.549 | 5,388.233 |
log_GDPpc_raw | 650 | 10.400 | 0.304 | 9.561 | 10.201 | 10.423 | 10.593 | 11.313 |
log_lp_raw | 650 | 11.183 | 0.254 | 10.356 | 11.030 | 11.207 | 11.335 | 11.939 |
log_ky_raw | 650 | 1.218 | 0.244 | 0.367 | 1.080 | 1.195 | 1.357 | 2.012 |
log_h_raw | 650 | 1.295 | 0.127 | 0.910 | 1.220 | 1.315 | 1.386 | 1.649 |
log_tfp_raw | 650 | 7.345 | 0.446 | 6.368 | 6.999 | 7.258 | 7.666 | 8.592 |
log_GDPpc | 650 | 10.400 | 0.297 | 9.574 | 10.205 | 10.411 | 10.594 | 11.368 |
log_lp | 650 | 11.183 | 0.248 | 10.351 | 11.032 | 11.191 | 11.333 | 11.984 |
log_ky | 650 | 1.218 | 0.223 | 0.344 | 1.100 | 1.193 | 1.329 | 1.947 |
log_h | 650 | 1.295 | 0.125 | 0.904 | 1.219 | 1.318 | 1.385 | 1.615 |
log_tfp | 650 | 7.345 | 0.442 | 6.376 | 6.985 | 7.235 | 7.650 | 8.595 |
club_log_lp | 625 | 2.080 | 0.797 | 1.000 | 2.000 | 2.000 | 2.000 | 4.000 |
finalclub_log_lp | 625 | 1.880 | 0.516 | 1.000 | 2.000 | 2.000 | 2.000 | 3.000 |
re_log_lp | 650 | 1.000 | 0.017 | 0.951 | 0.994 | 1.000 | 1.009 | 1.050 |
club_log_ky | 650 | 1.923 | 0.829 | 1.000 | 1.000 | 2.000 | 3.000 | 3.000 |
finalclub_log_ky | 650 | 1.308 | 0.462 | 1.000 | 1.000 | 1.000 | 2.000 | 2.000 |
re_log_ky | 650 | 1.000 | 0.160 | 0.304 | 0.915 | 1.003 | 1.079 | 1.389 |
club_log_h | 625 | 1.120 | 0.325 | 1.000 | 1.000 | 1.000 | 1.000 | 2.000 |
finalclub_log_h | 625 | 1.120 | 0.325 | 1.000 | 1.000 | 1.000 | 1.000 | 2.000 |
re_log_h | 650 | 1.000 | 0.087 | 0.754 | 0.946 | 1.015 | 1.060 | 1.211 |
club_log_tfp | 650 | 2.000 | 0.680 | 1.000 | 2.000 | 2.000 | 2.000 | 3.000 |
finalclub_log_tfp | 650 | 2.000 | 0.680 | 1.000 | 2.000 | 2.000 | 2.000 | 3.000 |
re_log_tfp | 650 | 1.000 | 0.059 | 0.892 | 0.949 | 0.986 | 1.039 | 1.166 |
# Histogram of log_lp for the observations whose year is "1990".
var <- as.numeric(smp[["log_lp"]][smp[["year"]] == "1990"])
hist(
  var,
  breaks = 25,
  right = FALSE,
  col = "red",
  main = "",
  xlab = "log_lp"
)
# Extreme-observations table for GDPpc, restricted to year "1990".
df <- smp
# Each column is listed once. Selecting "year" twice made the data frame
# carry a duplicated `year.1` column into the table.
vars <- c("country", "year", "GDPpc")
df <- df[df$year == "1990", ]
df <- df[, vars]
df <- droplevels(df[complete.cases(df), ])
if (nrow(df) <= 10) {
  cat("Not enough data to generate table")
} else {
  tab <- prepare_ext_obs_table(df, var = "GDPpc")
  tab$kable_ret %>%
    kable_styling()
}
country | year | year.1 | GDPpc |
---|---|---|---|
Switzerland | 1990 | 1990 | 37,503.4 |
United States | 1990 | 1990 | 36,621.1 |
Norway | 1990 | 1990 | 30,351.3 |
Canada | 1990 | 1990 | 30,233.4 |
Australia | 1990 | 1990 | 27,216.0 |
... | ... | ... | ... |
Taiwan | 1990 | 1990 | 19,214.2 |
Spain | 1990 | 1990 | 17,108.7 |
Ireland | 1990 | 1990 | 16,931.0 |
Greece | 1990 | 1990 | 16,185.3 |
Portugal | 1990 | 1990 | 14,193.7 |
# Bar graph of lp by region, using only the rows whose year is "1990".
df <- smp[smp$year == "1990", ]
by_region <- prepare_by_group_bar_graph(df, "region", "lp", mean, TRUE)
by_region$plot + ylab("mean lp")

# Violin graph of log_lp by region on the full sample.
df <- smp
prepare_by_group_violin_graph(df, "region", "log_lp", TRUE)

# Trend graph of lp over year.
df <- smp
prepare_trend_graph(df, "year", "lp")$plot

# Quantile trend graph of lp over year.
df <- smp
prepare_quantile_trend_graph(
  df,
  "year",
  c(0.05, 0.25, 0.5, 0.75, 0.95),
  "lp"
)$plot
# Quantile trend plot of log_lp_raw, restyled with a minimal theme, a
# reversed colour legend titled "Quantile" and custom axis labels.
log_lp_raw <- prepare_quantile_trend_graph(
  smp,
  "year",
  c(0.05, 0.25, 0.5, 0.75, 0.95),
  "log_lp_raw"
)$plot +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Labor Productivity")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
# Save the labor productivity plot. The plot is passed explicitly so the
# file does not depend on whichever plot ggplot2 last touched.
ggsave(
  "quintiles_hiYes_log_lp_raw.pdf",
  plot = log_lp_raw,
  width = 6,
  height = 4
)

# Quantile trend plot of log_ky_raw with the same styling.
log_ky_raw <- prepare_quantile_trend_graph(
  smp,
  "year",
  c(0.05, 0.25, 0.5, 0.75, 0.95),
  "log_ky_raw"
)$plot
log_ky_raw <- log_ky_raw +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Capital-Output Ratio")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
# Save the capital-output ratio plot. The plot is passed explicitly so the
# file does not depend on whichever plot ggplot2 last touched.
ggsave(
  "quintiles_hiYes_log_ky_raw.pdf",
  plot = log_ky_raw,
  width = 6,
  height = 4
)

# Quantile trend plot of log_h_raw with the same styling.
log_h_raw <- prepare_quantile_trend_graph(
  smp,
  "year",
  c(0.05, 0.25, 0.5, 0.75, 0.95),
  "log_h_raw"
)$plot
log_h_raw <- log_h_raw +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Human Capital")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
# Save the human capital plot. The plot is passed explicitly so the file
# does not depend on whichever plot ggplot2 last touched.
ggsave(
  "quintiles_hiYes_log_h_raw.pdf",
  plot = log_h_raw,
  width = 6,
  height = 4
)

# Quantile trend plot of log_tfp_raw with the same styling.
log_tfp_raw <- prepare_quantile_trend_graph(
  smp,
  "year",
  c(0.05, 0.25, 0.5, 0.75, 0.95),
  "log_tfp_raw"
)$plot
log_tfp_raw <- log_tfp_raw +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Aggregate Efficiency")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
# Save the aggregate efficiency plot. The plot is passed explicitly so the
# file does not depend on whichever plot ggplot2 last touched.
ggsave(
  "quintiles_hiYes_log_tfp_raw.pdf",
  plot = log_tfp_raw,
  width = 6,
  height = 4
)
# Correlation graph over columns 3-25 and 29-40 of the sample.
# NOTE(review): the selection is positional, so it depends on the column
# order of `smp` -- confirm it still matches the intended variables.
df <- smp
ret <- prepare_correlation_graph(df[, c(3:25, 29:40)])

# Scatter plot of log_GDPpc against log_lp on complete cases, coloured by
# region and sized by pop.
df <- smp
df <- df[, c("country", "year", "log_lp", "log_GDPpc", "region", "pop")]
df <- df[complete.cases(df), ]
df$region <- as.factor(df$region)
prepare_scatter_plot(
  df,
  "log_lp",
  "log_GDPpc",
  color = "region",
  size = "pop",
  loess = 1
)
# Regression of log_lp on log_ky, log_h and log_tfp using complete cases,
# with country and year passed as fixed effects and as clusters.
df <- smp
df <- df[, c("log_lp", "log_ky", "log_h", "log_tfp", "country", "year")]
df <- droplevels(df[complete.cases(df), ])
t <- prepare_regression_table(
  df,
  dvs = "log_lp",
  idvs = c("log_ky", "log_h", "log_tfp"),
  feffects = c("country", "year"),
  clusters = c("country", "year"),
  models = "ols"
)
length of NULL cannot be changed
length of NULL cannot be changed
length of NULL cannot be changed
# Emit the `table` element of the regression result, wrapped in HTML().
HTML(t$table)
Dependent variable: | |
log_lp | |
log_ky | 0.469*** |
(0.060) | |
log_h | 0.435*** |
(0.135) | |
log_tfp | 1.180*** |
(0.064) | |
Estimator | ols |
Fixed effects | country, year |
Std. errors clustered | country, year |
Observations | 650 |
R2 | 0.988 |
Adjusted R2 | 0.987 |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
This Notebook has been automatically generated using the ExPanDaR package.