TBA
suppressWarnings(suppressMessages({
library(knitr)
library(kableExtra)
library(htmltools)
library(tidyverse)
library(scales)
library(ExPanDaR)
}))
# Center every figure produced by the chunks of this notebook.
knitr::opts_chunk$set(fig.align = "center")
This step reads the raw data provided by ExPanD()
and generates the sample for the analysis.
# Build the analysis sample from raw data and its variable definitions.
#
# Args:
#   df:     data frame holding the raw data.
#   df_def: data frame with one row per variable and the columns
#           var_name, type and can_be_na.
#
# Returns:
#   A list with two elements: df (the cleaned sample) and df_def (the
#   definition rows of the variables that were kept).
create_sample <- function(df, df_def) {
  var_names <- as.character(df_def$var_name)

  # Set non-finite values (Inf, -Inf, NaN) of numerical variables to NA.
  # Indexing with df[cols] keeps a data frame even for a single column, so
  # lapply() always iterates over columns and never over the elements of
  # one vector.
  num_vars <- var_names[df_def$type %in% "numeric"]
  if (length(num_vars) > 0) {
    df[num_vars] <- lapply(df[num_vars], function(x) {
      x[!is.finite(x)] <- NA
      x
    })
  }

  # Delete variables that only contain NAs. The check walks df_def's own
  # variable names so the mask always lines up with the rows of df_def,
  # regardless of the column order of df.
  all_na_vars <- vapply(
    var_names,
    function(v) all(is.na(df[[v]])),
    logical(1),
    USE.NAMES = FALSE
  )
  df_def <- df_def[!all_na_vars, , drop = FALSE]
  var_names <- var_names[!all_na_vars]
  df <- df[, var_names, drop = FALSE]

  # Drop observations that are NA in variables that are not allowed to be.
  # Skipped when no such variable exists, because complete.cases() cannot
  # determine the number of cases from a zero-column data frame.
  required <- var_names[df_def$can_be_na %in% FALSE]
  if (length(required) > 0) {
    df <- df[complete.cases(df[, required, drop = FALSE]), , drop = FALSE]
  }

  df <- droplevels(df)
  list(df = df, df_def = df_def)
}
# Load the notebook data file and derive the analysis sample from the
# nb_df / nb_df_def objects (presumably provided by that file).
load("ExPanD_nb_data.Rdata")
smp_list <- create_sample(df = nb_df, df_def = nb_df_def)
smp <- smp_list[["df"]]
smp_def <- smp_list[["df_def"]]
# Bar chart: share of each hi1990 group per year (bars filled to 100 %).
df <- smp
df[c("year", "hi1990")] <- lapply(df[c("year", "hi1990")], as.factor)
p <- ggplot(df, aes(x = year)) +
  geom_bar(aes(fill = hi1990), position = "fill") +
  labs(x = "year", fill = "hi1990", y = "Percent") +
  scale_y_continuous(labels = percent_format()) +
  # Only label a "pretty" subset of the years on the discrete x axis.
  scale_x_discrete(
    breaks = pretty(as.numeric(as.character(df$year)), n = 10)
  )
p
# Missing-values graph of the sample, organized by year.
df <- smp
prepare_missing_values_graph(df, ts_id = "year")
# Descriptive statistics table for the sample, rendered as a styled kable.
df <- smp
t <- prepare_descriptive_table(smp)
t$kable_ret %>%
  # FALSE is spelled out: the shorthand F is an ordinary, reassignable
  # variable.
  kable_styling("condensed", full_width = FALSE, position = "center")
Variable | N | Mean | Std. dev. | Min. | 25 % | Median | 75 % | Max. |
---|---|---|---|---|---|---|---|---|
Y | 2,700 | 583,984.538 | 1,647,698.584 | 2,777.000 | 27,881.000 | 125,138.000 | 399,547.000 | 16,810,260.000 |
K | 2,700 | 1,822,569.809 | 5,261,927.515 | 2,004.000 | 69,041.250 | 342,061.500 | 1,293,640.750 | 69,379,696.000 |
pop | 2,700 | 53.207 | 163.005 | 1.316 | 5.891 | 14.126 | 40.377 | 1,369.436 |
L | 2,700 | 23.456 | 81.801 | 0.565 | 2.475 | 5.045 | 14.792 | 798.368 |
s | 2,700 | 7.678 | 3.030 | 0.893 | 5.224 | 8.021 | 10.273 | 14.996 |
alpha_it | 2,250 | 0.456 | 0.112 | 0.134 | 0.381 | 0.453 | 0.527 | 0.836 |
GDPpc | 2,700 | 13,515.155 | 14,012.510 | 313.981 | 2,453.331 | 8,046.677 | 20,912.062 | 81,879.516 |
lp | 2,700 | 31,371.190 | 29,035.164 | 991.421 | 6,559.711 | 21,222.568 | 51,459.522 | 153,111.469 |
h | 2,700 | 2.993 | 0.792 | 1.320 | 2.334 | 3.038 | 3.665 | 5.202 |
kl | 2,700 | 102,388.866 | 109,835.164 | 725.042 | 15,844.668 | 59,226.441 | 166,845.258 | 552,395.188 |
kp | 2,700 | 0.442 | 0.367 | 0.088 | 0.288 | 0.355 | 0.486 | 5.367 |
ky | 2,700 | 2.910 | 1.254 | 0.186 | 2.059 | 2.817 | 3.476 | 11.308 |
TFP | 2,700 | 949.055 | 774.396 | 70.087 | 361.980 | 753.693 | 1,311.020 | 5,388.233 |
log_GDPpc_raw | 2,700 | 8.868 | 1.247 | 5.749 | 7.805 | 8.993 | 9.948 | 11.313 |
log_lp_raw | 2,700 | 9.797 | 1.179 | 6.899 | 8.789 | 9.963 | 10.849 | 11.939 |
log_ky_raw | 2,700 | 0.966 | 0.487 | -1.680 | 0.722 | 1.036 | 1.246 | 2.425 |
log_h_raw | 2,700 | 1.057 | 0.287 | 0.278 | 0.848 | 1.111 | 1.299 | 1.649 |
log_tfp_raw | 2,700 | 6.537 | 0.831 | 4.250 | 5.892 | 6.625 | 7.179 | 8.592 |
log_GDPpc | 2,700 | 8.868 | 1.241 | 6.134 | 7.808 | 8.992 | 9.953 | 11.368 |
log_lp | 2,700 | 9.797 | 1.174 | 7.050 | 8.799 | 9.971 | 10.851 | 11.984 |
log_ky | 2,700 | 0.966 | 0.467 | -1.807 | 0.735 | 1.034 | 1.243 | 2.024 |
log_h | 2,700 | 1.057 | 0.287 | 0.266 | 0.848 | 1.112 | 1.297 | 1.615 |
log_tfp | 2,700 | 6.537 | 0.824 | 4.451 | 5.876 | 6.630 | 7.182 | 8.595 |
club_log_lp | 2,600 | 3.760 | 2.372 | 1.000 | 2.000 | 3.000 | 5.000 | 10.000 |
finalclub_log_lp | 2,600 | 1.298 | 0.618 | 1.000 | 1.000 | 1.000 | 1.000 | 4.000 |
re_log_lp | 2,700 | 1.000 | 0.118 | 0.730 | 0.903 | 1.019 | 1.096 | 1.188 |
club_log_ky | 2,700 | 1.000 | 0.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
finalclub_log_ky | 2,700 | 1.000 | 0.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
re_log_ky | 2,700 | 1.000 | 0.498 | -2.441 | 0.766 | 1.066 | 1.274 | 2.175 |
club_log_h | 2,700 | 1.463 | 0.552 | 1.000 | 1.000 | 1.000 | 2.000 | 3.000 |
finalclub_log_h | 2,700 | 1.463 | 0.552 | 1.000 | 1.000 | 1.000 | 2.000 | 3.000 |
re_log_h | 2,700 | 1.000 | 0.267 | 0.280 | 0.822 | 1.055 | 1.219 | 1.532 |
club_log_tfp | 2,650 | 2.623 | 1.916 | 1.000 | 1.000 | 2.000 | 4.000 | 7.000 |
finalclub_log_tfp | 2,650 | 1.566 | 0.869 | 1.000 | 1.000 | 1.000 | 2.000 | 4.000 |
re_log_tfp | 2,700 | 1.000 | 0.125 | 0.661 | 0.897 | 1.009 | 1.096 | 1.310 |
# Histogram of log_lp, restricted to the observations of 1990.
var <- as.numeric(smp[["log_lp"]][smp[["year"]] == "1990"])
hist(
  var,
  breaks = 25,
  right = FALSE,
  col = "red",
  main = "",
  xlab = "log_lp"
)
# Table of the most extreme GDPpc observations in 1990.
df <- smp
# Each column is requested exactly once: a repeated name in the subset below
# would be duplicated in the data frame (as year.1) and show up as a
# redundant table column.
vars <- c("country", "year", "GDPpc")
df <- df[df$year == "1990", ]
df <- df[, vars]
df <- droplevels(df[complete.cases(df), ])
if (nrow(df) <= 10) {
  cat("Not enough data to generate table")
} else {
  tab <- prepare_ext_obs_table(df, var = "GDPpc")
  tab$kable_ret %>%
    kable_styling()
}
country | year | year.1 | GDPpc |
---|---|---|---|
Switzerland | 1990 | 1990 | 37,503.441 |
United States | 1990 | 1990 | 36,621.109 |
Norway | 1990 | 1990 | 30,351.324 |
Canada | 1990 | 1990 | 30,233.379 |
Australia | 1990 | 1990 | 27,215.979 |
... | ... | ... | ... |
Uganda | 1990 | 1990 | 805.954 |
Myanmar | 1990 | 1990 | 799.932 |
Mali | 1990 | 1990 | 727.063 |
Yemen | 1990 | 1990 | 694.167 |
Mozambique | 1990 | 1990 | 526.026 |
# Bar graph of mean lp by hi1990 group for 1990, bars ordered by the statistic.
df <- smp[smp$year == "1990", ]
bar_graph <- prepare_by_group_bar_graph(
  df, "hi1990", "lp",
  stat_fun = mean,
  order_by_stat = TRUE
)
bar_graph[["plot"]] + ylab("mean lp")
# Violin graph of log_lp by region, groups ordered by their mean.
df <- smp
prepare_by_group_violin_graph(
  df,
  by_var = "region",
  var = "log_lp",
  order_by_mean = TRUE
)
# Trend graph of lp over the years.
df <- smp
prepare_trend_graph(df, ts_id = "year", var = "lp")[["plot"]]
# Quantile trend graph of lp over the years (5th to 95th percentile).
df <- smp
prepare_quantile_trend_graph(
  df,
  ts_id = "year",
  quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95),
  var = "lp"
)[["plot"]]
# Quantile trend plot of log_lp_raw with a minimal theme, a reversed legend
# titled "Quantile" and a descriptive y-axis label.
log_lp_raw <- prepare_quantile_trend_graph(
  smp,
  ts_id = "year",
  quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95),
  var = "log_lp_raw"
)[["plot"]] +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Labor Productivity")
Scale for 'colour' is already present. Adding another scale for 'colour', which
will replace the existing scale.
# Save the styled plot object explicitly rather than whatever plot ggplot2
# happens to remember as the most recent one.
ggsave(
  "quintiles_all_log_lp_raw.pdf",
  plot = log_lp_raw,
  width = 6,
  height = 4
)
# Quantile trend plot of re_log_lp. The base plot is stored; the styled
# version (minimal theme, reversed "Quantile" legend, y-axis label) is only
# displayed, not assigned.
Qre_log_lp_World <- prepare_quantile_trend_graph(
  smp,
  ts_id = "year",
  quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95),
  var = "re_log_lp"
)[["plot"]]
Qre_log_lp_World +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Relative (log) labor productivity")
Scale for 'colour' is already present. Adding another scale for 'colour', which
will replace the existing scale.
# No plot object is passed, so ggsave() falls back to its default plot
# (presumably the styled plot built just before this call - verify).
ggsave(filename = "Qre_log_lp_World.pdf", width = 6, height = 4)
# Quantile trend plot of log_ky_raw with a minimal theme, a reversed legend
# titled "Quantile" and a descriptive y-axis label.
log_ky_raw <- prepare_quantile_trend_graph(
  smp,
  ts_id = "year",
  quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95),
  var = "log_ky_raw"
)[["plot"]] +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Capital-Output Ratio")
Scale for 'colour' is already present. Adding another scale for 'colour', which
will replace the existing scale.
# Save the styled plot object explicitly rather than whatever plot ggplot2
# happens to remember as the most recent one.
ggsave(
  "quintiles_all_log_ky_raw.pdf",
  plot = log_ky_raw,
  width = 6,
  height = 4
)
# Quantile trend plot of log_h_raw with a minimal theme, a reversed legend
# titled "Quantile" and a descriptive y-axis label.
log_h_raw <- prepare_quantile_trend_graph(
  smp,
  ts_id = "year",
  quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95),
  var = "log_h_raw"
)[["plot"]] +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Human Capital")
Scale for 'colour' is already present. Adding another scale for 'colour', which
will replace the existing scale.
# Save the styled plot object explicitly rather than whatever plot ggplot2
# happens to remember as the most recent one.
ggsave(
  "quintiles_all_log_h_raw.pdf",
  plot = log_h_raw,
  width = 6,
  height = 4
)
# Quantile trend plot of log_tfp_raw with a minimal theme, a reversed legend
# titled "Quantile" and a descriptive y-axis label.
log_tfp_raw <- prepare_quantile_trend_graph(
  smp,
  ts_id = "year",
  quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95),
  var = "log_tfp_raw"
)[["plot"]] +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Aggregate Efficiency")
Scale for 'colour' is already present. Adding another scale for 'colour', which
will replace the existing scale.
# Save the styled plot object explicitly rather than whatever plot ggplot2
# happens to remember as the most recent one.
ggsave(
  "quintiles_all_log_tfp_raw.pdf",
  plot = log_tfp_raw,
  width = 6,
  height = 4
)
# Correlation graph over a fixed set of sample columns, selected by position
# (columns 3-25, 29-31 and 34-40).
df <- smp
ret <- prepare_correlation_graph(df[, c(3:25, 29:31, 34:40)])
# Scatter plot of log_GDPpc against log_lp, colored by region and sized by
# pop, drawn from a reproducible random subsample of 1000 complete rows.
df <- smp[, c("country", "year", "log_lp", "log_GDPpc", "region", "pop")]
df <- df[complete.cases(df), ]
df[["region"]] <- as.factor(df[["region"]])
# Fixed seed so the same 1000 observations are drawn on every run.
set.seed(42)
df <- sample_n(df, size = 1000)
prepare_scatter_plot(
  df, "log_lp", "log_GDPpc",
  color = "region",
  size = "pop",
  loess = 1
)
# OLS regression of log_lp on log_ky, log_h and log_tfp with country and year
# fixed effects and clustering, estimated by hi1990 group.
df <- smp[, c("log_lp", "log_ky", "log_h", "log_tfp", "country", "year",
              "hi1990")]
df <- df[complete.cases(df), ]
df[["hi1990"]] <- as.factor(df[["hi1990"]])
df <- droplevels(df)
t <- prepare_regression_table(
  df,
  dvs = "log_lp",
  idvs = c("log_ky", "log_h", "log_tfp"),
  feffects = c("country", "year"),
  clusters = c("country", "year"),
  byvar = "hi1990",
  models = "ols"
)
length of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changed
# Emit the regression table stored in t as HTML.
HTML(t[["table"]])
Dependent variable: | |||
log_lp | |||
Full Sample | no | yes | |
(1) | (2) | (3) | |
log_ky | 0.472*** | 0.483*** | 0.469*** |
(0.031) | (0.032) | (0.060) | |
log_h | 0.226* | 0.183 | 0.435*** |
(0.132) | (0.168) | (0.135) | |
log_tfp | 1.447*** | 1.503*** | 1.180*** |
(0.042) | (0.048) | (0.064) | |
Estimator | ols | ols | ols |
Fixed effects | country, year | country, year | country, year |
Std. errors clustered | country, year | country, year | country, year |
Observations | 2,700 | 2,050 | 650 |
R2 | 0.998 | 0.997 | 0.988 |
Adjusted R2 | 0.998 | 0.997 | 0.987 |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
This Notebook has been automatically generated using the ExPanDaR package.