TBA
suppressWarnings(suppressMessages({
library(knitr)
library(kableExtra)
library(htmltools)
library(tidyverse)
library(scales)
library(ExPanDaR)
}))
knitr::opts_chunk$set(fig.align = 'center')
This step reads the raw data provided by ExPanD()
and generates the sample for the analysis.
# Build the analysis sample from the raw data and its variable definitions.
#
# Arguments:
#   df      Data frame with the raw data. It must contain every column that
#           is listed in df_def$var_name.
#   df_def  Data frame with one row per variable and (at least) the columns
#           var_name, type and can_be_na.
#
# Returns a list with two elements: df (the cleaned sample, restricted to the
# variables that were kept) and df_def (the definitions of those variables).
create_sample <- function(df, df_def) {
  var_names <- as.character(df_def$var_name)

  # Set infinite numerical variables to NA. List-style indexing keeps the
  # selection a data frame even when only one numeric variable is defined.
  num_vars <- var_names[which(df_def$type == "numeric")]
  if (length(num_vars) > 0) {
    df[num_vars] <- lapply(df[num_vars], function(x) {
      x[!is.finite(x)] <- NA
      x
    })
  }

  # Delete variables that only contain NAs. The check runs over the defined
  # variables (not over all columns of df) so that the result lines up with
  # the rows of df_def regardless of the column order of df.
  all_na_vars <- vapply(
    df[var_names],
    function(x) all(is.na(x)),
    logical(1)
  )
  df_def <- df_def[!all_na_vars, , drop = FALSE]
  df <- df[as.character(df_def$var_name)]

  # Drop observations that are NA in variables that are not allowed to be.
  # complete.cases() cannot handle a selection without columns, so the step
  # is skipped when no variable is marked as required.
  required <- as.character(df_def$var_name[which(df_def$can_be_na == FALSE)])
  if (length(required) > 0) {
    df <- df[complete.cases(df[required]), , drop = FALSE]
  }

  df <- droplevels(df)
  list(df = df, df_def = df_def)
}
# Load the stored notebook data and build the analysis sample.
# NOTE(review): load() is expected to supply nb_df and nb_df_def - confirm
# against the contents of the .Rdata file.
load("ExPanD_nb_data.Rdata")
smp_list <- create_sample(nb_df, nb_df_def)
smp <- smp_list$df
smp_def <- smp_list$df_def

# Bar chart of the number of observations per year
df <- smp
df$year <- as.factor(df$year)
p <- ggplot(df, aes(x = year)) +
  geom_bar() +
  labs(x = "year")
# Label only a "pretty" subset (about 10) of the year levels on the x axis
p <- p + scale_x_discrete(
  breaks = pretty(as.numeric(as.character(df$year)), n = 10)
)
p

# Missing values graph along the "year" dimension
df <- smp
prepare_missing_values_graph(df, "year")

# Descriptive statistics table; FALSE is spelled out because the shorthand F
# is an ordinary variable that can be reassigned
df <- smp
t <- prepare_descriptive_table(smp)
t$kable_ret %>%
  kable_styling("condensed", full_width = FALSE, position = "center")
| | N | Mean | Std. dev. | Min. | 25 % | Median | 75 % | Max. |
---|---|---|---|---|---|---|---|---|
Y | 2,050 | 352,201.530 | 1,102,853.740 | 2,777.000 | 19,869.499 | 57,618.500 | 265,002.500 | 16,810,260.000 |
K | 2,050 | 1,017,835.807 | 3,635,318.526 | 2,004.000 | 46,608.000 | 135,641.000 | 751,916.250 | 69,379,696.000 |
pop | 2,050 | 58.787 | 183.826 | 1.316 | 5.885 | 14.773 | 39.932 | 1,369.436 |
L | 2,050 | 25.637 | 92.429 | 0.565 | 2.235 | 5.186 | 14.301 | 798.368 |
s | 2,050 | 6.855 | 2.918 | 0.893 | 4.558 | 6.921 | 9.235 | 13.303 |
alpha_it | 1,600 | 0.467 | 0.121 | 0.134 | 0.394 | 0.469 | 0.553 | 0.836 |
GDPpc | 2,050 | 6,883.863 | 6,252.230 | 313.981 | 1,896.098 | 4,769.369 | 10,289.792 | 34,585.176 |
lp | 2,050 | 17,779.082 | 15,015.283 | 991.421 | 5,230.301 | 13,171.517 | 26,803.804 | 66,454.039 |
h | 2,050 | 2.775 | 0.752 | 1.320 | 2.177 | 2.752 | 3.368 | 4.613 |
kl | 2,050 | 53,204.908 | 57,725.995 | 725.042 | 10,905.244 | 33,394.666 | 78,021.984 | 383,185.594 |
kp | 2,050 | 0.486 | 0.409 | 0.088 | 0.302 | 0.388 | 0.530 | 5.367 |
ky | 2,050 | 2.728 | 1.300 | 0.186 | 1.886 | 2.577 | 3.308 | 11.308 |
TFP | 2,050 | 703.846 | 537.702 | 70.087 | 306.778 | 542.910 | 927.293 | 3,306.472 |
log_GDPpc_raw | 2,050 | 8.382 | 1.018 | 5.749 | 7.548 | 8.470 | 9.239 | 10.451 |
log_lp_raw | 2,050 | 9.357 | 1.005 | 6.899 | 8.562 | 9.486 | 10.196 | 11.104 |
log_ky_raw | 2,050 | 0.886 | 0.516 | -1.680 | 0.635 | 0.947 | 1.196 | 2.425 |
log_h_raw | 2,050 | 0.982 | 0.283 | 0.278 | 0.778 | 1.012 | 1.214 | 1.529 |
log_tfp_raw | 2,050 | 6.281 | 0.757 | 4.250 | 5.726 | 6.297 | 6.832 | 8.104 |
log_GDPpc | 2,050 | 8.382 | 1.010 | 6.134 | 7.563 | 8.452 | 9.224 | 10.476 |
log_lp | 2,050 | 9.357 | 0.997 | 7.050 | 8.597 | 9.476 | 10.195 | 11.141 |
log_ky | 2,050 | 0.886 | 0.495 | -1.807 | 0.641 | 0.943 | 1.187 | 2.024 |
log_h | 2,050 | 0.982 | 0.282 | 0.266 | 0.775 | 1.008 | 1.214 | 1.531 |
log_tfp | 2,050 | 6.281 | 0.748 | 4.451 | 5.704 | 6.300 | 6.832 | 8.125 |
club_log_lp | 2,025 | 2.370 | 1.427 | 1.000 | 1.000 | 2.000 | 4.000 | 6.000 |
finalclub_log_lp | 2,025 | 1.753 | 1.106 | 1.000 | 1.000 | 1.000 | 3.000 | 5.000 |
re_log_lp | 2,050 | 1.000 | 0.104 | 0.760 | 0.918 | 1.016 | 1.093 | 1.169 |
club_log_ky | 2,050 | 1.000 | 0.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
finalclub_log_ky | 2,050 | 1.000 | 0.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
re_log_ky | 2,050 | 1.000 | 0.590 | -2.936 | 0.737 | 1.057 | 1.300 | 2.509 |
club_log_h | 2,050 | 2.280 | 1.373 | 1.000 | 1.000 | 2.000 | 3.000 | 5.000 |
finalclub_log_h | 2,050 | 1.439 | 0.718 | 1.000 | 1.000 | 1.000 | 2.000 | 3.000 |
re_log_h | 2,050 | 1.000 | 0.282 | 0.306 | 0.778 | 1.013 | 1.243 | 1.573 |
club_log_tfp | 2,025 | 3.728 | 1.218 | 1.000 | 3.000 | 4.000 | 4.000 | 6.000 |
finalclub_log_tfp | 2,025 | 1.951 | 0.888 | 1.000 | 1.000 | 2.000 | 2.000 | 4.000 |
re_log_tfp | 2,050 | 1.000 | 0.118 | 0.683 | 0.910 | 1.012 | 1.090 | 1.247 |
# Histogram of log_lp restricted to the observations with year "1990"
var <- as.numeric(smp$log_lp[smp$year == "1990"])
hist(
  var,
  breaks = 25,
  right = FALSE,
  col = "red",
  main = "",
  xlab = "log_lp"
)
# Table of the extreme GDPpc observations for year "1990"
df <- smp
# unique() removes the repeated "year" entry; selecting the same column twice
# creates a duplicated column (rendered as "year.1") in the table
vars <- unique(c("country", "year", "GDPpc", "year"))
df <- df[df$year == "1990", ]
df <- df[, vars]
df <- droplevels(df[complete.cases(df), ])
# The table is only built when more than 10 complete observations remain
if (nrow(df) <= 10) {
  cat("Not enough data to generate table")
} else {
  tab <- prepare_ext_obs_table(df, var = "GDPpc")
  tab$kable_ret %>%
    kable_styling()
}
country | year | year.1 | GDPpc |
---|---|---|---|
Czech Republic | 1990 | 1990 | 20,417.098 |
Russia | 1990 | 1990 | 18,293.967 |
Slovenia | 1990 | 1990 | 17,182.484 |
Slovak Republic | 1990 | 1990 | 16,293.812 |
Latvia | 1990 | 1990 | 14,869.078 |
... | ... | ... | ... |
Uganda | 1990 | 1990 | 805.954 |
Myanmar | 1990 | 1990 | 799.932 |
Mali | 1990 | 1990 | 727.063 |
Yemen | 1990 | 1990 | 694.167 |
Mozambique | 1990 | 1990 | 526.026 |
# Bar graph of mean lp by region, using only the observations of year "1990"
df <- smp[smp$year == "1990", ]
prepare_by_group_bar_graph(df, "region", "lp", mean, TRUE)$plot +
  ylab("mean lp")

# The remaining graphs of this section use the full sample
df <- smp
# Violin graph of log_lp by region
prepare_by_group_violin_graph(df, "region", "log_lp", TRUE)
# Trend graph of lp over year
prepare_trend_graph(df, "year", "lp")$plot
# Quantile trend graph of lp over year
prepare_quantile_trend_graph(
  df, "year", c(0.05, 0.25, 0.5, 0.75, 0.95), "lp"
)$plot
# Quantile trend graph of log_lp_raw over year, restyled with a minimal
# theme, a reversed legend titled "Quantile" and a descriptive y axis label
log_lp_raw <- prepare_quantile_trend_graph(
  smp, "year", c(0.05, 0.25, 0.5, 0.75, 0.95), "log_lp_raw"
)$plot +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Labor Productivity")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
# Save the labor productivity graph; the plot object is passed explicitly so
# the file does not depend on whichever plot happened to be touched last
ggsave("quintiles_hiNo_log_lp_raw.pdf", plot = log_lp_raw, width = 6, height = 4)
# Quantile trend graph of log_ky_raw over year, restyled with a minimal
# theme, a reversed legend titled "Quantile" and a descriptive y axis label
log_ky_raw <- prepare_quantile_trend_graph(
  smp, "year", c(0.05, 0.25, 0.5, 0.75, 0.95), "log_ky_raw"
)$plot +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Capital-Output Ratio")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
# Save the capital-output ratio graph; the plot object is passed explicitly
# so the file does not depend on whichever plot happened to be touched last
ggsave("quintiles_hiNo_log_ky_raw.pdf", plot = log_ky_raw, width = 6, height = 4)
# Quantile trend graph of log_h_raw over year, restyled with a minimal
# theme, a reversed legend titled "Quantile" and a descriptive y axis label
log_h_raw <- prepare_quantile_trend_graph(
  smp, "year", c(0.05, 0.25, 0.5, 0.75, 0.95), "log_h_raw"
)$plot +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Human Capital")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
# Save the human capital graph; the plot object is passed explicitly so the
# file does not depend on whichever plot happened to be touched last
ggsave("quintiles_hiNo_log_h_raw.pdf", plot = log_h_raw, width = 6, height = 4)
# Quantile trend graph of log_tfp_raw over year, restyled with a minimal
# theme, a reversed legend titled "Quantile" and a descriptive y axis label
log_tfp_raw <- prepare_quantile_trend_graph(
  smp, "year", c(0.05, 0.25, 0.5, 0.75, 0.95), "log_tfp_raw"
)$plot +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_discrete(name = "Quantile") +
  labs(x = "", y = "Log of Aggregate Efficiency")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
# Save the aggregate efficiency graph; the plot object is passed explicitly
# so the file does not depend on whichever plot happened to be touched last
ggsave("quintiles_hiNo_log_tfp_raw.pdf", plot = log_tfp_raw, width = 6, height = 4)
# Correlation graph over a fixed set of sample columns, selected by position
# (columns 3 to 25, 29 to 31 and 34 to 40)
df <- smp
ret <- prepare_correlation_graph(df[, c(3:25, 29:31, 34:40)])
# Scatter plot of log_GDPpc against log_lp, colored by region and sized by
# pop, drawn on a reproducible random subsample of the complete observations
df <- smp
df <- df[, c("country", "year", "log_lp", "log_GDPpc", "region", "pop")]
df <- df[complete.cases(df), ]
df$region <- as.factor(df$region)
set.seed(42)
# Cap the sample size at the number of available rows: sample_n() without
# replacement raises an error when asked for more rows than exist
df <- sample_n(df, min(1000, nrow(df)))
prepare_scatter_plot(df, "log_lp", "log_GDPpc", color = "region", size = "pop", loess = 1)
# Regression of log_lp on log_ky, log_h and log_tfp, restricted to complete
# observations; fixed effects and clustering both use country and year
df <- smp[, c("log_lp", "log_ky", "log_h", "log_tfp", "country", "year")]
df <- droplevels(df[complete.cases(df), ])
t <- prepare_regression_table(
  df,
  dvs = "log_lp",
  idvs = c("log_ky", "log_h", "log_tfp"),
  feffects = c("country", "year"),
  clusters = c("country", "year"),
  models = "ols"
)
length of NULL cannot be changedlength of NULL cannot be changedlength of NULL cannot be changed
# Emit the regression table stored in t$table as raw HTML
htmltools::HTML(t$table)
Dependent variable: | |
log_lp | |
log_ky | 0.483*** |
(0.032) | |
log_h | 0.183 |
(0.168) | |
log_tfp | 1.503*** |
(0.048) | |
Estimator | ols |
Fixed effects | country, year |
Std. errors clustered | country, year |
Observations | 2,050 |
R2 | 0.997 |
Adjusted R2 | 0.997 |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
This Notebook has been automatically generated using the ExPanDaR package.