# ---- Setup and data import --------------------------------------------------

# Load required packages once, at the top of the script
library(readxl)
library(ggplot2)
library(dplyr)

# Import the workbook; replace the path with the location of your Excel file.
# Blank cells, ":" and 0 are all read as NA.
data <- read_excel(
  "[..]//SM_ExpPat_HH_Expenditure.xlsx",
  na = c("", ":", 0)
)

# View the first few rows of the imported data
head(data)

# Difference between CP04_2020 and CP045_2020
data$new_var <- data$CP04_2020 - data$CP045_2020

# "basics" is the sum of CP01_2020, CP02_2020 and CP04_2020
data$basics <- data$CP01_2020 + data$CP02_2020 + data$CP04_2020

# Count and list the distinct values of one column.
# NOTE(review): `column_name` reads like an unfilled template placeholder;
# replace it with the column that should actually be inspected.
unique_characters <- unique(data$column_name)
num_unique_characters <- length(unique_characters)
print(num_unique_characters)
print(unique_characters)

# Define color palette (names are the codes used for color = V below)
color_palette <- c(
  "LU" = "firebrick", "AT" = "red", "BE" = "salmon", "BG" = "tomato",
  "CY" = "lightblue", "CZ" = "skyblue", "DE" = "darkblue", "DK" = "royalblue",
  "EA" = "deepskyblue", "EA12" = "black", "EA13" = "gray40", "EA17" = "gray60",
  "EA18" = "gray80", "EE" = "white", "EEA28" = "yellow",
  "EEA30_2007" = "yellow", "EFTA" = "darkgreen", "EL" = "green",
  "ES" = "limegreen", "EU15" = "palegreen", "EU25" = "purple",
  "EU27_2007" = "orange", "EU27_2020" = "orchid", "FI" = "cyan",
  "FR" = "magenta", "HR" = "violet", "HU" = "brown", "IE" = "tan",
  "IT" = "khaki", "LT" = "gold", "LV" = "purple", "ME" = "beige",
  "MK" = "navy", "MT" = "lavender", "NL" = "pink", "NO" = "maroon",
  "PL" = "coral", "PT" = "azure", "RO" = "ivory", "RS" = "aquamarine",
  "SE" = "blue", "SI" = "turquoise", "SK" = "pink", "TR" = "salmon",
  "UK" = "tomato", "XK" = "red"
)

# ---- Food vs CP045_2020 (complete cases only) -------------------------------

# Calculate "Food"
data$Food <- data$CP01_2020 + data$CP02_2020

# Remove rows with missing data in CP045_2020 or Food
data_clean <- data %>%
  filter(!is.na(CP045_2020) & !is.na(Food))

# Define variables to correlate
variables_2020 <- c("Food")

# Pearson and Spearman tests for Food and CP045_2020
pearson_test_2020 <- cor.test(
  data_clean$Food, data_clean$CP045_2020,
  method = "pearson"
)
spearman_test_2020 <- cor.test(
  data_clean$Food, data_clean$CP045_2020,
  method = "spearman"
)

# Linear regression for Food and CP045_2020. The same model used to be
# refitted three times; it is fitted once and shared under both names.
lm_model_2020 <- lm(Food ~ CP045_2020, data = data_clean)
lm_model <- lm_model_2020

# Predicted value and 95% prediction interval for Food when CP045_2020 is 100
# (0.95 is also the default level of predict()).
predicted_value <- predict(lm_model, newdata = data.frame(CP045_2020 = 100))
prediction_interval <- predict(
  lm_model,
  newdata = data.frame(CP045_2020 = 100),
  interval = "prediction",
  level = 0.95
)

# Print correlation coefficients and tests
print(paste(
  "Correlation coefficient (Pearson) for Food and CP045_2020:",
  pearson_test_2020$estimate
))
print(paste(
  "Correlation coefficient (Spearman) for Food and CP045_2020:",
  spearman_test_2020$estimate
))

# Print Pearson test results
print("Pearson Test Results for Food and CP045_2020:")
print(pearson_test_2020)

# Print Spearman test results
print("Spearman Test Results for Food and CP045_2020:")
print(spearman_test_2020)

# Print regression summary
print(summary(lm_model_2020))

# Print prediction interval
print("Prediction Interval for Food when CP045_2020 = 100:")
print(prediction_interval)

# Print predicted value and interval
print("Predicted Value of Food when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for Food when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Print formula
print("Linear Regression Formula:")
print(summary(lm_model)$call)

# Print R-squared value
print("R-squared:")
print(summary(lm_model)$r.squared)

# Print p-value
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Create scatter plot for Food and CP045_2020, coloured by V
scatter_plot_2020 <- ggplot(
  data_clean,
  aes(x = CP045_2020, y = Food, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "Food", color = "Country") +
  ggtitle("Scatter Plot of Food vs CP045_2020") +
  theme_minimal()

# Print scatter plot
print(scatter_plot_2020)

# ---- Food vs CP045_2020 (all rows of `data`) --------------------------------

# Load required libraries
library(ggplot2)

# Define color palette (names are the codes used for color = A below).
# NOTE(review): the sibling keys end in "_DCH"; "A2_CDH" may be a typo for
# "A2_DCH" - confirm against the values present in column A.
color_palette <- c(
  A_GE3 = "firebrick", A_GE3_DCH = "red", A2 = "salmon", A2_CDH = "tomato",
  A1 = "lightblue", A1_DCH = "skyblue", DEG1 = "darkblue", DEG2 = "royalblue",
  DEG3 = "deepskyblue", QUINTILE1 = "black", QUINTILE2 = "gray40",
  QUINTILE3 = "gray60", QUINTILE4 = "gray80", QUINTILE5 = "white",
  UNK = "yellow", UNKUNK_Italy = "yellow", Y_GE60 = "darkgreen",
  Y45_59 = "green", Y30_44 = "limegreen", Y_LT30 = "palegreen"
)

# Calculate "Food"
data$Food <- data$CP01_2020 + data$CP02_2020

# Define variables to correlate
variables_2020 <- c("Food")

# Pearson test for Food and CP045_2020
pearson_test_2020 <- cor.test(data$Food, data$CP045_2020, method = "pearson")

# Spearman test for Food and CP045_2020
spearman_test_2020 <- cor.test(data$Food, data$CP045_2020, method = "spearman")

# Linear regression for Food and CP045_2020.
# The model is fitted with bare column names and `data =`. With a formula
# written as data$Food ~ data$CP045_2020, predict() cannot match the column
# in `newdata` and returns one interval per row of `data` instead of the
# single interval at CP045_2020 = 100.
lm_model_2020 <- lm(Food ~ CP045_2020, data = data)

# Prediction interval for Food when CP045_2020 is 100
prediction_interval <- predict(
  lm_model_2020,
  newdata = data.frame(CP045_2020 = 100),
  interval = "prediction"
)

# Print correlation coefficients and tests
print(paste(
  "Correlation coefficient (Pearson) for Food and CP045_2020:",
  pearson_test_2020$estimate
))
print(paste(
  "Correlation coefficient (Spearman) for Food and CP045_2020:",
  spearman_test_2020$estimate
))

# Print Pearson test results
print("Pearson Test Results for Food and CP045_2020:")
print(pearson_test_2020)

# Print Spearman test results
print("Spearman Test Results for Food and CP045_2020:")
print(spearman_test_2020)

# Print regression summary
print(summary(lm_model_2020))

# Print prediction interval
print("Prediction Interval for Food when CP045_2020 = 100:")
print(prediction_interval)

# Load required libraries (once for this part of the script)
library(ggplot2)
library(dplyr)

# Single new observation at which every model below is evaluated
newdata_100 <- data.frame(CP045_2020 = 100)

# Fit linear regression model (fitted once; it used to be refitted before
# each group of print statements)
lm_model <- lm(Food ~ CP045_2020, data = data)

# Predicted value and 95% prediction interval for Food when CP045_2020 is 100
predicted_value <- predict(lm_model, newdata = newdata_100)
prediction_interval <- predict(
  lm_model,
  newdata = newdata_100, interval = "prediction", level = 0.95
)

# Print predicted value and interval
print("Predicted Value of Food when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for Food when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Print formula, R-squared and slope p-value
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Palettes are kept under separate names so that a plot coloured by V can
# never pick up the palette meant for A (and vice versa).
country_palette <- c(
  "LU" = "firebrick", "AT" = "red", "BE" = "salmon", "BG" = "tomato",
  "CY" = "lightblue", "CZ" = "skyblue", "DE" = "darkblue", "DK" = "royalblue",
  "EA" = "deepskyblue", "EA12" = "black", "EA13" = "gray40", "EA17" = "gray60",
  "EA18" = "gray80", "EE" = "white", "EEA28" = "yellow",
  "EEA30_2007" = "yellow", "EFTA" = "darkgreen", "EL" = "green",
  "ES" = "limegreen", "EU15" = "palegreen", "EU25" = "purple",
  "EU27_2007" = "orange", "EU27_2020" = "orchid", "FI" = "cyan",
  "FR" = "magenta", "HR" = "violet", "HU" = "brown", "IE" = "tan",
  "IT" = "khaki", "LT" = "gold", "LV" = "purple", "ME" = "beige",
  "MK" = "navy", "MT" = "lavender", "NL" = "pink", "NO" = "maroon",
  "PL" = "coral", "PT" = "azure", "RO" = "ivory", "RS" = "aquamarine",
  "SE" = "blue", "SI" = "turquoise", "SK" = "pink", "TR" = "salmon",
  "UK" = "tomato", "XK" = "red"
)
# NOTE(review): the sibling keys end in "_DCH"; "A2_CDH" may be a typo for
# "A2_DCH" - confirm against the values present in column A.
category_palette <- c(
  A_GE3 = "firebrick", A_GE3_DCH = "red", A2 = "salmon", A2_CDH = "tomato",
  A1 = "lightblue", A1_DCH = "skyblue", DEG1 = "darkblue", DEG2 = "royalblue",
  DEG3 = "deepskyblue", QUINTILE1 = "black", QUINTILE2 = "gray40",
  QUINTILE3 = "gray60", QUINTILE4 = "gray80", QUINTILE5 = "white",
  UNK = "yellow", UNKUNK_Italy = "yellow", Y_GE60 = "darkgreen",
  Y45_59 = "green", Y30_44 = "limegreen", Y_LT30 = "palegreen"
)

# Create scatter plot for Food and CP045_2020, coloured by A
scatter_plot_2020 <- ggplot(data, aes(x = CP045_2020, y = Food, color = A)) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = category_palette) +
  labs(x = "CP045_2020", y = "Food", color = "A") +
  ggtitle("Scatter Plot of Food vs CP045_2020") +
  theme_minimal()

# Print scatter plot
print(scatter_plot_2020)

# Create scatter plot for Food and CP045_2020, coloured by V. This plot used
# to be built while `color_palette` held the A palette, so no country code
# matched a colour.
scatter_plot_2020 <- ggplot(data, aes(x = CP045_2020, y = Food, color = V)) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = country_palette) +
  labs(x = "CP045_2020", y = "Food", color = "Country") +
  ggtitle("Scatter Plot of Food vs CP045_2020") +
  theme_minimal()

# Create normal probability plot. qqnorm() draws the plot itself and returns
# only the plotted coordinates, so the result is kept but no longer printed.
normal_prob_plot <- qqnorm(
  residuals(lm_model_2020),
  main = "Normal Probability Plot of Residuals"
)
qqline(residuals(lm_model_2020))

# Print scatter plot
print(scatter_plot_2020)

# ---- Pie charts: rows with Food above 181.1764 ------------------------------

# Calculate the number of items classified by type "A"
num_items_by_A <- data %>%
  filter(Food > 181.1764) %>%
  count(A)

# Sort the counts in descending order and extract the top 5 categories
top_5_categories <- num_items_by_A %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(A)

# Red for the top 5 categories, gray for the rest
slice_colors <- ifelse(num_items_by_A$A %in% top_5_categories, "red", "gray")

# Create a pie chart with custom colors
pie(
  num_items_by_A$n,
  labels = num_items_by_A$A,
  main = "Number of Items by Type A (Above 181.1764)",
  col = slice_colors
)

# Calculate the number of items classified by country (variable V)
num_items_by_country <- data %>%
  filter(Food > 181.1764) %>%
  count(V)

# Sort the counts in descending order and extract the top 5 categories
top_5_countries <- num_items_by_country %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(V)

# Red for the top 5 categories, gray for the rest
slice_colors <- ifelse(
  num_items_by_country$V %in% top_5_countries, "red", "gray"
)

# Create a pie chart with custom colors
pie(
  num_items_by_country$n,
  labels = num_items_by_country$V,
  main = "Number of Items by Country (Above 181.1764)",
  col = slice_colors
)

# ---- CP04_2020 vs CP045_2020, coloured by country (V) -----------------------

# Define color palette
color_palette <- country_palette

# Define variables to correlate
variables_2020 <- c("CP04_2020")

# Pearson and Spearman tests for CP04_2020 and CP045_2020
pearson_test_2020 <- cor.test(
  data$CP04_2020, data$CP045_2020,
  method = "pearson"
)
spearman_test_2020 <- cor.test(
  data$CP04_2020, data$CP045_2020,
  method = "spearman"
)

# Linear regression for CP04_2020 and CP045_2020.
# Fitted with bare column names and `data =`: with a formula written as
# data$CP04_2020 ~ data$CP045_2020, predict() cannot match the column in
# `newdata` and returns one interval per row instead of the one at 100.
lm_model_2020 <- lm(CP04_2020 ~ CP045_2020, data = data)
lm_model <- lm_model_2020

# Predicted value and 95% prediction interval when CP045_2020 is 100
predicted_value <- predict(lm_model, newdata = newdata_100)
prediction_interval <- predict(
  lm_model,
  newdata = newdata_100, interval = "prediction", level = 0.95
)

# Print correlation coefficients and tests
print(paste(
  "Correlation coefficient (Pearson) for CP04_2020 and CP045_2020:",
  pearson_test_2020$estimate
))
print(paste(
  "Correlation coefficient (Spearman) for CP04_2020 and CP045_2020:",
  spearman_test_2020$estimate
))
print("Pearson Test Results for CP04_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP04_2020 and CP045_2020:")
print(spearman_test_2020)

# Print regression summary
print(summary(lm_model_2020))

# Print predicted value and prediction interval
print("Prediction Interval for CP04_2020 when CP045_2020 = 100:")
print(prediction_interval)
print("Predicted Value of CP04_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP04_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Print formula, R-squared and slope p-value
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Create scatter plot for CP04_2020 and CP045_2020
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP04_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = country_palette) +
  labs(x = "CP045_2020", y = "CP04_2020", color = "Country") +
  ggtitle("Scatter Plot of CP04_2020 vs CP045_2020") +
  theme_minimal()

# Print scatter plot
print(scatter_plot_2020)

# ---- CP04_2020 vs CP045_2020, coloured by type (A) --------------------------

# Define color palette
color_palette <- category_palette

# Define variables to correlate
variables_2020 <- c("CP04_2020")

# Pearson and Spearman tests for CP04_2020 and CP045_2020
pearson_test_2020 <- cor.test(
  data$CP04_2020, data$CP045_2020,
  method = "pearson"
)
spearman_test_2020 <- cor.test(
  data$CP04_2020, data$CP045_2020,
  method = "spearman"
)

# Linear regression for CP04_2020 and CP045_2020 (see the note on `data =`
# in the previous section)
lm_model_2020 <- lm(CP04_2020 ~ CP045_2020, data = data)
lm_model <- lm_model_2020

# Predicted value and 95% prediction interval when CP045_2020 is 100
predicted_value <- predict(lm_model, newdata = newdata_100)
prediction_interval <- predict(
  lm_model,
  newdata = newdata_100, interval = "prediction", level = 0.95
)

# Print correlation coefficients and tests
print(paste(
  "Correlation coefficient (Pearson) for CP04_2020 and CP045_2020:",
  pearson_test_2020$estimate
))
print(paste(
  "Correlation coefficient (Spearman) for CP04_2020 and CP045_2020:",
  spearman_test_2020$estimate
))
print("Pearson Test Results for CP04_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP04_2020 and CP045_2020:")
print(spearman_test_2020)

# Print regression summary
print(summary(lm_model_2020))

# Print predicted value and prediction interval
print("Prediction Interval for CP04_2020 when CP045_2020 = 100:")
print(prediction_interval)
print("Predicted Value of CP04_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP04_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Print formula, R-squared and slope p-value
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])
# Load required libraries (once for this part of the script)
library(ggplot2)
library(dplyr)

# Palette for plots coloured by V. It is kept under its own name so that a
# V-coloured plot can never pick up the palette meant for A.
country_palette <- c(
  "LU" = "firebrick", "AT" = "red", "BE" = "salmon", "BG" = "tomato",
  "CY" = "lightblue", "CZ" = "skyblue", "DE" = "darkblue", "DK" = "royalblue",
  "EA" = "deepskyblue", "EA12" = "black", "EA13" = "gray40", "EA17" = "gray60",
  "EA18" = "gray80", "EE" = "white", "EEA28" = "yellow",
  "EEA30_2007" = "yellow", "EFTA" = "darkgreen", "EL" = "green",
  "ES" = "limegreen", "EU15" = "palegreen", "EU25" = "purple",
  "EU27_2007" = "orange", "EU27_2020" = "orchid", "FI" = "cyan",
  "FR" = "magenta", "HR" = "violet", "HU" = "brown", "IE" = "tan",
  "IT" = "khaki", "LT" = "gold", "LV" = "purple", "ME" = "beige",
  "MK" = "navy", "MT" = "lavender", "NL" = "pink", "NO" = "maroon",
  "PL" = "coral", "PT" = "azure", "RO" = "ivory", "RS" = "aquamarine",
  "SE" = "blue", "SI" = "turquoise", "SK" = "pink", "TR" = "salmon",
  "UK" = "tomato", "XK" = "red"
)

# Create scatter plot for CP04_2020 and CP045_2020, coloured by A.
# `color_palette` is whatever was assigned last before this point.
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP04_2020, color = A)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "CP04_2020", color = "A") +
  ggtitle("Scatter Plot of CP04_2020 vs CP045_2020") +
  theme_minimal()

# Print scatter plot
print(scatter_plot_2020)

# Create scatter plot for CP04_2020 and CP045_2020, coloured by V. This plot
# used to reuse the palette of the A plot above, so no country code matched
# a colour.
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP04_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = country_palette) +
  labs(x = "CP045_2020", y = "CP04_2020", color = "Country") +
  ggtitle("Scatter Plot of CP04_2020 vs CP045_2020") +
  theme_minimal()

# Create normal probability plot. qqnorm() draws the plot itself and returns
# only the plotted coordinates, so the result is kept but no longer printed.
normal_prob_plot <- qqnorm(
  residuals(lm_model_2020),
  main = "Normal Probability Plot of Residuals"
)
qqline(residuals(lm_model_2020))

# Print scatter plot
print(scatter_plot_2020)

# ---- Pie charts: rows with CP04_2020 above 196.3928 -------------------------

# Calculate the number of items classified by type "A"
num_items_by_A <- data %>%
  filter(CP04_2020 > 196.3928) %>%
  count(A)

# Sort the counts in descending order and extract the top 5 categories
top_5_categories <- num_items_by_A %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(A)

# Red for the top 5 categories, gray for the rest
slice_colors <- ifelse(num_items_by_A$A %in% top_5_categories, "red", "gray")

# Create a pie chart with custom colors
pie(
  num_items_by_A$n,
  labels = num_items_by_A$A,
  main = "Number of Items by Type A (Above 196.3928)",
  col = slice_colors
)

# Calculate the number of items classified by country (variable V)
num_items_by_country <- data %>%
  filter(CP04_2020 > 196.3928) %>%
  count(V)

# Sort the counts in descending order and extract the top 5 categories
top_5_countries <- num_items_by_country %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(V)

# Red for the top 5 categories, gray for the rest
slice_colors <- ifelse(
  num_items_by_country$V %in% top_5_countries, "red", "gray"
)

# Create a pie chart with custom colors
pie(
  num_items_by_country$n,
  labels = num_items_by_country$V,
  main = "Number of Items by Country (Above 196.3928)",
  col = slice_colors
)

# ---- CP06_2020 vs CP045_2020, coloured by country (V) -----------------------

# Define color palette
color_palette <- country_palette

# Pearson and Spearman tests for CP06_2020 and CP045_2020
pearson_test_2020 <- cor.test(
  data$CP06_2020, data$CP045_2020,
  method = "pearson"
)
spearman_test_2020 <- cor.test(
  data$CP06_2020, data$CP045_2020,
  method = "spearman"
)

# Linear regression for CP06_2020 and CP045_2020.
# Fitted with bare column names and `data =`: with a formula written as
# data$CP06_2020 ~ data$CP045_2020, predict() cannot match the column in
# `newdata` and returns one interval per row instead of the one at 100.
lm_model_2020 <- lm(CP06_2020 ~ CP045_2020, data = data)
lm_model <- lm_model_2020

# Predicted value and 95% prediction interval when CP045_2020 is 100
newdata_100 <- data.frame(CP045_2020 = 100)
predicted_value <- predict(lm_model, newdata = newdata_100)
prediction_interval <- predict(
  lm_model,
  newdata = newdata_100, interval = "prediction", level = 0.95
)

# Print correlation coefficients and tests
print(paste(
  "Correlation coefficient (Pearson) for CP06_2020 and CP045_2020:",
  pearson_test_2020$estimate
))
print(paste(
  "Correlation coefficient (Spearman) for CP06_2020 and CP045_2020:",
  spearman_test_2020$estimate
))
print("Pearson Test Results for CP06_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP06_2020 and CP045_2020:")
print(spearman_test_2020)

# Print regression summary
print(summary(lm_model_2020))

# Print predicted value and prediction interval
print("Prediction Interval for CP06_2020 when CP045_2020 = 100:")
print(prediction_interval)
print("Predicted Value of CP06_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP06_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Print formula, R-squared and slope p-value
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Create scatter plot for CP06_2020 and CP045_2020
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP06_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = country_palette) +
  labs(x = "CP045_2020", y = "CP06_2020", color = "Country") +
  ggtitle("Scatter Plot of CP06_2020 vs CP045_2020") +
  theme_minimal()

# Print scatter plot
print(scatter_plot_2020)

# Load required libraries (once for this part of the script)
library(ggplot2)
library(dplyr)

# Palettes are kept under separate names so that a plot coloured by V can
# never pick up the palette meant for A (and vice versa).
country_palette <- c(
  "LU" = "firebrick", "AT" = "red", "BE" = "salmon", "BG" = "tomato",
  "CY" = "lightblue", "CZ" = "skyblue", "DE" = "darkblue", "DK" = "royalblue",
  "EA" = "deepskyblue", "EA12" = "black", "EA13" = "gray40", "EA17" = "gray60",
  "EA18" = "gray80", "EE" = "white", "EEA28" = "yellow",
  "EEA30_2007" = "yellow", "EFTA" = "darkgreen", "EL" = "green",
  "ES" = "limegreen", "EU15" = "palegreen", "EU25" = "purple",
  "EU27_2007" = "orange", "EU27_2020" = "orchid", "FI" = "cyan",
  "FR" = "magenta", "HR" = "violet", "HU" = "brown", "IE" = "tan",
  "IT" = "khaki", "LT" = "gold", "LV" = "purple", "ME" = "beige",
  "MK" = "navy", "MT" = "lavender", "NL" = "pink", "NO" = "maroon",
  "PL" = "coral", "PT" = "azure", "RO" = "ivory", "RS" = "aquamarine",
  "SE" = "blue", "SI" = "turquoise", "SK" = "pink", "TR" = "salmon",
  "UK" = "tomato", "XK" = "red"
)
# NOTE(review): the sibling keys end in "_DCH"; "A2_CDH" may be a typo for
# "A2_DCH" - confirm against the values present in column A.
category_palette <- c(
  A_GE3 = "firebrick", A_GE3_DCH = "red", A2 = "salmon", A2_CDH = "tomato",
  A1 = "lightblue", A1_DCH = "skyblue", DEG1 = "darkblue", DEG2 = "royalblue",
  DEG3 = "deepskyblue", QUINTILE1 = "black", QUINTILE2 = "gray40",
  QUINTILE3 = "gray60", QUINTILE4 = "gray80", QUINTILE5 = "white",
  UNK = "yellow", UNKUNK_Italy = "yellow", Y_GE60 = "darkgreen",
  Y45_59 = "green", Y30_44 = "limegreen", Y_LT30 = "palegreen"
)

# Single new observation at which every model below is evaluated
newdata_100 <- data.frame(CP045_2020 = 100)

# ---- CP06_2020 vs CP045_2020, coloured by type (A) --------------------------

# Define color palette
color_palette <- category_palette

# Pearson and Spearman tests for CP06_2020 and CP045_2020
pearson_test_2020 <- cor.test(
  data$CP06_2020, data$CP045_2020,
  method = "pearson"
)
spearman_test_2020 <- cor.test(
  data$CP06_2020, data$CP045_2020,
  method = "spearman"
)

# Linear regression for CP06_2020 and CP045_2020.
# Fitted with bare column names and `data =`: with a formula written as
# data$CP06_2020 ~ data$CP045_2020, predict() cannot match the column in
# `newdata` and returns one interval per row instead of the one at 100.
lm_model_2020 <- lm(CP06_2020 ~ CP045_2020, data = data)
lm_model <- lm_model_2020

# Predicted value and 95% prediction interval when CP045_2020 is 100
predicted_value <- predict(lm_model, newdata = newdata_100)
prediction_interval <- predict(
  lm_model,
  newdata = newdata_100, interval = "prediction", level = 0.95
)

# Print correlation coefficients and tests
print(paste(
  "Correlation coefficient (Pearson) for CP06_2020 and CP045_2020:",
  pearson_test_2020$estimate
))
print(paste(
  "Correlation coefficient (Spearman) for CP06_2020 and CP045_2020:",
  spearman_test_2020$estimate
))
print("Pearson Test Results for CP06_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP06_2020 and CP045_2020:")
print(spearman_test_2020)

# Print regression summary
print(summary(lm_model_2020))

# Print predicted value and prediction interval
print("Prediction Interval for CP06_2020 when CP045_2020 = 100:")
print(prediction_interval)
print("Predicted Value of CP06_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP06_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Print formula, R-squared and slope p-value
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Create scatter plot for CP06_2020 and CP045_2020, coloured by A
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP06_2020, color = A)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = category_palette) +
  labs(x = "CP045_2020", y = "CP06_2020", color = "A") +
  ggtitle("Scatter Plot of CP06_2020 vs CP045_2020") +
  theme_minimal()

# Print scatter plot
print(scatter_plot_2020)

# Create scatter plot for CP06_2020 and CP045_2020, coloured by V. This plot
# used to reuse the A palette, so no country code matched a colour.
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP06_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = country_palette) +
  labs(x = "CP045_2020", y = "CP06_2020", color = "Country") +
  ggtitle("Scatter Plot of CP06_2020 vs CP045_2020") +
  theme_minimal()

# Create normal probability plot. qqnorm() draws the plot itself and returns
# only the plotted coordinates, so the result is kept but no longer printed.
normal_prob_plot <- qqnorm(
  residuals(lm_model_2020),
  main = "Normal Probability Plot of Residuals"
)
qqline(residuals(lm_model_2020))

# Print scatter plot
print(scatter_plot_2020)

# ---- Pie charts: rows with CP06_2020 above 43.72451 -------------------------

# Calculate the number of items classified by type "A"
num_items_by_A <- data %>%
  filter(CP06_2020 > 43.72451) %>%
  count(A)

# Sort the counts in descending order and extract the top 5 categories
top_5_categories <- num_items_by_A %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(A)

# Red for the top 5 categories, gray for the rest
slice_colors <- ifelse(num_items_by_A$A %in% top_5_categories, "red", "gray")

# Create a pie chart with custom colors
pie(
  num_items_by_A$n,
  labels = num_items_by_A$A,
  main = "Number of Items by Type A (Above 43.72451)",
  col = slice_colors
)

# Calculate the number of items classified by country (variable V)
num_items_by_country <- data %>%
  filter(CP06_2020 > 43.72451) %>%
  count(V)

# Sort the counts in descending order and extract the top 5 categories
top_5_countries <- num_items_by_country %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(V)

# Red for the top 5 categories, gray for the rest
slice_colors <- ifelse(
  num_items_by_country$V %in% top_5_countries, "red", "gray"
)

# Create a pie chart with custom colors
pie(
  num_items_by_country$n,
  labels = num_items_by_country$V,
  main = "Number of Items by Country (Above 43.72451)",
  col = slice_colors
)

# ---- CP10_2020 vs CP045_2020, coloured by country (V) -----------------------

# Define color palette
color_palette <- country_palette

# Pearson and Spearman tests for CP10_2020 and CP045_2020
pearson_test_2020 <- cor.test(
  data$CP10_2020, data$CP045_2020,
  method = "pearson"
)
spearman_test_2020 <- cor.test(
  data$CP10_2020, data$CP045_2020,
  method = "spearman"
)

# Linear regression for CP10_2020 and CP045_2020 (fitted with `data =` so
# that predict() honours `newdata`; see the note in the CP06_2020 section)
lm_model_2020 <- lm(CP10_2020 ~ CP045_2020, data = data)
lm_model <- lm_model_2020

# Predicted value and 95% prediction interval when CP045_2020 is 100
predicted_value <- predict(lm_model, newdata = newdata_100)
prediction_interval <- predict(
  lm_model,
  newdata = newdata_100, interval = "prediction", level = 0.95
)

# Print correlation coefficients and tests
print(paste(
  "Correlation coefficient (Pearson) for CP10_2020 and CP045_2020:",
  pearson_test_2020$estimate
))
print(paste(
  "Correlation coefficient (Spearman) for CP10_2020 and CP045_2020:",
  spearman_test_2020$estimate
))
print("Pearson Test Results for CP10_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP10_2020 and CP045_2020:")
print(spearman_test_2020)

# Print regression summary
print(summary(lm_model_2020))

# Print predicted value and prediction interval
print("Prediction Interval for CP10_2020 when CP045_2020 = 100:")
print(prediction_interval)
print("Predicted Value of CP10_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP10_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Print formula, R-squared and slope p-value
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Create scatter plot for CP10_2020 and CP045_2020
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP10_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = country_palette) +
  labs(x = "CP045_2020", y = "CP10_2020", color = "Country") +
  ggtitle("Scatter Plot of CP10_2020 vs CP045_2020") +
  theme_minimal()

# Print scatter plot
print(scatter_plot_2020)

# ---- CP10_2020 vs CP045_2020, coloured by type (A) --------------------------

# Define color palette
color_palette <- category_palette

# Pearson and Spearman tests for CP10_2020 and CP045_2020
pearson_test_2020 <- cor.test(
  data$CP10_2020, data$CP045_2020,
  method = "pearson"
)
spearman_test_2020 <- cor.test(
  data$CP10_2020, data$CP045_2020,
  method = "spearman"
)

# Linear regression for CP10_2020 and CP045_2020 (fitted with `data =`)
lm_model_2020 <- lm(CP10_2020 ~ CP045_2020, data = data)
lm_model <- lm_model_2020

# Predicted value and 95% prediction interval when CP045_2020 is 100
predicted_value <- predict(lm_model, newdata = newdata_100)
prediction_interval <- predict(
  lm_model,
  newdata = newdata_100, interval = "prediction", level = 0.95
)

# Print correlation coefficients and tests
print(paste(
  "Correlation coefficient (Pearson) for CP10_2020 and CP045_2020:",
  pearson_test_2020$estimate
))
print(paste(
  "Correlation coefficient (Spearman) for CP10_2020 and CP045_2020:",
  spearman_test_2020$estimate
))
print("Pearson Test Results for CP10_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP10_2020 and CP045_2020:")
print(spearman_test_2020)

# Print regression summary
print(summary(lm_model_2020))

# Print predicted value and prediction interval
print("Prediction Interval for CP10_2020 when CP045_2020 = 100:")
print(prediction_interval)
print("Predicted Value of CP10_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP10_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Print formula, R-squared and slope p-value
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Create scatter plot for CP10_2020 and CP045_2020, coloured by A
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP10_2020, color = A)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = category_palette) +
  labs(x = "CP045_2020", y = "CP10_2020", color = "A") +
  ggtitle("Scatter Plot of CP10_2020 vs CP045_2020") +
  theme_minimal()

# Print scatter plot
print(scatter_plot_2020)

# Create scatter plot for CP10_2020 and CP045_2020, coloured by V. This plot
# used to reuse the A palette, so no country code matched a colour.
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP10_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = country_palette) +
  labs(x = "CP045_2020", y = "CP10_2020", color = "Country") +
  ggtitle("Scatter Plot of CP10_2020 vs CP045_2020") +
  theme_minimal()

# Create normal probability plot. qqnorm() draws the plot itself and returns
# only the plotted coordinates, so the result is kept but no longer printed.
normal_prob_plot <- qqnorm(
  residuals(lm_model_2020),
  main = "Normal Probability Plot of Residuals"
)
qqline(residuals(lm_model_2020))

# Print scatter plot
print(scatter_plot_2020)

# ---- Pie charts: rows with CP10_2020 below 5.147409 -------------------------

# Calculate the number of items classified by type "A"
num_items_by_A <- data %>%
  filter(CP10_2020 < 5.147409) %>%
  count(A)

# Sort the counts in descending order and extract the top 5 categories
top_5_categories <- num_items_by_A %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(A)

# Red for the top 5 categories, gray for the rest
slice_colors <- ifelse(num_items_by_A$A %in% top_5_categories, "red", "gray")

# Create a pie chart with custom colors
pie(
  num_items_by_A$n,
  labels = num_items_by_A$A,
  main = "Number of Items by Type A (Below 5.147409 )",
  col = slice_colors
)

# Calculate the number of items classified by country (variable V)
num_items_by_country <- data %>%
  filter(CP10_2020 < 5.147409) %>%
  count(V)

# Sort the counts in descending order and extract the top 5 categories
top_5_countries <- num_items_by_country %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(V)

# Red for the top 5 categories, gray for the rest
slice_colors <- ifelse(
  num_items_by_country$V %in% top_5_countries, "red", "gray"
)

# Create a pie chart with custom colors
pie(
  num_items_by_country$n,
  labels = num_items_by_country$V,
  main = "Number of Items by Country (Below 5.147409 )",
  col = slice_colors
)

# Load required libraries
library(ggplot2)

# Define color palette
color_palette <- c( "LU" = "firebrick", "AT" = 
"red", "BE" = "salmon", "BG" = "tomato", "CY" = "lightblue", "CZ" = "skyblue", "DE" = "darkblue", "DK" = "royalblue", "EA" = "deepskyblue", "EA12" = "black", "EA13" = "gray40", "EA17" = "gray60", "EA18" = "gray80", "EE" = "white", "EEA28" = "yellow", "EEA30_2007" = "yellow", "EFTA" = "darkgreen", "EL" = "green", "ES" = "limegreen", "EU15" = "palegreen", "EU25" = "purple", "EU27_2007" = "orange", "EU27_2020" = "orchid", "FI" = "cyan", "FR" = "magenta", "HR" = "violet", "HU" = "brown", "IE" = "tan", "IT" = "khaki", "LT" = "gold", "LV" = "purple", "ME" = "beige", "MK" = "navy", "MT" = "lavender", "NL" = "pink", "NO" = "maroon", "PL" = "coral", "PT" = "azure", "RO" = "ivory", "RS" = "aquamarine", "SE" = "blue", "SI" = "turquoise", "SK" = "pink", "TR" = "salmon", "UK" = "tomato", "XK" = "red" ) # Pearson test for CP10_2020 and CP045_2020 pearson_test_2020 <- cor.test(children_subset$CP10_2020, children_subset$CP045_2020, method = "pearson") # Spearman test for CP10_2020 and CP045_2020 spearman_test_2020 <- cor.test(children_subset$CP10_2020, children_subset$CP045_2020, method = "spearman") # Linear regression for CP10_2020 and CP045_2020 lm_model_2020 <- lm(children_subset$CP10_2020 ~ children_subset$CP045_2020) # Predict estimate interval for CP10_2020 when CP045_2020 is 100 prediction_interval <- predict(lm_model_2020, interval = "prediction", newdata = children_subset) # Print correlation coefficients and tests print(paste("Correlation coefficient (Pearson) for CP10_2020 and CP045_2020:", pearson_test_2020$estimate)) print(paste("Correlation coefficient (Spearman) for CP10_2020 and CP045_2020:", spearman_test_2020$estimate)) # Print Pearson test results print("Pearson Test Results for CP10_2020 and CP045_2020:") print(pearson_test_2020) # Print Spearman test results print("Spearman Test Results for CP10_2020 and CP045_2020:") print(spearman_test_2020) # Print regression summary print(summary(lm_model_2020)) # Print prediction interval print("Prediction Interval for 
CP10_2020 when CP045_2020 = 100:") print(prediction_interval) # Fit linear regression model lm_model <- lm(CP10_2020 ~ CP045_2020, data = children_subset) # Predict value of "CP10_2020" when "CP045_2020" is 100 predicted_value <- predict(lm_model, newdata = children_subset) # Calculate prediction interval for "CP10_2020" when "CP045_2020" is 100 prediction_interval <- predict(lm_model, newdata = children_subset, interval = "prediction", level = 0.95) # Print predicted value and interval print("Predicted Value of CP10_2020 when CP045_2020 is 100:") print(predicted_value) print("Prediction Interval for CP10_2020 when CP045_2020 is 100 (95% confidence level):") print(prediction_interval) # Fit linear regression model lm_model <- lm(CP10_2020 ~ CP045_2020, data = children_subset) # Print formula print("Linear Regression Formula:") print(summary(lm_model)$call) # Print R-squared value print("R-squared:") print(summary(lm_model)$r.squared) # Print p-value print("p-value:") print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"]) # Create scatter plot for CP10_2020 and CP045_2020 scatter_plot_2020 <- ggplot(children_subset, aes(x = CP045_2020, y = CP10_2020, color = V)) + geom_point() + geom_vline(xintercept = 100, linetype = "dashed", color = "red") + scale_color_manual(values = color_palette) + labs(x = "CP045_2020", y = "CP10_2020", color = "Country") + ggtitle("Scatter Plot of CP10_2020 vs CP045_2020") + theme_minimal() # Print scatter plot print(scatter_plot_2020) # Load required libraries library(ggplot2) # Define color palette color_palette <- c( A_GE3 = "firebrick", A_GE3_DCH = "red", A2 = "salmon", A2_CDH = "tomato", A1 = "lightblue", A1_DCH = "skyblue", DEG1 = "darkblue", DEG2 = "royalblue", DEG3 = "deepskyblue", QUINTILE1 = "black", QUINTILE2 = "gray40", QUINTILE3 = "gray60", QUINTILE4 = "gray80", QUINTILE5 = "white", UNK = "yellow", UNKUNK_Italy = "yellow", Y_GE60 = "darkgreen", Y45_59 = "green", Y30_44 = "limegreen", Y_LT30 = "palegreen" ) # Pearson 
# test for CP10_2020 and CP045_2020 (children_subset, household-type section)
pearson_test_2020 <- cor.test(children_subset$CP10_2020,
                              children_subset$CP045_2020, method = "pearson")

# Spearman test for CP10_2020 and CP045_2020
spearman_test_2020 <- cor.test(children_subset$CP10_2020,
                               children_subset$CP045_2020, method = "spearman")

# Fit with `data =` rather than `children_subset$...` inside the formula: with
# `$` terms predict() cannot find the predictor in newdata and silently
# returns the fitted values for every row.
lm_model_2020 <- lm(CP10_2020 ~ CP045_2020, data = children_subset)

# NOTE(review): this section used newdata = children_subset, which yields one
# prediction per subset row although every label below says "CP045_2020 = 100".
# Predictions are now taken at that single point to match the labels - confirm.
prediction_interval <- predict(
  lm_model_2020,
  interval = "prediction",
  newdata = data.frame(CP045_2020 = 100)
)

# Report correlation coefficients, full test output and the model summary
print(paste("Correlation coefficient (Pearson) for CP10_2020 and CP045_2020:",
            pearson_test_2020$estimate))
print(paste("Correlation coefficient (Spearman) for CP10_2020 and CP045_2020:",
            spearman_test_2020$estimate))
print("Pearson Test Results for CP10_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP10_2020 and CP045_2020:")
print(spearman_test_2020)
print(summary(lm_model_2020))
print("Prediction Interval for CP10_2020 when CP045_2020 = 100:")
print(prediction_interval)

# Point prediction and explicit 95% prediction interval at CP045_2020 = 100
lm_model <- lm(CP10_2020 ~ CP045_2020, data = children_subset)
predicted_value <- predict(lm_model, newdata = data.frame(CP045_2020 = 100))
prediction_interval <- predict(
  lm_model,
  newdata = data.frame(CP045_2020 = 100),
  interval = "prediction",
  level = 0.95
)
print("Predicted Value of CP10_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP10_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Model call, R-squared and slope p-value (lm_model is the fit just above;
# the identical refit that used to precede these prints was redundant).
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Scatter plot coloured by household type (A)
scatter_plot_2020 <- ggplot(
  children_subset,
  aes(x = CP045_2020, y = CP10_2020, color = A)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "CP10_2020", color = "A") +
  ggtitle("Scatter Plot of CP10_2020 vs CP045_2020") +
  theme_minimal()
print(scatter_plot_2020)

# Scatter plot coloured by country (V).
# NOTE(review): color_palette was last assigned the household-type palette
# (A_GE3, A1, DEG1, ...), so no key matches a country code in V and the points
# fall back to the scale's NA colour. Save the country palette under its own
# name before it is overwritten and pass that here.
scatter_plot_2020 <- ggplot(
  children_subset,
  aes(x = CP045_2020, y = CP10_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "CP10_2020", color = "Country") +
  ggtitle("Scatter Plot of CP10_2020 vs CP045_2020") +
  theme_minimal()

# Normal Q-Q plot of the residuals. qqnorm() draws immediately and returns the
# plotted coordinates, so print(normal_prob_plot) lists those x/y values.
normal_prob_plot <- qqnorm(
  residuals(lm_model_2020),
  main = "Normal Probability Plot of Residuals"
)
qqline(residuals(lm_model_2020))
print(scatter_plot_2020)
print(normal_prob_plot)

# ---- Pie charts of children_subset rows with CP10_2020 below 6.713799 ------
library(ggplot2)
library(dplyr)

# Row counts per household type (A) below the threshold
num_items_by_A <- children_subset %>%
  filter(CP10_2020 < 6.713799) %>%
  count(A)

# The five most frequent types are drawn in red, all others in gray
top_5_categories <- num_items_by_A %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(A)
slice_colors <- ifelse(num_items_by_A$A %in% top_5_categories, "red", "gray")
pie(
  num_items_by_A$n,
  labels = num_items_by_A$A,
  main = "Number of Items by Type A (Below 6.713799)",
  col = slice_colors
)

library(ggplot2)
library(dplyr)

# Row counts per country (V) below the same threshold
num_items_by_country <- children_subset %>%
  filter(CP10_2020 < 6.713799) %>%
  count(V)
top_5_countries <- num_items_by_country %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(V)
slice_colors <- ifelse(num_items_by_country$V %in% top_5_countries,
                       "red", "gray")
pie(
  num_items_by_country$n,
  labels = num_items_by_country$V,
  main = "Number of Items by Country (Below 6.713799)",
  col = slice_colors
)

# ---- CP10_BASIC_2020 vs CP045_2020 on `data`, coloured by country ----------
library(ggplot2)

# Country / aggregate palette
color_palette <- c(
  "LU" = "firebrick", "AT" = "red", "BE" = "salmon", "BG" = "tomato",
  "CY" = "lightblue", "CZ" = "skyblue", "DE" = "darkblue", "DK" = "royalblue",
  "EA" = "deepskyblue", "EA12" = "black", "EA13" = "gray40", "EA17" = "gray60",
  "EA18" = "gray80", "EE" = "white", "EEA28" = "yellow",
  "EEA30_2007" = "yellow", "EFTA" = "darkgreen", "EL" = "green",
  "ES" = "limegreen", "EU15" = "palegreen", "EU25" = "purple",
  "EU27_2007" = "orange", "EU27_2020" = "orchid", "FI" = "cyan",
  "FR" = "magenta", "HR" = "violet", "HU" = "brown", "IE" = "tan",
  "IT" = "khaki", "LT" = "gold", "LV" = "purple", "ME" = "beige",
  "MK" = "navy", "MT" = "lavender", "NL" = "pink", "NO" = "maroon",
  "PL" = "coral", "PT" = "azure", "RO" = "ivory", "RS" = "aquamarine",
  "SE" = "blue", "SI" = "turquoise", "SK" = "pink", "TR" = "salmon",
  "UK" = "tomato", "XK" = "red"
)

# Pearson test for CP10_BASIC_2020 and CP045_2020
pearson_test_2020 <- cor.test(data$CP10_BASIC_2020, data$CP045_2020,
                              method = "pearson")

# Spearman test for CP10_BASIC_2020 and CP045_2020
spearman_test_2020 <- cor.test(data$CP10_BASIC_2020, data$CP045_2020,
                               method = "spearman")

# Linear regression for CP10_BASIC_2020 and CP045_2020
# Fit with `data =` rather than `data$...` inside the formula: with `data$`
# terms predict() cannot find the predictor in newdata and silently returns
# the fitted values for every row instead of the value at CP045_2020 = 100.
lm_model_2020 <- lm(CP10_BASIC_2020 ~ CP045_2020, data = data)

# Prediction interval for CP10_BASIC_2020 at CP045_2020 = 100 (default level)
prediction_interval <- predict(
  lm_model_2020,
  interval = "prediction",
  newdata = data.frame(CP045_2020 = 100)
)

# Report correlation coefficients, full test output and the model summary
print(paste("Correlation coefficient (Pearson) for CP10_BASIC_2020 and CP045_2020:",
            pearson_test_2020$estimate))
print(paste("Correlation coefficient (Spearman) for CP10_BASIC_2020 and CP045_2020:",
            spearman_test_2020$estimate))
print("Pearson Test Results for CP10_BASIC_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP10_BASIC_2020 and CP045_2020:")
print(spearman_test_2020)
print(summary(lm_model_2020))
print("Prediction Interval for CP10_BASIC_2020 when CP045_2020 = 100:")
print(prediction_interval)

# Point prediction and explicit 95% prediction interval at CP045_2020 = 100
lm_model <- lm(CP10_BASIC_2020 ~ CP045_2020, data = data)
predicted_value <- predict(lm_model, newdata = data.frame(CP045_2020 = 100))
prediction_interval <- predict(
  lm_model,
  newdata = data.frame(CP045_2020 = 100),
  interval = "prediction",
  level = 0.95
)
print("Predicted Value of CP10_BASIC_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP10_BASIC_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Model call, R-squared and slope p-value (lm_model is the fit just above;
# the identical refit that used to precede these prints was redundant).
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Scatter plot of CP10_BASIC_2020 against CP045_2020, coloured by country (V);
# the dashed red line marks CP045_2020 = 100.
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP10_BASIC_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "CP10_BASIC_2020", color = "Country") +
  ggtitle("Scatter Plot of CP10_BASIC_2020 vs CP045_2020") +
  theme_minimal()
print(scatter_plot_2020)

# ---- CP10_BASIC_2020 vs CP045_2020, coloured by household type (A) ---------
library(ggplot2)

# Household-type palette (replaces the country palette in color_palette).
# NOTE(review): "A2_CDH" breaks the "_DCH" suffix pattern of its siblings and
# "UNKUNK_Italy" looks doubled - confirm both against the levels of A.
color_palette <- c(
  A_GE3 = "firebrick", A_GE3_DCH = "red", A2 = "salmon", A2_CDH = "tomato",
  A1 = "lightblue", A1_DCH = "skyblue", DEG1 = "darkblue", DEG2 = "royalblue",
  DEG3 = "deepskyblue", QUINTILE1 = "black", QUINTILE2 = "gray40",
  QUINTILE3 = "gray60", QUINTILE4 = "gray80", QUINTILE5 = "white",
  UNK = "yellow", UNKUNK_Italy = "yellow", Y_GE60 = "darkgreen",
  Y45_59 = "green", Y30_44 = "limegreen", Y_LT30 = "palegreen"
)

# Pearson and Spearman correlation tests for CP10_BASIC_2020 and CP045_2020
pearson_test_2020 <- cor.test(data$CP10_BASIC_2020, data$CP045_2020,
                              method = "pearson")
spearman_test_2020 <- cor.test(data$CP10_BASIC_2020, data$CP045_2020,
                               method = "spearman")

# Same `data =` fit as above so that predict() honours newdata
lm_model_2020 <- lm(CP10_BASIC_2020 ~ CP045_2020, data = data)
prediction_interval <- predict(
  lm_model_2020,
  interval = "prediction",
  newdata = data.frame(CP045_2020 = 100)
)

print(paste("Correlation coefficient (Pearson) for CP10_BASIC_2020 and CP045_2020:",
            pearson_test_2020$estimate))
print(paste("Correlation coefficient (Spearman) for CP10_BASIC_2020 and CP045_2020:",
            spearman_test_2020$estimate))
print("Pearson Test Results for CP10_BASIC_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP10_BASIC_2020 and CP045_2020:")
print(spearman_test_2020)

# Regression summary and prediction interval of the CP10_BASIC_2020 model
print(summary(lm_model_2020))
print("Prediction Interval for CP10_BASIC_2020 when CP045_2020 = 100:")
print(prediction_interval)

# Point prediction and explicit 95% prediction interval at CP045_2020 = 100
lm_model <- lm(CP10_BASIC_2020 ~ CP045_2020, data = data)
predicted_value <- predict(lm_model, newdata = data.frame(CP045_2020 = 100))
prediction_interval <- predict(
  lm_model,
  newdata = data.frame(CP045_2020 = 100),
  interval = "prediction",
  level = 0.95
)
print("Predicted Value of CP10_BASIC_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP10_BASIC_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Model call, R-squared and slope p-value (lm_model is the fit just above;
# the identical refit that used to precede these prints was redundant).
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Scatter plot coloured by household type (A)
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP10_BASIC_2020, color = A)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "CP10_BASIC_2020", color = "A") +
  ggtitle("Scatter Plot of CP10_BASIC_2020 vs CP045_2020") +
  theme_minimal()
print(scatter_plot_2020)

# Scatter plot coloured by country (V).
# NOTE(review): color_palette was last assigned the household-type palette
# (A_GE3, A1, DEG1, ...), so no key matches a country code in V and the points
# fall back to the scale's NA colour. Save the country palette under its own
# name before it is overwritten and pass that here.
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP10_BASIC_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "CP10_BASIC_2020", color = "Country") +
  ggtitle("Scatter Plot of CP10_BASIC_2020 vs CP045_2020") +
  theme_minimal()

# Normal Q-Q plot of the residuals. qqnorm() draws immediately and returns the
# plotted coordinates, so print(normal_prob_plot) lists those x/y values.
normal_prob_plot <- qqnorm(
  residuals(lm_model_2020),
  main = "Normal Probability Plot of Residuals"
)
qqline(residuals(lm_model_2020))
print(scatter_plot_2020)
print(normal_prob_plot)

# ---- Pie charts of rows with CP10_BASIC_2020 below 3.128399 ----------------
library(ggplot2)
library(dplyr)

# Row counts per household type (A) below the threshold
num_items_by_A <- data %>%
  filter(CP10_BASIC_2020 < 3.128399) %>%
  count(A)

# The five most frequent types are drawn in red, all others in gray
top_5_categories <- num_items_by_A %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(A)
slice_colors <- ifelse(num_items_by_A$A %in% top_5_categories, "red", "gray")
pie(
  num_items_by_A$n,
  labels = num_items_by_A$A,
  main = "Number of Items by Type A (Below 3.128399)",
  col = slice_colors
)

library(ggplot2)
library(dplyr)

# Row counts per country (V) below the same threshold
num_items_by_country <- data %>%
  filter(CP10_BASIC_2020 < 3.128399) %>%
  count(V)
top_5_countries <- num_items_by_country %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(V)
slice_colors <- ifelse(num_items_by_country$V %in% top_5_countries,
                       "red", "gray")
pie(
  num_items_by_country$n,
  labels = num_items_by_country$V,
  main = "Number of Items by Country (Below 3.128399)",
  col = slice_colors
)

# ---- CP07_transportserv_2020 vs CP045_2020, coloured by country ------------
library(ggplot2)

# Country / aggregate palette
color_palette <- c(
  "LU" = "firebrick", "AT" = "red", "BE" = "salmon", "BG" = "tomato",
  "CY" = "lightblue", "CZ" = "skyblue", "DE" = "darkblue", "DK" = "royalblue",
  "EA" = "deepskyblue", "EA12" = "black", "EA13" = "gray40", "EA17" = "gray60",
  "EA18" = "gray80", "EE" = "white", "EEA28" = "yellow",
  "EEA30_2007" = "yellow", "EFTA" = "darkgreen", "EL" = "green",
  "ES" = "limegreen", "EU15" = "palegreen", "EU25" = "purple",
  "EU27_2007" = "orange", "EU27_2020" = "orchid", "FI" = "cyan",
  "FR" = "magenta", "HR" = "violet", "HU" = "brown", "IE" = "tan",
  "IT" = "khaki", "LT" = "gold", "LV" = "purple", "ME" = "beige",
  "MK" = "navy", "MT" = "lavender", "NL" = "pink", "NO" = "maroon",
  "PL" = "coral", "PT" = "azure", "RO" = "ivory", "RS" = "aquamarine",
  "SE" = "blue", "SI" = "turquoise", "SK" = "pink", "TR" = "salmon",
  "UK" = "tomato", "XK" = "red"
)

# Pearson and Spearman correlation tests
pearson_test_2020 <- cor.test(data$CP07_transportserv_2020, data$CP045_2020,
                              method = "pearson")
spearman_test_2020 <- cor.test(data$CP07_transportserv_2020, data$CP045_2020,
                               method = "spearman")

# Fit with `data =` rather than `data$...` inside the formula: with `data$`
# terms predict() cannot find the predictor in newdata and silently returns
# the fitted values for every row instead of the value at CP045_2020 = 100.
lm_model_2020 <- lm(CP07_transportserv_2020 ~ CP045_2020, data = data)
prediction_interval <- predict(
  lm_model_2020,
  interval = "prediction",
  newdata = data.frame(CP045_2020 = 100)
)

print(paste("Correlation coefficient (Pearson) for CP07_transportserv_2020 and CP045_2020:",
            pearson_test_2020$estimate))
print(paste("Correlation coefficient (Spearman) for CP07_transportserv_2020 and CP045_2020:",
            spearman_test_2020$estimate))
print("Pearson Test Results for CP07_transportserv_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP07_transportserv_2020 and CP045_2020:")
print(spearman_test_2020)
print(summary(lm_model_2020))
print("Prediction Interval for CP07_transportserv_2020 when CP045_2020 = 100:")
print(prediction_interval)

# Point prediction and explicit 95% prediction interval at CP045_2020 = 100
lm_model <- lm(CP07_transportserv_2020 ~ CP045_2020, data = data)
predicted_value <- predict(lm_model, newdata = data.frame(CP045_2020 = 100))
prediction_interval <- predict(
  lm_model,
  newdata = data.frame(CP045_2020 = 100),
  interval = "prediction",
  level = 0.95
)
print("Predicted Value of CP07_transportserv_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP07_transportserv_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Model call, R-squared and slope p-value of the fit above
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Scatter plot coloured by country (V)
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP07_transportserv_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "CP07_transportserv_2020", color = "Country") +
  ggtitle("Scatter Plot of CP07_transportserv_2020 vs CP045_2020") +
  theme_minimal()
print(scatter_plot_2020)

# ---- CP07_transportserv_2020, coloured by household type (A) ---------------
library(ggplot2)

# Household-type palette; the vector literal continues on the following line
color_palette <- c(
  A_GE3 = "firebrick", A_GE3_DCH = "red", A2 = "salmon", A2_CDH = "tomato",
  A1 = "lightblue", A1_DCH = "skyblue", DEG1 = "darkblue", DEG2 = "royalblue",
  DEG3 = "deepskyblue", QUINTILE1 = "black", QUINTILE2 = "gray40",
  QUINTILE3 = "gray60", QUINTILE4 = "gray80", QUINTILE5 = "white",
  UNK = "yellow",
UNKUNK_Italy = "yellow", Y_GE60 = "darkgreen", Y45_59 = "green",
  Y30_44 = "limegreen", Y_LT30 = "palegreen"
)
# NOTE(review): the lines above close the household-type color_palette vector;
# "UNKUNK_Italy" looks doubled - confirm against the levels of A.

# Pearson and Spearman correlation tests for CP07_transportserv_2020 and
# CP045_2020
pearson_test_2020 <- cor.test(data$CP07_transportserv_2020, data$CP045_2020,
                              method = "pearson")
spearman_test_2020 <- cor.test(data$CP07_transportserv_2020, data$CP045_2020,
                               method = "spearman")

# Fit with `data =` rather than `data$...` inside the formula: with `data$`
# terms predict() cannot find the predictor in newdata and silently returns
# the fitted values for every row instead of the value at CP045_2020 = 100.
lm_model_2020 <- lm(CP07_transportserv_2020 ~ CP045_2020, data = data)

# Prediction interval at CP045_2020 = 100 (default level)
prediction_interval <- predict(
  lm_model_2020,
  interval = "prediction",
  newdata = data.frame(CP045_2020 = 100)
)

# Report correlation coefficients, full test output and the model summary
print(paste("Correlation coefficient (Pearson) for CP07_transportserv_2020 and CP045_2020:",
            pearson_test_2020$estimate))
print(paste("Correlation coefficient (Spearman) for CP07_transportserv_2020 and CP045_2020:",
            spearman_test_2020$estimate))
print("Pearson Test Results for CP07_transportserv_2020 and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for CP07_transportserv_2020 and CP045_2020:")
print(spearman_test_2020)
print(summary(lm_model_2020))
print("Prediction Interval for CP07_transportserv_2020 when CP045_2020 = 100:")
print(prediction_interval)

# Point prediction and explicit 95% prediction interval at CP045_2020 = 100
lm_model <- lm(CP07_transportserv_2020 ~ CP045_2020, data = data)
predicted_value <- predict(lm_model, newdata = data.frame(CP045_2020 = 100))
prediction_interval <- predict(
  lm_model,
  newdata = data.frame(CP045_2020 = 100),
  interval = "prediction",
  level = 0.95
)
print("Predicted Value of CP07_transportserv_2020 when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for CP07_transportserv_2020 when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Model call, R-squared and slope p-value (lm_model is the fit just above;
# the identical refit that used to precede these prints was redundant).
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Scatter plot coloured by household type (A)
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP07_transportserv_2020, color = A)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "CP07_transportserv_2020", color = "A") +
  ggtitle("Scatter Plot of CP07_transportserv_2020 vs CP045_2020") +
  theme_minimal()
print(scatter_plot_2020)

# Scatter plot coloured by country (V).
# NOTE(review): color_palette holds the household-type palette closed at the
# top of this section, so no key matches a country code in V and the points
# fall back to the scale's NA colour. Save the country palette under its own
# name before it is overwritten and pass that here.
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = CP07_transportserv_2020, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "CP07_transportserv_2020", color = "Country") +
  ggtitle("Scatter Plot of CP07_transportserv_2020 vs CP045_2020") +
  theme_minimal()

# Normal Q-Q plot of the residuals. qqnorm() draws immediately and returns the
# plotted coordinates, so print(normal_prob_plot) lists those x/y values.
normal_prob_plot <- qqnorm(
  residuals(lm_model_2020),
  main = "Normal Probability Plot of Residuals"
)
qqline(residuals(lm_model_2020))
print(scatter_plot_2020)
print(normal_prob_plot)

# ---- Rows with CP07_transportserv_2020 above 196.3928, per household type --
library(ggplot2)
library(dplyr)

# Row counts per household type (A) above the threshold
num_items_by_A <- data %>%
  filter(CP07_transportserv_2020 > 196.3928) %>%
  count(A)

# Sort the counts in descending order and
# extract the top 5 categories; those are drawn in red, all others in gray
top_5_categories <- num_items_by_A %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(A)
slice_colors <- ifelse(num_items_by_A$A %in% top_5_categories, "red", "gray")

# NOTE(review): the titles of both pies said "Above 181.1764" while the rows
# are filtered with CP07_transportserv_2020 > 196.3928. The titles now state
# the threshold that is actually applied - confirm which value was intended.
pie(
  num_items_by_A$n,
  labels = num_items_by_A$A,
  main = "Number of Items by Type A (Above 196.3928)",
  col = slice_colors
)

library(ggplot2)
library(dplyr)

# Row counts per country (V) above the same threshold
num_items_by_country <- data %>%
  filter(CP07_transportserv_2020 > 196.3928) %>%
  count(V)
top_5_countries <- num_items_by_country %>%
  arrange(desc(n)) %>%
  head(5) %>%
  pull(V)
slice_colors <- ifelse(num_items_by_country$V %in% top_5_countries,
                       "red", "gray")
pie(
  num_items_by_country$n,
  labels = num_items_by_country$V,
  main = "Number of Items by Country (Above 196.3928)",
  col = slice_colors
)

# ---- basics vs CP045_2020 on `data`, coloured by country -------------------
library(ggplot2)

# Country / aggregate palette
color_palette <- c(
  "LU" = "firebrick", "AT" = "red", "BE" = "salmon", "BG" = "tomato",
  "CY" = "lightblue", "CZ" = "skyblue", "DE" = "darkblue", "DK" = "royalblue",
  "EA" = "deepskyblue", "EA12" = "black", "EA13" = "gray40", "EA17" = "gray60",
  "EA18" = "gray80", "EE" = "white", "EEA28" = "yellow",
  "EEA30_2007" = "yellow", "EFTA" = "darkgreen", "EL" = "green",
  "ES" = "limegreen", "EU15" = "palegreen", "EU25" = "purple",
  "EU27_2007" = "orange", "EU27_2020" = "orchid", "FI" = "cyan",
  "FR" = "magenta", "HR" = "violet", "HU" = "brown", "IE" = "tan",
  "IT" = "khaki", "LT" = "gold", "LV" = "purple", "ME" = "beige",
  "MK" = "navy", "MT" = "lavender", "NL" = "pink", "NO" = "maroon",
  "PL" = "coral", "PT" = "azure", "RO" = "ivory", "RS" = "aquamarine",
  "SE" = "blue", "SI" = "turquoise", "SK" = "pink", "TR" = "salmon",
  "UK" = "tomato", "XK" = "red"
)

# Define variables to correlate (not read again within this section)
variables_2020 <- c("basics")

# Pearson and Spearman correlation tests for basics and CP045_2020
pearson_test_2020 <- cor.test(data$basics, data$CP045_2020,
                              method = "pearson")
spearman_test_2020 <- cor.test(data$basics, data$CP045_2020,
                               method = "spearman")

# Fit with `data =` rather than `data$...` inside the formula: with `data$`
# terms predict() cannot find the predictor in newdata and silently returns
# the fitted values for every row instead of the value at CP045_2020 = 100.
lm_model_2020 <- lm(basics ~ CP045_2020, data = data)

# Prediction interval for basics at CP045_2020 = 100 (default level)
prediction_interval <- predict(
  lm_model_2020,
  interval = "prediction",
  newdata = data.frame(CP045_2020 = 100)
)

# Report correlation coefficients, full test output and the model summary
print(paste("Correlation coefficient (Pearson) for basics and CP045_2020:",
            pearson_test_2020$estimate))
print(paste("Correlation coefficient (Spearman) for basics and CP045_2020:",
            spearman_test_2020$estimate))
print("Pearson Test Results for basics and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for basics and CP045_2020:")
print(spearman_test_2020)
print(summary(lm_model_2020))
print("Prediction Interval for basics when CP045_2020 = 100:")
print(prediction_interval)

# Point prediction and explicit 95% prediction interval at CP045_2020 = 100
lm_model <- lm(basics ~ CP045_2020, data = data)
predicted_value <- predict(lm_model, newdata = data.frame(CP045_2020 = 100))
prediction_interval <- predict(
  lm_model,
  newdata = data.frame(CP045_2020 = 100),
  interval = "prediction",
  level = 0.95
)
print("Predicted Value of basics when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for basics when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Model call, R-squared and slope p-value (lm_model is the fit just above;
# the identical refit that used to precede these prints was redundant).
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Scatter plot of basics against CP045_2020, coloured by country (V); the
# dashed red line marks CP045_2020 = 100.
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = basics, color = V)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "basics", color = "Country") +
  ggtitle("Scatter Plot of basics vs CP045_2020") +
  theme_minimal()
print(scatter_plot_2020)

# ---- basics vs CP045_2020, coloured by household type (A) ------------------
library(ggplot2)

# Household-type palette (replaces the country palette in color_palette).
# NOTE(review): "A2_CDH" breaks the "_DCH" suffix pattern of its siblings and
# "UNKUNK_Italy" looks doubled - confirm both against the levels of A.
color_palette <- c(
  A_GE3 = "firebrick", A_GE3_DCH = "red", A2 = "salmon", A2_CDH = "tomato",
  A1 = "lightblue", A1_DCH = "skyblue", DEG1 = "darkblue", DEG2 = "royalblue",
  DEG3 = "deepskyblue", QUINTILE1 = "black", QUINTILE2 = "gray40",
  QUINTILE3 = "gray60", QUINTILE4 = "gray80", QUINTILE5 = "white",
  UNK = "yellow", UNKUNK_Italy = "yellow", Y_GE60 = "darkgreen",
  Y45_59 = "green", Y30_44 = "limegreen", Y_LT30 = "palegreen"
)

# Define variables to correlate (not read again within this section)
variables_2020 <- c("basics")

# Pearson and Spearman correlation tests for basics and CP045_2020
pearson_test_2020 <- cor.test(data$basics, data$CP045_2020,
                              method = "pearson")
spearman_test_2020 <- cor.test(data$basics, data$CP045_2020,
                               method = "spearman")

# Same `data =` fit as above so that predict() honours newdata
lm_model_2020 <- lm(basics ~ CP045_2020, data = data)
prediction_interval <- predict(
  lm_model_2020,
  interval = "prediction",
  newdata = data.frame(CP045_2020 = 100)
)

print(paste("Correlation coefficient (Pearson) for basics and CP045_2020:",
            pearson_test_2020$estimate))
print(paste("Correlation coefficient (Spearman) for basics and CP045_2020:",
            spearman_test_2020$estimate))
print("Pearson Test Results for basics and CP045_2020:")
print(pearson_test_2020)
print("Spearman Test Results for basics and CP045_2020:")
print(spearman_test_2020)
print(summary(lm_model_2020))
print("Prediction Interval for basics when CP045_2020 = 100:")
print(prediction_interval)

# Point prediction and explicit 95% prediction interval at CP045_2020 = 100
lm_model <- lm(basics ~ CP045_2020, data = data)
predicted_value <- predict(lm_model, newdata = data.frame(CP045_2020 = 100))
prediction_interval <- predict(
  lm_model,
  newdata = data.frame(CP045_2020 = 100),
  interval = "prediction",
  level = 0.95
)
print("Predicted Value of basics when CP045_2020 is 100:")
print(predicted_value)
print("Prediction Interval for basics when CP045_2020 is 100 (95% confidence level):")
print(prediction_interval)

# Model call, R-squared and slope p-value of the fit above
print("Linear Regression Formula:")
print(summary(lm_model)$call)
print("R-squared:")
print(summary(lm_model)$r.squared)
print("p-value:")
print(summary(lm_model)$coefficients["CP045_2020", "Pr(>|t|)"])

# Scatter plot coloured by household type (A)
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = basics, color = A)
) +
  geom_point() +
  geom_vline(xintercept = 100, linetype = "dashed", color = "red") +
  scale_color_manual(values = color_palette) +
  labs(x = "CP045_2020", y = "basics", color = "A") +
  ggtitle("Scatter Plot of basics vs CP045_2020") +
  theme_minimal()
print(scatter_plot_2020)

# Scatter plot coloured by country (V); the call continues on the next line
scatter_plot_2020 <- ggplot(
  data,
  aes(x = CP045_2020, y = basics, color = V)
) +
  geom_point() +
  geom_vline(xintercept =
100, linetype = "dashed", color = "red") + scale_color_manual(values = color_palette) + labs(x = "CP045_2020", y = "basics", color = "Country") + ggtitle("Scatter Plot of basics vs CP045_2020") + theme_minimal() # Create normal probability plot normal_prob_plot <- qqnorm(residuals(lm_model_2020), main = "Normal Probability Plot of Residuals") qqline(residuals(lm_model_2020)) # Print scatter plot and normal probability plot print(scatter_plot_2020) print(normal_prob_plot) # Load required libraries library(ggplot2) # Define color palette color_palette <- c( A_GE3 = "firebrick", A_GE3_DCH = "red", A2 = "salmon", A2_CDH = "tomato", A1 = "lightblue", A1_DCH = "skyblue", DEG1 = "darkblue", DEG2 = "royalblue", DEG3 = "deepskyblue", QUINTILE1 = "black", QUINTILE2 = "gray40", QUINTILE3 = "gray60", QUINTILE4 = "gray80", QUINTILE5 = "white", UNK = "yellow", UNKUNK_Italy = "yellow", Y_GE60 = "darkgreen", Y45_59 = "green", Y30_44 = "limegreen", Y_LT30 = "palegreen" ) # Replace 0 with NA in NDI2020 data$NDI2020[data$NDI2020 == 0] <- NA # Ensure the columns used for color coding are factors with levels matching the color palette data$A <- factor(data$A, levels = names(color_palette)) data$V <- factor(data$V, levels = names(color_palette)) # Pearson test for NDI2020 and CP045_2020 pearson_test_2020 <- cor.test(data$NDI2020, data$CP045_2020, method = "pearson") # Spearman test for NDI2020 and CP045_2020 spearman_test_2020 <- cor.test(data$NDI2020, data$CP045_2020, method = "spearman") # Linear regression for NDI2020 and CP045_2020 lm_model_2020 <- lm(NDI2020 ~ CP045_2020, data = data) # Predict estimate interval for NDI2020 when CP045_2020 is 100 prediction_interval <- predict(lm_model_2020, interval = "prediction", newdata = data.frame(CP045_2020 = 100)) # Print correlation coefficients and tests print(paste("Correlation coefficient (Pearson) for NDI2020 and CP045_2020:", pearson_test_2020$estimate)) print(paste("Correlation coefficient (Spearman) for NDI2020 and CP045_2020:", 
spearman_test_2020$estimate)) # Print Pearson test results print("Pearson Test Results for NDI2020 and CP045_2020:") print(pearson_test_2020) # Print Spearman test results print("Spearman Test Results for NDI2020 and CP045_2020:") print(spearman_test_2020) # Print regression summary print(summary(lm_model_2020)) # Print prediction interval print("Prediction Interval for NDI2020 when CP045_2020 = 100:") print(prediction_interval) # Predict value of "NDI2020" when "CP045_2020" is 100 predicted_value <- predict(lm_model_2020, newdata = data.frame(CP045_2020 = 100)) # Calculate prediction interval for "NDI2020" when "CP045_2020" is 100 prediction_interval <- predict(lm_model_2020, newdata = data.frame(CP045_2020 = 100), interval = "prediction", level = 0.95) # Print predicted value and interval print("Predicted Value of NDI2020 when CP045_2020 is 100:") print(predicted_value) print("Prediction Interval for NDI2020 when CP045_2020 is 100 (95% confidence level):") print(prediction_interval) # Print formula print("Linear Regression Formula:") print(summary(lm_model_2020)$call) # Print R-squared value print("R-squared:") print(summary(lm_model_2020)$r.squared) # Print p-value print("p-value:") print(summary(lm_model_2020)$coefficients["CP045_2020", "Pr(>|t|)"]) # Create scatter plot for NDI2020 and CP045_2020 with color based on A scatter_plot_2020_A <- ggplot(data, aes(x = CP045_2020, y = NDI2020, color = A)) + geom_point() + geom_vline(xintercept = 100, linetype = "dashed", color = "red") + scale_color_manual(values = color_palette) + labs(x = "CP045_2020", y = "NDI2020", color = "A") + ggtitle("Scatter Plot of NDI2020 vs CP045_2020") + theme_minimal() # Print scatter plot print(scatter_plot_2020_A) # Create scatter plot for NDI2020 and CP045_2020 with color based on V scatter_plot_2020_V <- ggplot(data, aes(x = CP045_2020, y = NDI2020, color = V)) + geom_point() + geom_vline(xintercept = 100, linetype = "dashed", color = "red") + scale_color_manual(values = 
color_palette) + labs(x = "CP045_2020", y = "NDI2020", color = "Country") + ggtitle("Scatter Plot of NDI2020 vs CP045_2020") + theme_minimal() # Print scatter plot print(scatter_plot_2020_V) # Create normal probability plot qqnorm(residuals(lm_model_2020), main = "Normal Probability Plot of Residuals") qqline(residuals(lm_model_2020)) # Create histogram for CP045_2020 in the whole database, classified by type "A" histogram_CP045_2020 <- ggplot(data, aes(x = CP045_2020, fill = A)) + geom_histogram(binwidth = 1, color = "black") + labs(x = "CP045_2020", y = "Frequency", title = "Histogram of CP045_2020 by Type A") + scale_fill_manual(values = color_palette_A) + # Set color palette geom_vline(xintercept = 100, linetype = "dashed", color = "red") + # Add vertical line at x = 100 theme_minimal() # Print histogram print(histogram_CP045_2020) # Create boxplot boxplot_CP045_2020 <- ggplot(data, aes(x = A, y = CP045_2020, fill = A)) + geom_boxplot() + labs(x = "Household Type", y = "Expenditure in Energy in 2020", title = "Boxplot of Energy Expenditure by Household Type") + scale_fill_manual(values = color_palette_A) + # Set color palette geom_hline(yintercept = 100, linetype = "dashed", color = "red") + # Add horizontal line at y = 100 theme_minimal() # Print boxplot print(boxplot_CP045_2020) # Create boxplot boxplot_CP045_2020 <- ggplot(data, aes(x = V, y = CP045_2020, fill = V)) + geom_boxplot() + labs(x = "Country", y = "Expenditure in Energy in 2020", title = "Boxplot of Energy Expenditure by Country") + scale_fill_manual(values = color_palette) + # Set color palette geom_hline(yintercept = 100, linetype = "dashed", color = "red") + # Add horizontal line at y = 100 theme_minimal() # Print boxplot print(boxplot_CP045_2020)