###Benchmark of Babies and Toddlers' Apps in the Italian context
##DataChildMap Project - University of Padova - FISSPA
#Juliana E. Raffaghelli - juliana.raffaghelli@unipd.it

# Load necessary libraries
library(readxl)
library(tidyverse)
library(ggplot2)
library(rstatix)

# Read the raw benchmark sheet from the Excel file
data <- read_excel("path_to_your_file.xlsx")

# Data cleaning and preparation
# - Download:            store download label normalised to a compact form
#                        such as "10Mln" or "500k".
# - NumericDownloads:    the label converted to an absolute count; labels
#                        without a recognised "Mln"/"k" unit become NA.
# - EducatorsEvaluation: row mean of the columns ValueLearn through
#                        Data_Ethics (a positional range, so it depends on
#                        the column order of the sheet), ignoring NAs.
data <- data %>%
  mutate(
    # Collapse every spelling of the "million" suffix ("10 M+", "10 M +",
    # "10 Mln+", "10 Mln +") into "Mln". The plus sign is escaped so it is
    # matched literally rather than acting as a quantifier on the space,
    # which previously left labels such as "10 M+" unparsed.
    Download = str_replace_all(Download, "\\s*M(ln)?\\s*\\+?", "Mln"),
    # Drop any remaining plus signs (e.g. "500k+") and surrounding blanks.
    Download = str_replace_all(Download, "\\+", ""),
    Download = str_trim(Download),
    # Multiplier chosen from the unit, times the number in front of the unit.
    # The number is extracted with a regex (only when it is directly followed
    # by the unit at the end of the label), so rows that do not match yield
    # NA without triggering "NAs introduced by coercion" warnings.
    NumericDownloads = case_when(
      str_detect(Download, "Mln") ~ 1e6,
      str_detect(Download, "k") ~ 1e3,
      TRUE ~ NA_real_
    ) * as.numeric(
      str_extract(Download, "^[0-9]*\\.?[0-9]+(?=\\s*(Mln|k)$)")
    ),
    EducatorsEvaluation = rowMeans(
      select(., ValueLearn:Data_Ethics),
      na.rm = TRUE
    )
  )

# Store 'Type of App' as a factor so it is treated as a categorical
# variable in the plots
data <- data %>%
  mutate(`Type of App` = as.factor(`Type of App`))

##Descriptive statistics and Graphs

# Load the necessary libraries
library(dplyr)
library(psych)
library(officer)
library(flextable)

# Descriptive statistics are computed on the `data` object prepared above.
# The workbook is deliberately NOT read again here: re-reading it would
# overwrite `data` and discard the derived columns (NumericDownloads,
# EducatorsEvaluation) and the factor conversion that the plots and tests
# further down rely on.

# Skip the first column (as before) and keep only the numeric variables, so
# that text/factor columns are not passed to describe().
numeric_data <- data %>%
  select(-1) %>%
  select(where(is.numeric))

# Calculate descriptive statistics using the psych package
descriptive_stats <- describe(numeric_data)

# Keep only the statistics that are reported. The describe() result is
# converted to a plain data frame first so that select() operates on an
# ordinary data frame.
selected_stats <- as.data.frame(descriptive_stats) %>%
  select(mean, sd, median, min, max)

# Transpose: one row per statistic, one column per variable
descriptive_stats_df <- as.data.frame(t(selected_stats))

# Print the table
print(descriptive_stats_df)

# Create a flextable from the data frame.
# flextable() does not render row names, and in descriptive_stats_df the row
# names carry the statistic labels (mean, sd, median, min, max). They are
# moved into a regular "Statistic" column first so the exported table stays
# readable.
descriptive_stats_table <- descriptive_stats_df %>%
  tibble::rownames_to_column(var = "Statistic") %>%
  flextable()

# Create a new, empty Word document
doc <- read_docx()

# Add the flextable to the Word document
doc <- doc %>% body_add_flextable(descriptive_stats_table)

# Save the Word document to the working directory
print(doc, target = "Descriptive_Statistics.docx")


# Bar chart: downloads summed within each app type (NAs ignored)
data %>%
  group_by(`Type of App`) %>%
  summarise(TotalDownloads = sum(NumericDownloads, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = `Type of App`, y = TotalDownloads)) +
  geom_col() +
  ggtitle("Total Downloads by Type of App") +
  xlab("Type of App") +
  ylab("Total Downloads") +
  theme_minimal()

# Bar chart: number of public evaluations summed within each app type
# (NAs ignored)
data %>%
  group_by(`Type of App`) %>%
  summarise(TotalPublicEvals = sum(N_PublicEval, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = `Type of App`, y = TotalPublicEvals)) +
  geom_col() +
  ggtitle("Total Public Evaluations by Type of App") +
  xlab("Type of App") +
  ylab("Total Public Evaluations") +
  theme_minimal()

# Scatter plot: one semi-transparent point per app, downloads on the x axis
# and number of public evaluations on the y axis
data %>%
  ggplot(aes(NumericDownloads, N_PublicEval)) +
  geom_point(alpha = 0.5) +
  ggtitle("Downloads vs Public Evaluations") +
  xlab("Downloads") +
  ylab("Public Evaluations") +
  theme_minimal()
# Boxplot: distribution of the public store score next to the educators'
# mean score. The two columns are stacked into long format first
# (one row per app and evaluation type).
data_long <- pivot_longer(
  data[, c("Public_evalscore", "EducatorsEvaluation")],
  cols = everything(),
  names_to = "EvaluationType",
  values_to = "Score"
)

data_long %>%
  ggplot(aes(EvaluationType, Score)) +
  geom_boxplot() +
  ggtitle("Public Evaluation Scores vs Educators Evaluation") +
  xlab("Type of Evaluation") +
  ylab("Scores") +
  theme_minimal()

# Boxplot: distribution of each educator rating category. The five rating
# columns are stacked into long format first (one row per app and category).
data_long <- pivot_longer(
  data[, c("ValueLearn", "ValueEd", "InfoClear", "Safety", "Data_Ethics")],
  cols = everything(),
  names_to = "Category",
  values_to = "Score"
)

data_long %>%
  ggplot(aes(Category, Score)) +
  geom_boxplot() +
  ggtitle("Comparison of Evaluation Scores by Category") +
  xlab("Evaluation Category") +
  ylab("Scores") +
  theme_minimal()

##Inferential statistics

# Load the psych package.
# The package is no longer installed from inside the script: doing so
# re-downloads it on every run and fails without network access. Install it
# once, interactively, with install.packages("psych") if it is missing.
library(psych)

# The five educator rating items that form the scale
scale_items <- data[, c("ValueLearn", "ValueEd", "InfoClear", "Safety", "Data_Ethics")]

# Calculate Cronbach's alpha.
# The call is namespace-qualified because ggplot2 also exports a function
# named alpha(); which one a bare alpha() resolves to depends on the order in
# which the packages were attached.
cronbachs_alpha <- psych::alpha(scale_items)

# Print the raw alpha coefficient. psych::alpha() stores the scale-level
# summary in `$total`; it has no `$overall` element, so the previous
# `$overall$alpha` lookup printed NULL.
print(cronbachs_alpha$total$raw_alpha)

# Paired Wilcoxon signed-rank test: educators' mean score against the public
# store score, paired by app (row)
wilcox.test(
  x = data$EducatorsEvaluation,
  y = data$Public_evalscore,
  paired = TRUE
)

# Friedman Test
# Compares the five educator rating categories within apps.
# rstatix::friedman_test() expects long-format data and a formula of the form
# `score ~ group | block`; it cannot be called with a list of wide columns.
# Rows with a missing rating are dropped so every app contributes a complete
# block, then each remaining row gets an AppID that serves as the block.
data %>%
  select(ValueLearn, ValueEd, InfoClear, Safety, Data_Ethics) %>%
  drop_na() %>%
  mutate(AppID = row_number()) %>%
  pivot_longer(
    cols = -AppID,
    names_to = "Category",
    values_to = "Score"
  ) %>%
  friedman_test(Score ~ Category | AppID)

# Post-hoc analysis with Bonferroni correction
# Pairwise paired Wilcoxon signed-rank tests between the rating categories,
# with Bonferroni-adjusted p-values. This replaces a call to
# friedman_post_hoc(conover.test), neither of which is provided by the
# packages this script loads.
# NOTE(review): if a Conover post-hoc test is specifically required, it needs
# an additional package that this script does not currently depend on.
# Rows with a missing rating are dropped before reshaping, and pivot_longer()
# keeps the apps in the same order within every category, so the paired
# comparisons line up app by app.
post_hoc <- data %>%
  select(ValueLearn, ValueEd, InfoClear, Safety, Data_Ethics) %>%
  drop_na() %>%
  pivot_longer(
    cols = everything(),
    names_to = "Category",
    values_to = "Score"
  ) %>%
  wilcox_test(
    Score ~ Category,
    paired = TRUE,
    p.adjust.method = "bonferroni"
  )


