# Required Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(sampling)
library(readxl)
# Sample to be evaluated by the research team ----
# Margin of error of a proportion for the research-team sample
# (normal approximation, no finite population correction).
confidence_level <- 0.90 # Two-sided confidence level
n <- 256 # Number of items in the sample
# Assumed proportion; 0.5 maximises p * (1 - p), i.e. the worst-case scenario
# when no better estimate is available.
p <- 0.5

# Critical value of the standard normal for the chosen confidence level
z_score <- qnorm(1 - (1 - confidence_level) / 2)

# Standard error of the proportion, then scaled by the critical value
standard_error <- sqrt((p * (1 - p)) / n)
margin_of_error <- z_score * standard_error

# Same quantity expressed in percentage points
margin_of_error_percent <- margin_of_error * 100

# Report the result
cat(
  "The margin of error for a sample of", n,
  "items with a", confidence_level * 100,
  "% confidence interval is:", margin_of_error_percent, "\n"
)
## The margin of error for a sample of 256 items with a 90 % confidence interval is: 5.140168
# Sample to be evaluated by external evaluators ----
# Margin of error of a proportion for the external-evaluator sample, applying
# the finite population correction factor (N - n) / (N - 1).
# NOTE(review): N is 264 here, while other text in this file mentions a total
# of 256 -- confirm which figure is the actual population size.

# Sample size
n <- 72
# Population size
N <- 264
# The correction factor is only meaningful for a sample drawn from a
# population at least as large as the sample.
stopifnot(n > 0, N > 1, n <= N)
# Estimated proportion (0.5 for conservative estimation)
p <- 0.5
# Two-sided confidence level
confidence_level <- 0.90
# Z-value for the confidence interval, derived from the confidence level
# instead of the rounded constant 1.645 so that it matches the calculation
# used for the research-team sample.
Z <- qnorm(1 - (1 - confidence_level) / 2)
# Margin of error calculation (standard error times finite population
# correction, scaled by the critical value)
ME <- Z * sqrt((p * (1 - p) / n) * ((N - n) / (N - 1)))
# Convert the margin of error to a percentage
ME_percentage <- ME * 100
# Print the margin of error as a percentage
cat("The margin of error is:", ME_percentage, "%\n")
## The margin of error is: 8.2814 %
# Sample of 72 request-response pairs (out of a total of 256), divided into 3 groups ----
# Draw a stratified random sample from the database, randomly split the
# sampled rows among the external evaluators, write the result to a CSV file
# and print a preview.

# Initial setup: fix the RNG seed so that both the sample and the evaluator
# assignment are reproducible
set.seed(93485)

# Folder that holds the database and receives the sample file
dir_datos <- "C:/Users/Carlos/Desktop/Investigación IA/2-Base de datos"

# Load the dataset from the location on your computer
ruta_archivo <- file.path(dir_datos, "1-Database.xlsx")
datos <- read_xlsx(ruta_archivo)
if (!"Tema" %in% names(datos)) {
  stop("Column 'Tema' not found in ", ruta_archivo, call. = FALSE)
}

# Total number of desired sample elements
total_elementos <- 72

# Get the number of unique strata
estratos <- unique(datos$Tema)
num_estratos <- length(estratos)

# Equal allocation: every stratum contributes the same number of elements,
# so the total must split into a whole number per stratum.
if (num_estratos == 0 || total_elementos %% num_estratos != 0) {
  stop(
    "Cannot split ", total_elementos, " elements evenly across ",
    num_estratos, " strata",
    call. = FALSE
  )
}
n_por_estrato <- rep(total_elementos / num_estratos, num_estratos)

# Sampling is without replacement, so every stratum needs at least as many
# rows as are requested from it.
tamanos_estrato <- table(datos$Tema, useNA = "ifany")
if (any(tamanos_estrato < n_por_estrato[1])) {
  stop(
    "At least one stratum has fewer than ", n_por_estrato[1], " rows",
    call. = FALSE
  )
}

# Stratified sampling with equal allocation; within each stratum rows are
# drawn by simple random sampling without replacement ("srswor").
# NOTE(review): the sampling::strata() documentation asks for data sorted by
# the stratification column. The data are not sorted here (sorting would
# change which rows the fixed seed selects) -- confirm the spreadsheet is
# already ordered by Tema.
muestra <- strata(datos, stratanames = "Tema", size = n_por_estrato, method = "srswor")
datos_muestra <- getdata(datos, muestra)

# Assign random evaluators: build a balanced vector of evaluator labels
# ("Evaluador A", "Evaluador B", ...) and shuffle it across the sampled rows
num_evaluadores <- 3
nombres_evaluadores <- paste("Evaluador", LETTERS[seq_len(num_evaluadores)])
evaluadores <- rep(nombres_evaluadores, length.out = nrow(datos_muestra))

# Add the shuffled evaluator labels, and add an increment of 1 to ID_unit so
# that it matches the value in the original DB
datos_muestra <- datos_muestra %>%
  mutate(
    evaluador = sample(evaluadores),
    ID_unit = ID_unit + 1
  )

# Define the path to save the sample file
ruta_muestra <- file.path(dir_datos, "original_randomized_sample.csv")
write.csv(datos_muestra, ruta_muestra, row.names = FALSE, fileEncoding = "UTF-8")

# Display the first rows of the sample (without the long text columns)
head(datos_muestra) %>%
  select(-Pregunta, -Respuesta)
## Asignatura Precisión Relevancia Coherencia Id_unit Tema ID_unit
## 4 Bioquímica 5 5 5 5 Aminoácidos 5
## 7 Bioquímica 4 5 5 8 Aminoácidos 8
## 8 Bioquímica 5 5 5 9 Aminoácidos 9
## 12 Bioquímica 5 5 5 13 Monosacáridos 13
## 13 Bioquímica 5 5 5 14 Monosacáridos 14
## 19 Bioquímica 5 5 5 20 Monosacáridos 20
## Prob Stratum evaluador
## 4 0.2727273 1 Evaluador C
## 7 0.2727273 1 Evaluador B
## 8 0.2727273 1 Evaluador A
## 12 0.2727273 2 Evaluador B
## 13 0.2727273 2 Evaluador A
## 19 0.2727273 2 Evaluador A
"Once you have the CSV file, consider importing it to Excel using UTF-8 encoding and not opening it directly, as it might contain strange characters."
## [1] "Once you have the CSV file, consider importing it to Excel using UTF-8 encoding and not opening it directly, as it might contain strange characters."