Packages
Data
# Point the session at the folder that holds the input CSV.
# NOTE: this path is machine-specific and must be edited before running elsewhere.
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')

# Load the input table; text columns are read in as factors.
covid.df <- read.csv(
  file = "covidseasonality_long.csv",
  stringsAsFactors = TRUE
)

# Add an `author` column holding a factor copy of the existing `study` column.
covid.df[["author"]] <- as.factor(covid.df[["study"]])

# Recode `month` as a factor, in place.
covid.df[["month"]] <- as.factor(covid.df[["month"]])
229E
# Isolate virus
# `%in%` never yields NA, so rows whose virus.type is missing are dropped
# rather than coming back as all-NA rows (which `==` indexing would produce).
# The outer parentheses make the assigned value visible so it is also printed.
(virus.df <- covid.df[covid.df$virus.type %in% "229E", ])
## study site country virus.type month cases
## 1 Brittain-Long Gothenburg Sweden 229E 1 5
## 2 Brittain-Long Gothenburg Sweden 229E 2 4
## 3 Brittain-Long Gothenburg Sweden 229E 3 5
## 4 Brittain-Long Gothenburg Sweden 229E 4 3
## 5 Brittain-Long Gothenburg Sweden 229E 5 5
....
# Split the 229E rows into "complete" and "incomplete" studies, express the
# counts as within-study percentages, and save the combined table.

# Complete studies: drop zero-count rows, then keep only the studies that
# still have at least 12 rows (presumably one row per month).
# `n` is the per-study row count added by add_tally().
complete.df <- virus.df %>%
  group_by(study) %>%
  filter(cases > 0) %>% # filter out 0 case counts
  add_tally() %>%
  filter(n >= 12) # filter out studies with <12 mo. of data

# Every study with at least 12 rows, zero counts included.
all.df <- virus.df %>%
  group_by(study) %>%
  add_tally() %>%
  filter(n >= 12)

# Incomplete studies: the studies in all.df that are not complete studies.
# Matching on `study` (rather than a whole-row setdiff, which also compares
# the tally column `n` and collapses duplicate rows) guarantees that a study
# can never land in both tables and be written out twice.
incomplete.df <- all.df %>%
  anti_join(complete.df, by = "study") %>%
  mutate(cases = cases + 1) # add 1 to all cases to get rid of 0's

# Create a column with cases as a percentage of each study's total, per table
complete.df <- complete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)
incomplete.df <- incomplete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

# Join
total <- bind_rows(complete.df, incomplete.df)

# write to file (machine-specific path; update before running elsewhere)
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')
write.csv(total, "229E_total.csv")
NL63
# Isolate virus
# `%in%` never yields NA, so rows whose virus.type is missing are dropped
# rather than coming back as all-NA rows (which `==` indexing would produce).
# The outer parentheses make the assigned value visible so it is also printed.
(virus.df <- covid.df[covid.df$virus.type %in% "NL63", ])
## study site country virus.type month cases
## 13 Brittain-Long Gothenburg Sweden NL63 1 10
## 14 Brittain-Long Gothenburg Sweden NL63 2 31
## 15 Brittain-Long Gothenburg Sweden NL63 3 18
## 16 Brittain-Long Gothenburg Sweden NL63 4 6
## 17 Brittain-Long Gothenburg Sweden NL63 5 8
....
# Split the NL63 rows into "complete" and "incomplete" studies, express the
# counts as within-study percentages, and save the combined table.

# Complete studies: drop zero-count rows, then keep only the studies that
# still have at least 12 rows (presumably one row per month).
# `n` is the per-study row count added by add_tally().
complete.df <- virus.df %>%
  group_by(study) %>%
  filter(cases > 0) %>% # filter out 0 case counts
  add_tally() %>%
  filter(n >= 12) # filter out studies with <12 mo. of data

# Every study with at least 12 rows, zero counts included.
all.df <- virus.df %>%
  group_by(study) %>%
  add_tally() %>%
  filter(n >= 12)

# Incomplete studies: the studies in all.df that are not complete studies.
# Matching on `study` (rather than a whole-row setdiff, which also compares
# the tally column `n` and collapses duplicate rows) guarantees that a study
# can never land in both tables and be written out twice.
incomplete.df <- all.df %>%
  anti_join(complete.df, by = "study") %>%
  mutate(cases = cases + 1) # add 1 to all cases to get rid of 0's

# Create a column with cases as a percentage of each study's total, per table
complete.df <- complete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)
incomplete.df <- incomplete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

# Join
total <- bind_rows(complete.df, incomplete.df)

# write to file (machine-specific path; update before running elsewhere)
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')
write.csv(total, "NL63_total.csv")
OC43
# Isolate virus
# `%in%` never yields NA, so rows whose virus.type is missing are dropped
# rather than coming back as all-NA rows (which `==` indexing would produce).
# The outer parentheses make the assigned value visible so it is also printed.
(virus.df <- covid.df[covid.df$virus.type %in% "OC43", ])
## study site country virus.type month cases
## 25 Brittain-Long Gothenburg Sweden OC43 1 29
## 26 Brittain-Long Gothenburg Sweden OC43 2 22
## 27 Brittain-Long Gothenburg Sweden OC43 3 9
## 28 Brittain-Long Gothenburg Sweden OC43 4 4
## 29 Brittain-Long Gothenburg Sweden OC43 5 2
....
# Split the OC43 rows into "complete" and "incomplete" studies, express the
# counts as within-study percentages, and save the combined table.

# Complete studies: drop zero-count rows, then keep only the studies that
# still have at least 12 rows (presumably one row per month).
# `n` is the per-study row count added by add_tally().
complete.df <- virus.df %>%
  group_by(study) %>%
  filter(cases > 0) %>% # filter out 0 case counts
  add_tally() %>%
  filter(n >= 12) # filter out studies with <12 mo. of data

# Every study with at least 12 rows, zero counts included.
all.df <- virus.df %>%
  group_by(study) %>%
  add_tally() %>%
  filter(n >= 12)

# Incomplete studies: the studies in all.df that are not complete studies.
# Matching on `study` (rather than a whole-row setdiff, which also compares
# the tally column `n` and collapses duplicate rows) guarantees that a study
# can never land in both tables and be written out twice.
incomplete.df <- all.df %>%
  anti_join(complete.df, by = "study") %>%
  mutate(cases = cases + 1) # add 1 to all cases to get rid of 0's

# Create a column with cases as a percentage of each study's total, per table
complete.df <- complete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)
incomplete.df <- incomplete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

# Join
total <- bind_rows(complete.df, incomplete.df)

# write to file (machine-specific path; update before running elsewhere)
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')
write.csv(total, "OC43_total.csv")
HKU1
# Isolate virus
# `%in%` never yields NA, so rows whose virus.type is missing are dropped
# rather than coming back as all-NA rows (which `==` indexing would produce).
# The outer parentheses make the assigned value visible so it is also printed.
(virus.df <- covid.df[covid.df$virus.type %in% "HKU1", ])
## study site country virus.type month cases
## 85 Galanti New York City USA HKU1 1 9
## 86 Galanti New York City USA HKU1 2 1
## 87 Galanti New York City USA HKU1 3 2
## 88 Galanti New York City USA HKU1 4 2
## 89 Galanti New York City USA HKU1 5 0
....
### Note, we have no complete studies for HKU1 so we will only look at incomplete ###
# NOTE(review): unlike the 229E / NL63 / OC43 sections, no ">= 12 rows per
# study" filter is applied here -- confirm that this is intended.
incomplete.df <- virus.df
incomplete.df$cases <- incomplete.df$cases + 1 # add 1 to all cases to get rid of 0's

# Create a column with cases as a percentage of each study's total.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
incomplete.df <- incomplete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

# write to file (machine-specific path; update before running elsewhere)
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')
write.csv(incomplete.df, "HKU1_total.csv")