Packages

Data

# Point the session at the folder that holds the input CSV.
# NOTE: machine-specific absolute path; update it before running elsewhere.
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')

# Load the long-format data; character columns are read in as factors.
covid.df <- read.csv(
  file = "covidseasonality_long.csv",
  stringsAsFactors = TRUE
)

# Add an `author` factor column copied from `study`, and turn `month` into a
# factor.
covid.df[["author"]] <- as.factor(covid.df[["study"]])
covid.df[["month"]] <- as.factor(covid.df[["month"]])

229E

# Isolate the 229E rows.
# which() discards NA comparison results, so records with a missing
# virus.type are skipped instead of coming back as all-NA rows.
# The outer parentheses print the subset as well as assigning it.
(virus.df <- covid.df[which(covid.df$virus.type == "229E"), ])
##              study                site     country virus.type month cases
## 1    Brittain-Long          Gothenburg      Sweden       229E     1     5
## 2    Brittain-Long          Gothenburg      Sweden       229E     2     4
## 3    Brittain-Long          Gothenburg      Sweden       229E     3     5
## 4    Brittain-Long          Gothenburg      Sweden       229E     4     3
## 5    Brittain-Long          Gothenburg      Sweden       229E     5     5
....
# Split the 229E studies into "complete" and "incomplete" sets, express each
# row's cases as a percentage of its study total, and save the result.

# Complete studies: at least 12 rows with a non-zero case count.
# Rows with zero (or missing) counts are dropped before the tally, so `n` is
# the number of non-zero rows per study.
complete.df <- virus.df %>%
  filter(cases > 0) %>%
  group_by(study) %>%
  add_tally() %>%
  filter(n >= 12) # filter out studies with <12 mo. of data

# Every study with at least 12 rows, zero counts included.
all.df <- virus.df %>%
  group_by(study) %>%
  add_tally() %>%
  filter(n >= 12)

# Incomplete studies: those in all.df that did not qualify as complete.
# The exclusion is done by study, not with a row-wise setdiff(): setdiff()
# compares whole rows (including the tally column `n`), so a study whose `n`
# differs between all.df and complete.df was never removed and ended up in
# both sets. setdiff() also silently collapses duplicated rows.
incomplete.df <- all.df %>%
  filter(!(study %in% complete.df$study))
incomplete.df$cases <- incomplete.df$cases + 1 # add 1 to all cases to get rid of 0's

# Create a column with cases as a proportion (in percent) for each study & df
complete.df <- complete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

incomplete.df <- incomplete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

# Join
total <- rbind(complete.df, incomplete.df)

# write to file
# NOTE: machine-specific absolute path; update it before running elsewhere.
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')
write.csv(total, "229E_total.csv")

NL63

# Isolate the NL63 rows.
# which() discards NA comparison results, so records with a missing
# virus.type are skipped instead of coming back as all-NA rows.
# The outer parentheses print the subset as well as assigning it.
(virus.df <- covid.df[which(covid.df$virus.type == "NL63"), ])
##              study                site     country virus.type month cases
## 13   Brittain-Long          Gothenburg      Sweden       NL63     1    10
## 14   Brittain-Long          Gothenburg      Sweden       NL63     2    31
## 15   Brittain-Long          Gothenburg      Sweden       NL63     3    18
## 16   Brittain-Long          Gothenburg      Sweden       NL63     4     6
## 17   Brittain-Long          Gothenburg      Sweden       NL63     5     8
....
# Split the NL63 studies into "complete" and "incomplete" sets, express each
# row's cases as a percentage of its study total, and save the result.

# Complete studies: at least 12 rows with a non-zero case count.
# Rows with zero (or missing) counts are dropped before the tally, so `n` is
# the number of non-zero rows per study.
complete.df <- virus.df %>%
  filter(cases > 0) %>%
  group_by(study) %>%
  add_tally() %>%
  filter(n >= 12) # filter out studies with <12 mo. of data

# Every study with at least 12 rows, zero counts included.
all.df <- virus.df %>%
  group_by(study) %>%
  add_tally() %>%
  filter(n >= 12)

# Incomplete studies: those in all.df that did not qualify as complete.
# The exclusion is done by study, not with a row-wise setdiff(): setdiff()
# compares whole rows (including the tally column `n`), so a study whose `n`
# differs between all.df and complete.df was never removed and ended up in
# both sets. setdiff() also silently collapses duplicated rows.
incomplete.df <- all.df %>%
  filter(!(study %in% complete.df$study))
incomplete.df$cases <- incomplete.df$cases + 1 # add 1 to all cases to get rid of 0's

# Create a column with cases as a proportion (in percent) for each study & df
complete.df <- complete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

incomplete.df <- incomplete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

# Join
total <- rbind(complete.df, incomplete.df)

# write to file
# NOTE: machine-specific absolute path; update it before running elsewhere.
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')
write.csv(total, "NL63_total.csv")

OC43

# Isolate the OC43 rows.
# which() discards NA comparison results, so records with a missing
# virus.type are skipped instead of coming back as all-NA rows.
# The outer parentheses print the subset as well as assigning it.
(virus.df <- covid.df[which(covid.df$virus.type == "OC43"), ])
##              study                site     country virus.type month cases
## 25   Brittain-Long          Gothenburg      Sweden       OC43     1    29
## 26   Brittain-Long          Gothenburg      Sweden       OC43     2    22
## 27   Brittain-Long          Gothenburg      Sweden       OC43     3     9
## 28   Brittain-Long          Gothenburg      Sweden       OC43     4     4
## 29   Brittain-Long          Gothenburg      Sweden       OC43     5     2
....
# Split the OC43 studies into "complete" and "incomplete" sets, express each
# row's cases as a percentage of its study total, and save the result.

# Complete studies: at least 12 rows with a non-zero case count.
# Rows with zero (or missing) counts are dropped before the tally, so `n` is
# the number of non-zero rows per study.
complete.df <- virus.df %>%
  filter(cases > 0) %>%
  group_by(study) %>%
  add_tally() %>%
  filter(n >= 12) # filter out studies with <12 mo. of data

# Every study with at least 12 rows, zero counts included.
all.df <- virus.df %>%
  group_by(study) %>%
  add_tally() %>%
  filter(n >= 12)

# Incomplete studies: those in all.df that did not qualify as complete.
# The exclusion is done by study, not with a row-wise setdiff(): setdiff()
# compares whole rows (including the tally column `n`), so a study whose `n`
# differs between all.df and complete.df was never removed and ended up in
# both sets. setdiff() also silently collapses duplicated rows.
incomplete.df <- all.df %>%
  filter(!(study %in% complete.df$study))
incomplete.df$cases <- incomplete.df$cases + 1 # add 1 to all cases to get rid of 0's

# Create a column with cases as a proportion (in percent) for each study & df
complete.df <- complete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

incomplete.df <- incomplete.df %>%
  group_by(study) %>%
  mutate(prop.study = (cases / sum(cases, na.rm = TRUE)) * 100)

# Join
total <- rbind(complete.df, incomplete.df)

# write to file
# NOTE: machine-specific absolute path; update it before running elsewhere.
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')
write.csv(total, "OC43_total.csv")

HKU1

# Isolate the HKU1 rows.
# which() discards NA comparison results, so records with a missing
# virus.type are skipped instead of coming back as all-NA rows.
# The outer parentheses print the subset as well as assigning it.
(virus.df <- covid.df[which(covid.df$virus.type == "HKU1"), ])
##              study                site  country virus.type month cases
## 85         Galanti       New York City      USA       HKU1     1     9
## 86         Galanti       New York City      USA       HKU1     2     1
## 87         Galanti       New York City      USA       HKU1     3     2
## 88         Galanti       New York City      USA       HKU1     4     2
## 89         Galanti       New York City      USA       HKU1     5     0
....
### HKU1 has no complete studies, so every row is handled as incomplete ###

# Start from the full HKU1 subset and shift every count up by one so that no
# zero counts remain.
incomplete.df <- virus.df
incomplete.df[["cases"]] <- virus.df[["cases"]] + 1

# Per study, express each row's cases as a percentage of that study's total.
incomplete.df <- mutate(
  group_by(incomplete.df, study),
  prop.study = (cases / sum(cases, na.rm = TRUE)) * 100
)

# Save the result alongside the other per-virus outputs.
# NOTE: machine-specific absolute path; update it before running elsewhere.
setwd('C:/Users/april/OneDrive/Desktop/Covid/from Dan')
write.csv(x = incomplete.df, file = "HKU1_total.csv")