# Load packages
library(tidyverse)
library(readxl)

# Import the VirulenceFinder results table; the steps below use its
# `isolate` and `Virulence factor` columns
VirulenceFinder <- read_csv(file = "VirulenceFinder_results.csv")

# Count and list distinct VFs
# Unique virulence factors per isolate (result stays grouped by isolate)
distinct_vfs <- VirulenceFinder %>%
  group_by(isolate) %>%
  distinct(`Virulence factor`)

# part1: number of distinct virulence factors per isolate (column `n`)
part1 <- distinct_vfs %>%
  count(isolate)

# part2: one row per isolate, with that isolate's distinct virulence factors
# collected into a list-column. summarise() keeps `isolate` in its original
# type, so the join key below always matches part1 (a pivot_wider() /
# pivot_longer() round trip would turn the IDs into character column names).
part2 <- distinct_vfs %>%
  summarise(`Virulence factor` = list(`Virulence factor`))

# combine: isolate, n, and the list-column of virulence factors
combine <- full_join(part1, part2, by = "isolate")

#Classify ExPEC strains, correcting for different versions of key genes
# Each marker column is 1 when its pattern matches the isolate's
# `Virulence factor` entry and 0 otherwise; ExPEC_score is their sum.
keyExPEC <- combine %>%
  mutate(
    papAC = if_else(grepl("pap", `Virulence factor`), 1, 0),
    iutA = if_else(grepl("iutA", `Virulence factor`), 1, 0),
    kpsMII = if_else(grepl("kpsMII", `Virulence factor`), 1, 0),
    afadraBC = if_else(grepl("afa", `Virulence factor`), 1, 0),
    # The parentheses matter: `&` binds tighter than `|`, so an unparenthesised
    # `focC | sfaE & focI | sfaD` is read as focC | (sfaE & focI) | sfaD and a
    # lone focC or sfaD would count as a hit.
    # NOTE(review): assumes the intended rule is (focC or sfaE) AND
    # (focI or sfaD) -- confirm against the classification scheme in use.
    sfafocDE = if_else(
      (grepl("focC", `Virulence factor`) | grepl("sfaE", `Virulence factor`)) &
        (grepl("focI", `Virulence factor`) | grepl("sfaD", `Virulence factor`)),
      1, 0
    )
  ) %>%
  mutate(ExPEC_score = papAC + iutA + kpsMII + afadraBC + sfafocDE)

# Isolates scoring at least 2 of the 5 markers are labelled "ExPEC"
final_ExPEC <- transform(
  keyExPEC,
  pathotype_1 = if_else(ExPEC_score >= 2, "ExPEC", "other")
)

# Collapse every list-column to comma-separated text, row by row, and save
# the ExPEC table as CSV without row names
write.csv(
  mutate_if(rowwise(final_ExPEC), is.list, ~ paste(unlist(.), collapse = ", ")),
  file = "final_ExPEC.csv",
  row.names = FALSE
)

#Classify UPEC strains
# Flag each marker as 1 when its pattern matches the isolate's
# `Virulence factor` entry (0 otherwise), then total the four flags
keyUPEC <- combine %>%
  mutate(
    chuA = as.numeric(grepl("chuA", `Virulence factor`)),
    fyuA = as.numeric(grepl("fyuA", `Virulence factor`)),
    vat = as.numeric(grepl("vat", `Virulence factor`)),
    yfcV = as.numeric(grepl("yfcV", `Virulence factor`)),
    UPEC_score = chuA + fyuA + vat + yfcV
  )

# Isolates scoring at least 3 of the 4 markers are labelled "UPEC"
final_UPEC <- transform(
  keyUPEC,
  pathotype_2 = if_else(UPEC_score >= 3, "UPEC", "other")
)

# Collapse every list-column to comma-separated text, row by row, and save
# the UPEC table as CSV without row names
write.csv(
  mutate_if(rowwise(final_UPEC), is.list, ~ paste(unlist(.), collapse = ", ")),
  file = "final_UPEC.csv",
  row.names = FALSE
)

#Classify ExPEC-potential strains, correcting for different versions of key genes
# One 0/1 column per gene pattern: 1 when the pattern matches the isolate's
# `Virulence factor` entry, 0 otherwise
ExPEC_potential <- combine %>%
  mutate(
    astA = as.numeric(grepl("astA", `Virulence factor`)),
    cba = as.numeric(grepl("cba", `Virulence factor`)),
    cea = as.numeric(grepl("cea", `Virulence factor`)),
    cia = as.numeric(grepl("cia", `Virulence factor`)),
    cib = as.numeric(grepl("cib", `Virulence factor`)),
    clbB = as.numeric(grepl("clbB", `Virulence factor`)),
    cnf = as.numeric(grepl("cnf", `Virulence factor`)),
    cvaC = as.numeric(grepl("cvaC", `Virulence factor`)),
    ehxA = as.numeric(grepl("ehxA", `Virulence factor`)),
    espI = as.numeric(grepl("espI", `Virulence factor`)),
    etsC = as.numeric(grepl("etsC", `Virulence factor`)),
    fimH = as.numeric(grepl("fimH", `Virulence factor`)),
    focG = as.numeric(grepl("focG", `Virulence factor`)),
    hlyF = as.numeric(grepl("hlyF", `Virulence factor`)),
    hra = as.numeric(grepl("hra", `Virulence factor`)),
    ibeA = as.numeric(grepl("ibeA", `Virulence factor`)),
    iha = as.numeric(grepl("iha", `Virulence factor`)),
    ireA = as.numeric(grepl("ireA", `Virulence factor`)),
    iroN = as.numeric(grepl("iroN", `Virulence factor`)),
    irp2 = as.numeric(grepl("irp2", `Virulence factor`)),
    iss = as.numeric(grepl("iss", `Virulence factor`)),
    iucC = as.numeric(grepl("iucC", `Virulence factor`)),
    kpsE = as.numeric(grepl("kpsE", `Virulence factor`)),
    lpfA = as.numeric(grepl("lpfA", `Virulence factor`)),
    mcbA = as.numeric(grepl("mcbA", `Virulence factor`)),
    mchB = as.numeric(grepl("mchB", `Virulence factor`)),
    mcmA = as.numeric(grepl("mcmA", `Virulence factor`)),
    neuC = as.numeric(grepl("neuC", `Virulence factor`)),
    ompT = as.numeric(grepl("ompT", `Virulence factor`)),
    pic = as.numeric(grepl("pic", `Virulence factor`)),
    sat = as.numeric(grepl("sat", `Virulence factor`)),
    sfaS = as.numeric(grepl("sfaS", `Virulence factor`)),
    sitA = as.numeric(grepl("sitA", `Virulence factor`)),
    senB = as.numeric(grepl("senB", `Virulence factor`)),
    tcpC = as.numeric(grepl("tcpC", `Virulence factor`)),
    terC = as.numeric(grepl("terC", `Virulence factor`)),
    traT = as.numeric(grepl("traT", `Virulence factor`)),
    usp = as.numeric(grepl("usp", `Virulence factor`))
  ) %>%
  # Total number of the 38 patterns detected for each isolate
  mutate(
    ExPEC_potential_score =
      astA + cba + cea + cia + cib + clbB + cnf + cvaC + ehxA + espI +
      etsC + fimH + focG + hlyF + hra + ibeA + iha + ireA + iroN + irp2 +
      iss + iucC + kpsE + lpfA + mcbA + mchB + mcmA + neuC + ompT + pic +
      sat + sfaS + sitA + senB + tcpC + terC + traT + usp
  )

# Collapse every list-column to comma-separated text, row by row, and save
# the ExPEC-potential table as CSV without row names
write.csv(
  mutate_if(rowwise(ExPEC_potential), is.list, ~ paste(unlist(.), collapse = ", ")),
  file = "ExPECpotential.csv",
  row.names = FALSE
)


