##•••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••
### Data analysis: Temporal, spatial and household socio-economic dynamics of typhoid: Towards forecasting outbreaks in Kasese district, Uganda
#••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••
# READ IN DATABASES S1 & S2
MOH_DB<- read.csv("MOH_NAT_DIS_DB.csv",sep = ",",header = T)
##***************************************************************
#### FORECASTING OUTBREAKS AT NATIONAL AND DISTRICT LEVEL
##***************************************************************
MOH_2016 <- read.csv("MOH_2016.csv",sep = ",",header = T)
MOH_2016_UG<-MOH_2016[,5,]
MOH_2016_KAS<-MOH_2016[,4,]
x <- read.csv("MOH_Kasese.csv",sep = ",",header = T)
bx <- x[,1:5]
FULL_MOH<-join(bx,MOH_2016, type="full")# USE PLYR LIBRARY
##••••••••••••••••••••••••••••••••••••••
### Case control data analysis:
#•••••••••••••••••••••••••••••••••••••••
## functions for univariable analysis
# function creates a table with OR, CI and p value. x required is summary(model)
mmt <- function(y){
x <-summary(y)
bla <-strsplit(as.character(x$call)[[2]],split='~', fixed=TRUE)[[1]][2]
variable <-gsub(' ','',strsplit(bla,split='+', fixed=TRUE)[[1]][1])
#OddsRatio <- round(exp(x$coefficients[,1][2:nrow(x$coefficients)]),2)
OddsRatio <- round(exp(x$coefficients[,1]),2)
LL <-round(exp(x$coefficients[,1] - 1.96* x$coefficients[,3]),2)
UL <-round(exp(x$coefficients[,1] + 1.96* x$coefficients[,3]),2)
pv <- round(x$coefficients[,5],3)
#pvalue <-c(round(an1["Pr(>Chisq)"][2,],3),rep("",nrow(x$coefficients)-1))
test <- data.frame(Variable=c(variable, 1:nrow(x$coefficients)), OR=c("",OddsRatio), LL=c("",LL), UL=c("",UL), pv=c(x$waldtest["pvalue"],pv), filter=c("yes", 1:nrow(x$coefficients)))
test$Variable <- c(variable, rownames(test)[2:length(rownames(test))])
#test$NAs <- c(na_count[variable], rep("",nrow(x$coefficients)-1))
rownames(test) <- NULL
return(test)
}
# C(Using Database S2 AND CASE CONTROL DATA SETS)
# read data in
#clg <-read_csv("cond_logreg_data.csv")
load("cond_logreg_data.RData")
# create matching variable
clg$match <- substr(clg$Questionnairenumber, 1,5)
# clean dataset to use in regression model
clg$Sex <- as.factor(as.character(clg$Sex))
clg$Levelofeducation <- as.factor(as.character(clg$Levelofeducation))
clg$Householdmembers_cut <- recode(as.factor(clg$Householdmembers!="1-5"), "TRUE"="More than 5", "FALSE"= "1-5")
clg$Doyoupreheatleftoverfood[is.na(clg$Doyoupreheatleftoverfood)] <- "No preheated food" # change NAs to no left over food
clg$Washhandsaftertoiletorlatrine<- as.factor(clg$Washhandsaftertoiletorlatrine)
clg$Evenonurination<- as.factor(clg$Evenonurination) ### not likely to be independent from presvious question
clg$Soapandwatertowashhands<- as.factor(clg$Soapandwatertowashhands) ### not likely to be independent from presvious question
clg$Usetoiletpaper<- as.factor(clg$Usetoiletpaper) ###
clg$Handlechildrenswaste<- as.factor(clg$Handlechildrenswaste) ###
clg$Wherefoodisprepared<- as.factor(clg$Wherefoodisprepared) ###
clg$Eatreadytoeatfoods<- as.factor(clg$Eatreadytoeatfoods) ###
clg$Howdoyoupreparethevegetables<- as.factor(clg$Howdoyoupreparethevegetables) ###
clg$Howdoyoupreparethevegetables[clg$Howdoyoupreparethevegetables=="1"]<-"2"
clg$Sharethesamebowlwhileeatingfoodesp<- as.factor(clg$Sharethesamebowlwhileeatingfoodesp) ###
clg$Doyouhaveanimals<- as.factor(clg$Doyouhaveanimals) ###
clg$Feverishconditions<- as.factor(clg$Feverishconditions) ###
clg$Diarrhoea<- as.factor(clg$Diarrhoea) ###
clg$Constipation<- as.factor(clg$Constipation) ###
clg$Malaise<- as.factor(clg$Malaise) ###
clg$Abdominalpains<- as.factor(clg$Abdominalpains) ###
clg$Swollenpainfulabdomen<- as.factor(clg$Swollenpainfulabdomen) ###
clg$Vomiting<- as.factor(clg$Vomiting) ###
clg$Painfulabdomennotswollen<- as.factor(clg$Painfulabdomennotswollen) ###
clg$Blood<- as.factor(clg$Blood) ###
clg$Stool<- as.factor(clg$Stool) ###
clg$Urine<- as.factor(clg$Urine) ###
clg$Saliva<- as.factor(clg$Saliva)
clg$Treatdrinkingwaterpractice<- as.factor(clg$Treatdrinkingwaterpractice)
clg$Dontshareutensilswiththevictim<- as.factor(clg$Dontshareutensilswiththevictim)
clg$Eatfoodwhilehot<- as.factor(clg$Eatfoodwhilehot)
clg$Dontsharethesamebowloffoodwiththevictim<- as.factor(clg$Dontsharethesamebowloffoodwiththevictim)###only on positive
clg$Noadvicegiven<- as.factor(clg$Noadvicegiven)
clg$Watersourcesharedbyanimals<- as.factor(clg$Watersourcesharedbyanimals)
clg$Humanscandirectlycontaminatewateratsource<- as.factor(clg$Humanscandirectlycontaminatewateratsource)
clg$Humansindirectcontactwithatwaterwhilefetching<- as.factor(clg$Humansindirectcontactwithatwaterwhilefetching)
clg$Anypitlatrinesclosetowatersource[clg$Anypitlatrinesclosetowatersource==""]<-"No"
clg$Anypitlatrinesclosetowatersource<- as.factor(clg$Anypitlatrinesclosetowatersource)
clg$Freeofalgae<- as.factor(clg$Freeofalgae)
clg$Sharedwithanimals<- as.factor(clg$Sharedwithanimals)
clg$Sharedwithanimals[clg$Sharedwithanimals==""]<-"No"
clg$Hasanimaldroppingsorfaeces<- as.factor(clg$Hasanimaldroppingsorfaeces)
clg$Foodisstoredonthefloor<- as.factor(clg$Foodisstoredonthefloor)
clg$EvidenceofinfestationwithfliesFPA<- as.factor(clg$EvidenceofinfestationwithfliesFPA)
clg$Closetothepitlatrineortoilet[clg$Closetothepitlatrineortoilet==""]<-"No"
clg$Closetothepitlatrineortoilet<- as.factor(clg$Closetothepitlatrineortoilet)
clg$PitlatrineUnderuse[clg$PitlatrineUnderuse==""]<-"No"
clg$PitlatrineUnderuse<- as.factor(clg$PitlatrineUnderuse)
clg$Contaminationintheenvironment[is.na(clg$Contaminationintheenvironment)]<-"No"
clg$Contaminationintheenvironment<- as.factor(clg$Contaminationintheenvironment)
cld<- droplevels(clg)
clg$DrinkingWaterTx <- rep(NA, nrow(clg))
clg$DrinkingWaterTx[clg$Treatdrinkingwaterbeforestorage=="No"] <- "No"
clg$DrinkingWaterTx[clg$Boiling=="Yes"] <- "Boiling"
clg$DrinkingWaterTx[clg$Chlorination=="Yes"] <- "Chlorination"
clg$DrinkingWaterTx[clg$Solardisinfection=="Yes"] <- "Solardisinfection"
clg$DrinkingWaterTx[clg$Filtration=="Yes"] <- "Filtration"
####************************************************
#### COMMENTS ON SOME COLUMS THAT CAN BE RECORDED OR REMOVED
####************************************************
### Treatdrinkingwaterbeforestorage = YES & NO ,if yes this is how they treat colunm (Boiling Chlorination Solardisinfection Filtration)
### Can modify this how do treatwater response, Don' treat == NO from before, Boiling, Chlorinating, Solar and Filtration as response for Yes
### Lackoffirewoodtoboil,Lackofmoneytobuychlorinetablets,Itisnotinherenttotreatdrinkingwater,Believethewaterissafe,Havenotime,Feelsickwhentheytaketreatedwater,Tastelessafterboiling,Unavailabilityofchlorinetablets,Fearforchildrensickness
## these are follow up responses for those who do not treat water.. asking why they do not treat..(Consider droping them or recode including response "treat water")
## Accesstosafedrinkingwater & Accesstotheuntreateddrinkingwater. Redundant question... if someone responded yes to Tap water
## WHICH ANIMALS TO HAVE PET=Dog,Guineapigs &Cat, LIVESTOCK=(Pigs,Goats,Cows), POULTRY=(Chicken ,Ducks)
# remove useless variables!
clg <- select(clg, -Dateofinterview,-Questionnairenumber, - UniqueKey, -Nameofparticipant,-Bonemarrow,
-Subcounty, -Parish, -Village, -Parish.latitude, -Victimshouldnotpreparefooduntildeclaredfreeoftyphoidfever,
-Parish.longitude, -Examplesofreadytoeatfoods, -Householdmembers,
-Positioninthehousehold, -typeofwatersource1, -Typeofwatersource2, -Notes,
-Watersourceinspected,-Watercollectionvessel, -Speciesoffaecalsample1,
-Fromwheredoyouwashyourhand., -Whydontyouseekmedicalattention,
-Designatedcupkept,-Wheredoyoustoreprepredorleftoverfood,
-FS1collected, -SalmonellaFS1, -ShigellaFS1, -EColiFS1,
-FS2collected, -Speciesoffaecalsample2, -SalmonellaFS2,
-ShigellaFS2, -EColiFS2, -Contaminated.fecal.sample, -PresenceofseparatevesselforDW
- KindofseparatevesseiforDW, -Presenceofdesignatedcup -Accesstosafedrinkingwater,
-Accesstotheuntreateddrinkingwater, -Fromwheredoyouwashyourhand.,-Wateronlytowashhands,
-Ashandwater, -Rubagainstthesurface, -Usemaizecobs, -Usenaturalanalcleansingleaves,
-Usewaterforanal, -Usewaterandsaopforanal, -Managementofchildrensfecalwaste,
-Howsoondoyoueatthefoodafterpreparation, -Doyoupreheatleftoverfood
-Wherefoodisprepared, -Howsoondoyoueatthefoodafterpreparation,
-Wheredoyoustoreprepredorleftoverfood,-Doyouusuallyhaveleftovers,
-Howdoyoudealwithleftovers,-Utensilstodryafterwashing, -Utensilsinthekitchen,
-Arevegetablesusuallyapartofyourdiet, -Anyfamilymemberorsomeoneclosewithtyphoid, -Noneoftheabove,
-Fever, -Diarrhoea1, -Headache, -Shivering, -Nausea, -Constipation1, -Jointormusclepain,
-Defaultthemedicationgiven, -Anypractisestoavoidtransmission, -Didyoufollowtheadvicegiven,
-Watermoving, -Watertreatedatsource, -Cleanontheoutside, -Hasacover, -Freeofholesandleakages,
-Usedforstorageofdrinkingwater,-Drinkingwaterstoragevessel, -CleanontheoutsideDW, -FreeofalgaeDW, -HasacoverDW,
-FreeofholesandleakagesDW, -Designatedcuptodrawthedrinkingwaterseen, -Hasadoororbarrier,-Utensilsarekeptonthefloor,
-Leftoverfoodpresent, -HandwashingfacilitypresentFPA, -HWEvidenceofuseFPA, -HWFhasassoaporashFPA, -Humanfaecalmatterclosetothearea,
-PitlatrinePresent, -Evidenceofproperuseofpitlatrine, -Holeiscovered, -Evidentflyinfestationatpitlatrine, -Magnitudeoftheinfestationatpitlatrine,
-Handwashingfacilitypresentatpitlatrine,-HWEvidenceofuseattoilet, -Drinkingwatersample,-SalmonellaWS1, -EColiWS1, -ShigellaWS1,-Watersourcesamplecollected2,
-Typeofwatersource2, -SalmonellaDW1, -EColiDW1, -ShigellaDW1, -Seconddrinkingwatersamplecollected, -Notes1, -SalmonellaDW2, -EColiDW2, -ShigellaDW2,
-OverallContaminationofDWwithSalmonella, -Watersourcesamplecollected, -typeofwatersource1, -SalmonellaWS2, -EColiWS2, -ShigellaWS2,
-Contamination.of.water.at.the.source, -FS1collected, -Speciesoffaecalsample1, -SalmonellaFS1 -ShigellaFS1, -EColiFS1, -FS2collected,
-Speciesoffaecalsample2, -SalmonellaFS2, -ShigellaFS2, -EColiFS2, -Contaminated.fecal.sample, -Householdmembers_cat, -Lackoffirewoodtoboil,-Lackofmoneytobuychlorinetablets,-Itisnotinherenttotreatdrinkingwater,-Believethewaterissafe,-Havenotime,-Feelsickwhentheytaketreatedwater,-Tastelessafterboiling,-Unavailabilityofchlorinetablets,-Fearforchildrensickness, -Timesofconfirmedtyphoidfever, - Swollenpainfulabdomen, -Urine, -Stool, -Blood, -TyphoidfeverconfirmedontheveryfirstvisittoHC, -Noadvicegiven, -Vomiting, -Saliva, -Eatfoodwhilehot, -Painfulabdomennotswollen,-Dontshareutensilswiththevictim,-Evenonurination, -Feverishconditions,-Diarrhoea, -Constipation, -Malaise, -Abdominalpains, -Howoftendoyouexperiencethesesymptoms, -Seekmedicalattentionwhenwithsymptoms, -Medicalresponseafteronsetofsymptoms)
clg<- droplevels.data.frame(clg)
# univariable analysis
library(survival)
uniques<-data.frame(name=vector("character",0), levels=vector("character",0))
for( i in 1:ncol(clg)){
a<-length(unique(clg[,i]))
b <- names(clg)[i]
new <- data.frame(name=b, levels=a)
uniques <- rbind(uniques, new)
}
uniques1 <- arrange(uniques, levels) %>% filter(levels==1)
clg <- select(clg, -which(names(clg) %in% unique(uniques1$name)))
# example model
m1 <-clogit(CaseorControl ~ Levelofeducation + strata(match), data=clg)
test<-clg %>%
select(-CaseorControl,
-match) %>%
map(~clogit(clg$CaseorControl ~ .x + strata(clg$match),method="approximate", data=clg))
result <- data.frame(Variable=vector("character",0),
OR=vector("character",0),
LL=vector("character",0),
UL=vector("character",0),
pv=vector("character",0),
filter=vector("character",0))
for(i in 1:length(test)){
a<-mmt(test[[i]])
a$Variable[1] <- names(test[i])
result <- rbind(result, a)
}
## names of variables to consider in the model arranged smaller p-value first for forward variable selection
formodel <- arrange(filter(result, filter=="yes" & pv<0.15),pv)$Variable
# dataframe to use for the model
df_model <- select(clg, formodel, CaseorControl, match)
df_model <- select(df_model,-KindofseparatevesseiforDW)
df_model <- df_model %>% drop_na()
### Forward model selection
## consider each variable in the order it appears in "formodel"
## eg the first one is formodel[1]
# AIC 254.2861
m1 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + strata(match), data=df_model, method="approximate")
#AIC(m1)
# AIC 255.9174
m2 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Boiling + strata(match), data=df_model, method="approximate")
#AIC(m2)
## FINAL MODEL
# AIC 253.517 <- final model
m3 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Soapandwatertowashhands + strata(match), data=df_model, method="approximate")
#AIC(m3)
## final table
select(mmt(m3), -filter)
## Variable OR LL UL pv
## 1 Treatdrinkingwaterpractice 2.741585e-12
## 2 TreatdrinkingwaterpracticeYes 9.67 5.18 18.05 0.000000e+00
## 3 SoapandwatertowashhandsYes 1.66 0.9 3.05 1.040000e-01
# AIC 256.8196
m4 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Soapandwatertowashhands + Levelofeducation + strata(match), data=df_model, method="approximate")
#AIC(m4)
# AIC 255.4725
m5 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Soapandwatertowashhands + N3644years + strata(match), data=df_model, method="approximate")
#AIC(m5)
# AIC 255.0848
m6 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Soapandwatertowashhands + Washhandsaftertoiletorlatrine + strata(match), data=df_model, method="approximate")
#AIC(m6)
# AIC 254.4313
m7 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Soapandwatertowashhands + Above65years + strata(match), data=df_model, method="approximate")
#AIC(m7)
# AIC 254.5589
m8 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Soapandwatertowashhands + Usepapers + strata(match), data=df_model, method="approximate")
#AIC(m8)
### backward elimination
# AIC 264.2622
mall <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Boiling + Soapandwatertowashhands + Levelofeducation + N3644years + Washhandsaftertoiletorlatrine + Above65years + Usepapers + strata(match), data=df_model, method="approximate")
AIC(mall)
## [1] 264.2622
# AIC 263.6646
m9 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Boiling + Soapandwatertowashhands + Levelofeducation + N3644years + Washhandsaftertoiletorlatrine + Above65years + strata(match), data=df_model, method="approximate")
AIC(m9)
## [1] 263.6646
# AIC 262.2305
m10 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Boiling + Soapandwatertowashhands + Levelofeducation + N3644years + Washhandsaftertoiletorlatrine + strata(match), data=df_model, method="approximate")
AIC(m10)
## [1] 262.2305
# AIC 260.477
m11 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Boiling + Soapandwatertowashhands + Levelofeducation + N3644years + strata(match), data=df_model, method="approximate")
AIC(m11)
## [1] 260.477
# AIC 258.4978
m12 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Boiling + Soapandwatertowashhands + Levelofeducation + strata(match), data=df_model, method="approximate")
AIC(m12)
## [1] 258.4978
# AIC 255.273
m13 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Boiling + Soapandwatertowashhands + strata(match), data=df_model, method="approximate")
AIC(m13)
## [1] 255.273
# AIC 255.9174
m14 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Boiling + strata(match), data=df_model, method="approximate")
AIC(m14)
## [1] 255.9174
# AIC 253.517 <- Final model
m15 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Soapandwatertowashhands + strata(match), data=df_model, method="approximate")
AIC(m15)
## [1] 253.517
# AIC 312.4566
m16 <-clogit(CaseorControl ~ Soapandwatertowashhands + strata(match), data=df_model, method="approximate")
AIC(m16)
## [1] 312.4566
## this is me cheating and ignoring Soapandwatertowashhands variable to see what happens. Model still seems upside down
# AIC 313.4555
m1 <-clogit(CaseorControl ~ Boiling + strata(match), data=df_model, method="approximate")
#AIC(m1)
# AIC 311.5513
m2 <-clogit(CaseorControl ~ Boiling + Soapandwatertowashhands + strata(match), data=df_model, method="approximate")
#AIC(m2)
# AIC 309.5377
m3 <-clogit(CaseorControl ~ Boiling + Soapandwatertowashhands + Levelofeducation + strata(match), data=df_model, method="approximate")
#AIC(m3)
# AIC 309.2024
m4 <-clogit(CaseorControl ~ Boiling + Soapandwatertowashhands + Levelofeducation + N3644years + strata(match), data=df_model, method="approximate")
#AIC(m4)
# AIC 311.5176
m5 <-clogit(CaseorControl ~ Boiling + Soapandwatertowashhands + Levelofeducation + Washhandsaftertoiletorlatrine + strata(match), data=df_model, method="approximate")
#AIC(m5)
# AIC 309.4235
m6 <-clogit(CaseorControl ~ Boiling + Soapandwatertowashhands + Levelofeducation + Above65years + strata(match), data=df_model, method="approximate")
#AIC(m6)
# AIC 308.9699 <- final model if cheat
m7 <-clogit(CaseorControl ~ Boiling + Soapandwatertowashhands + Levelofeducation + Usepapers + strata(match), data=df_model, method="approximate")
#AIC(m7)
# AIC 254.5589
m8 <-clogit(CaseorControl ~ Treatdrinkingwaterpractice + Soapandwatertowashhands + Usepapers + strata(match), data=df_model, method="approximate")
#AIC(m8)
## model selection
#library(MuMIn)
#mall <-clogit(CaseorControl ~ . + strata(match), data=df_model)
#options(na.action = "na.fail")
#all_models<-dredge(mall,evaluate = F)
#all_models_eval<-dredge(mall,evaluate = T)
#all_models_get <-get.models(all_models_eval, subset = T)
#########
# RC Analysis were missing values are included in the dataset and the full dataset is used
##########
#Get weather data for each week between 2012-2018
weather <- read.csv("kasese_weather_20180308.csv")
weather$TotalRain <- rowSums(weather[,c( "prec0", "prec3", "prec6", "prec9", "prec12", "prec15","prec18", "prec21")])
weather$MeanRain <- rowMeans(weather[,c( "prec0", "prec3", "prec6", "prec9", "prec12", "prec15","prec18", "prec21")], na.rm = TRUE)
weather$meantempC <- rowMeans(weather[,c("temp0", "temp3", "temp6", "temp9", "temp12", "temp15", "temp18", "temp21")], na.rm = TRUE)
weather$meanhumidity <- rowMeans(weather[,c("hum0", "hum3", "hum6", "hum9", "hum12", "hum15","hum18", "hum21")], na.rm = TRUE)
weather$Week <- as.integer(week(ymd(weather$date)))
weather$Year <- year(ymd(weather$date))
weather$Month <- as.character((month(ymd(weather$date),label=TRUE,abbr=FALSE)))
weather$Month <- factor(weather$Month,
levels=c("January","February","March","April","May","June","July","August","September","October","November","December"))
weather$date <- as.Date(weather$date, "%Y-%m-%d")
weekdate <- aggregate(weather$date, list(weather$Week, weather$Year), min)
colnames(weekdate) <- c("Week", "Year", "StartDate")
weather <- merge(weather, weekdate, by.weather = c("Week", "Year"), by.weekdate = c("Week", "Year"), all.x=TRUE)
weather1 <- weather[,c("date", "Year", "Week", "Month", "maxtempC", "mintempC", "meantempC",
"TotalRain", "MeanRain", "meanhumidity", "StartDate")]
weekly_weather <-
weather1 %>%
group_by(Year, Week, Month, StartDate) %>%
summarise_at(vars(-date), funs(mean(., na.rm=TRUE)))
#Getting the missing weeks for 2013-2017
weekly_weather1317 <- weekly_weather[weekly_weather$Year >= 2013 & weekly_weather$Year <= 2017,]
FULL_MOH_na <- merge(weekly_weather1317, FULL_MOH, by = c("Year", "Week", "Month"), all.x = TRUE)
#Removing extra weeks in 2017 for which we don't have case data
FULL_MOH_na <- subset(FULL_MOH_na, !(FULL_MOH_na$Year == 2017 & FULL_MOH_na$Week > 23))
dim(FULL_MOH_na)
## [1] 279 12
#Everthing up until 2016
FULL_MOH_na_2016 <- subset(FULL_MOH_na, FULL_MOH_na$Year < 2017)
#2017
FULL_MOH_na_2017 <- subset(FULL_MOH_na, FULL_MOH_na$Year == 2017)
#Adding 0 for cases when there was no outbreaks
FULL_MOH_na_2016$Cases.in.Kasese[which(FULL_MOH_na_2016$Week >=1 & FULL_MOH_na_2016$Week <= 13 & FULL_MOH_na_2016$Year==2013)] <- 0
FULL_MOH_na_2016$Cases.in.Uganda[which(FULL_MOH_na_2016$Week >=1 & FULL_MOH_na_2016$Week <= 13 & FULL_MOH_na_2016$Year==2013)] <- 0
FULL_MOH_na_2016$Cases.in.Kasese[which(FULL_MOH_na_2016$Week >=39 & FULL_MOH_na_2016$Year==2014)] <- 0
FULL_MOH_na_2016$Cases.in.Kasese[which(FULL_MOH_na_2016$Week <=12 & FULL_MOH_na_2016$Year==2015)] <- 0
FULL_MOH_na_2016$Cases.in.Uganda[which(FULL_MOH_na_2016$Week >=1 & FULL_MOH_na_2016$Week <= 13 & FULL_MOH_na_2016$Year==2013)] <- 0
FULL_MOH_na_2016$Cases.in.Uganda[which(FULL_MOH_na_2016$Week >=39 & FULL_MOH_na_2016$Year==2014)] <- 0
FULL_MOH_na_2016$Cases.in.Uganda[which(FULL_MOH_na_2016$Week <=12 & FULL_MOH_na_2016$Year==2015)] <- 0
FULL_MOH_na_2016_rain <- melt(FULL_MOH_na_2016[,c("Year", "Week", "Month","StartDate","TotalRain", "MeanRain")], id=c("Year", "Week", "Month", "StartDate"))
FULL_MOH_na_2016_temp <- melt(FULL_MOH_na_2016[,c("Year", "Week", "Month", "StartDate","maxtempC", "mintempC", "meantempC")], id=c("StartDate","Year", "Week", "Month"))
FULL_MOH_na_2016_hum <- melt(FULL_MOH_na_2016[,c("StartDate","Year", "Week", "Month", "meanhumidity")], id=c("StartDate","Year", "Week", "Month"))
#Time series using TS
FULL_MOH_kasese_na_ts <- ts(FULL_MOH_na_2016$Cases.in.Kasese, start=c(2013,1), end=c(2017,1), frequency=52 ,ts.eps = getOption("ts.eps"), is.ts(x))
ts_plot_kasese <- plot(FULL_MOH_kasese_na_ts)

FULL_MOH_uganda_na_ts <- ts(FULL_MOH_na_2016$Cases.in.Uganda, start=c(2013,1), end=c(2017,1), frequency=52 ,ts.eps = getOption("ts.eps"), is.ts(x))
ts_plot_uganda <- plot(FULL_MOH_uganda_na_ts)

FULL_MOH_temp_na_ts <- ts(FULL_MOH_na_2016$meantempC, start=c(2013,1), end=c(2017,1), frequency=52 ,ts.eps = getOption("ts.eps"), is.ts(x))
ts_plot_temp <- plot(FULL_MOH_temp_na_ts)

FULL_MOH_hum_na_ts <- ts(FULL_MOH_na_2016$meanhumidity, start=c(2013,1), end=c(2017,1), frequency=52 ,ts.eps = getOption("ts.eps"), is.ts(x))
ts_plot_hum <- plot(FULL_MOH_hum_na_ts)

FULL_MOH_rain_na_ts <- ts(FULL_MOH_na_2016$TotalRain, start=c(2013,1), end=c(2017,12), frequency=52 ,ts.eps = getOption("ts.eps"), is.ts(x))
ts_plot_rain <- plot(FULL_MOH_rain_na_ts)

FULL_MOH_2017_ts <- ts(FULL_MOH_na_2017$Cases.in.Kasese, start=c(2017,1), end=c(2017,12), frequency=52 ,ts.eps = getOption("ts.eps"), is.ts(x))
FULL_MOH_2017_UG_ts <- ts(FULL_MOH_na_2017$Cases.in.Uganda, start=c(2017,1), end=c(2017,12), frequency=52 ,ts.eps = getOption("ts.eps"), is.ts(x))
# #Time series using xts
# #Time series using xts
# xts_kasese <- as.xts(FULL_MOH_na_2016$Cases.in.Kasese, order.by=FULL_MOH_na_2016$StartDate, ts.eps = getOption("ts.eps"))
# xts_uganda <- as.xts(FULL_MOH_na_2016$Cases.in.Uganda, order.by=FULL_MOH_na_2016$StartDate, ts.eps = getOption("ts.eps"))
# xts_temp <- as.xts(FULL_MOH_na_2016$meantempC, order.by=FULL_MOH_na_2016$StartDate, ts.eps = getOption("ts.eps"))
# xts_rain <- as.xts(FULL_MOH_na_2016$MeanRain, order.by=FULL_MOH_na_2016$StartDate, ts.eps = getOption("ts.eps"))
# xts_hum <- as.xts(FULL_MOH_na_2016$meanhumidity, order.by=FULL_MOH_na_2016$StartDate, ts.eps = getOption("ts.eps"))
#
# xts_kasese_2017 <- as.xts(FULL_MOH_na_2017$Cases.in.Kasese, order.by=FULL_MOH_na_2017$StartDate, ts.eps = getOption("ts.eps"))
#===================================================
# PLOT FOR IMPUTE MISSING DATA FOR TIME SERIES time series
#===================================================
imp_kalman_kasese_na <- na.kalman(FULL_MOH_kasese_na_ts) ## Kasese
imp_ma_kasese_na <- na.ma(FULL_MOH_kasese_na_ts)
imp_mean_kasese_na <- na.mean(FULL_MOH_kasese_na_ts)
imp_random_kasese_na <- na.random(FULL_MOH_kasese_na_ts)
imp_locf_kasese_na <- na.locf(FULL_MOH_kasese_na_ts)
imp_interpolation_kasese_na <- na.interpolation(FULL_MOH_kasese_na_ts)
imp_kalman_Uganda_na <- na.kalman(FULL_MOH_uganda_na_ts)## Uganda
imp_ma_Uganda_na <- na.ma(FULL_MOH_uganda_na_ts)
imp_mean__Uganda_na <- na.mean(FULL_MOH_uganda_na_ts)
imp_random_Uganda_na <- na.random(FULL_MOH_uganda_na_ts)
imp_locf_Uganda_na <- na.locf(FULL_MOH_uganda_na_ts)
imp_interpolation_Uganda_na <- na.interpolation(FULL_MOH_uganda_na_ts)
imp_kalman_kasese_tx <- na.kalman(FULL_MOH_na_2016$Cases.in.Kasese)
imp_ma_kasese_na_tx <- na.ma(FULL_MOH_na_2016$Cases.in.Kasese)
imp_mean_kasese_na_tx <- na.mean(FULL_MOH_na_2016$Cases.in.Kasese)
imp_random_kasese_na_tx <- na.random(FULL_MOH_na_2016$Cases.in.Kasese)
imp_locf_kasese_na_tx <- na.locf(FULL_MOH_na_2016$Cases.in.Kasese)
imp_interpolation_kasese_na_tx <- na.interpolation(FULL_MOH_na_2016$Cases.in.Kasese)
imp_dataset_kasese <- data.frame(StartDate=FULL_MOH_na_2016$StartDate,
OriginalTS = FULL_MOH_na_2016$Cases.in.Kasese,
Kalman = imp_kalman_kasese_tx,
MA = imp_ma_kasese_na_tx,
# Mean = imp_mean_kasese_na_tx,
# Random = imp_random_kasese_na_tx,
LOCF = imp_locf_kasese_na_tx,
Interpolation = imp_interpolation_kasese_na_tx)
imp_dataset_kasese1 <- melt(imp_dataset_kasese, id="StartDate")
imp_dataset_kasese1$Week <- as.integer(week(ymd(imp_dataset_kasese1$StartDate)))
imp_dataset_kasese1$Year <- year(ymd(imp_dataset_kasese1$StartDate))
# plotNA.imputations(x.withNA = FULL_MOH_na_2016$Cases.in.Kasese, x.withImputations = na.seadec(FULL_MOH_na_2016$Cases.in.Kasese, "kalman"))
# plotNA.imputations(x.withNA = FULL_MOH_na_2016$Cases.in.Kasese, x.withImputations = na.seadec(FULL_MOH_na_2016$Cases.in.Kasese,"ma"))
imp_kalman_uganda_na_tx <- na.kalman(FULL_MOH_na_2016$Cases.in.Uganda)
imp_ma_uganda_na_tx <- na.ma(FULL_MOH_na_2016$Cases.in.Uganda)
imp_mean_uganda_na_tx <- na.mean(FULL_MOH_na_2016$Cases.in.Uganda)
imp_random_uganda_na_tx <- na.random(FULL_MOH_na_2016$Cases.in.Uganda)
imp_locf_uganda_na_tx <- na.locf(FULL_MOH_na_2016$Cases.in.Uganda)
imp_interpolation_uganda_na_tx <- na.interpolation(FULL_MOH_na_2016$Cases.in.Uganda)
imp_dataset_uganda <- data.frame(StartDate=FULL_MOH_na_2016$StartDate,
OriginalTS = FULL_MOH_na_2016$Cases.in.Uganda,
Kalman = imp_kalman_uganda_na_tx,
MA = imp_ma_uganda_na_tx,
# Mean = imp_mean_uganda_na_tx,
# Random = imp_random_uganda_na_tx,
LOCF = imp_locf_uganda_na_tx,
Interpolation = imp_interpolation_uganda_na_tx)
imp_dataset_uganda1 <- melt(imp_dataset_uganda, id="StartDate")
imp_dataset_uganda1$Week <- as.integer(week(ymd(imp_dataset_uganda1$StartDate)))
imp_dataset_uganda1$Year <- year(ymd(imp_dataset_uganda1$StartDate))
#
# # Kasese_2017_ts <- ts(FULL_MOH_na_2017$Cases.in.Kasese, start=c(2017, 1), end=c(2017, 12), frequency=52,ts.eps = getOption("ts.eps"), is.ts(x))
# # plot(Kasese_2017_ts)
#
#
# #Plotting the data
imput_kasese_plot <- ggplot(imp_dataset_kasese1, aes(x=Week, y=value, col=variable)) +
geom_line(stat = "identity") +
facet_grid(.~Year) +
scale_y_continuous("Number of cases (Kasese)") +
scale_color_discrete(name="",
breaks=c("OriginalTS", "Kalman", "MA", "LOCF", "Interpolation"),
labels=c("Original time series", "Kalman smoothing", "Weighted moving average",
"Last observation carried forward", "Interpolation")) +
scale_x_continuous("") + annotate("text", x = c(16,19), y = 28, label = "*") ### Rebecca please add these annotations for flooding reported for (week 16 in 2013, 19 in 2014, 17 & 27 in 2015, 15 in 2016)
imput_uganda_plot <- ggplot(imp_dataset_uganda1, aes(x=Week, y=value, col=variable)) +
geom_line(stat = "identity") +
facet_grid(.~Year) +
scale_y_continuous("Number of cases (Uganda)") +
scale_color_discrete(name="",
breaks=c("OriginalTS", "Kalman", "MA", "LOCF", "Interpolation"),
labels=c("Original time series", "Kalman smoothing", "Weighted moving average",
"Last observation carried forward", "Interpolation")) +
scale_x_continuous("")
rain_plot <- ggplot(FULL_MOH_na_2016_rain, aes(x = Week, y = value, col=variable)) +
geom_line(stat = "identity") +
scale_y_continuous("Rainfall (mm)") +
facet_grid(.~Year) +
scale_color_discrete(name="",
breaks=c("TotalRain", "MeanRain"),
labels=c("Total rainfall", "Mean rainfall")) +
scale_x_continuous("")
temp_plot <- ggplot(FULL_MOH_na_2016_temp, aes(x = Week, y = value, col=variable)) +
geom_line(stat = "identity") +
scale_y_continuous(expression(paste("Temperature ",(degree*C)))) +
facet_grid(.~Year) +
scale_color_discrete(name="",
breaks=c("maxtempC", "mintempC", "meantempC"),
labels=c("Maximum temperature", "Minimum temperature", "Mean temperature")) +
scale_x_continuous("")
humidity_plot <- ggplot(FULL_MOH_na_2016_hum, aes(x = Week, y = value, col=variable)) +
geom_line(stat = "identity") +
scale_y_continuous("Humidity (%)") +
facet_grid(.~Year) +
scale_color_discrete(name="",
breaks=c("meanhumidity"),
labels=c("Mean humidity"))
#
#
#grid.arrange(imput_kasese_plot, imput_uganda_plot, rain_plot, temp_plot, humidity_plot, nrow = 5, common.legend = TRUE, legend="bottom")----
pdf("../../Figures/Imputed_data.pdf",width = 10,height = 14)
multiplot(imput_kasese_plot, imput_uganda_plot,rain_plot,temp_plot,humidity_plot, cols = 1)
dev.off()
## quartz_off_screen
## 2
ts_meanrain <- ts(FULL_MOH_na_2016$MeanRain)
ts_totalrain <- ts(FULL_MOH_na_2016$TotalRain)
ts_maxtemp <- ts(FULL_MOH_na_2016$maxtempC)
ts_mintemp <- ts(FULL_MOH_na_2016$mintempC)
ts_meantemp <- ts(FULL_MOH_na_2016$meantempC)
ts_meanhum <- ts(FULL_MOH_na_2016$meanhumidity)
####*******************************************
#### TS CROSS CORRELATION WITH WEATHER IN KASESE
####*******************************************
ccf(imp_kalman_kasese_na,FULL_MOH_rain_na_ts,type = "correlation",plot = TRUE)
ccf(imp_kalman_kasese_na,FULL_MOH_rain_na_ts,type = "correlation",plot = TRUE)

ccf(imp_kalman_kasese_na,FULL_MOH_temp_na_ts,type = "correlation",plot = TRUE)

#ccf(imp_kalman_kasese_na,ts_mintemp,type = "correlation",plot = TRUE)
#ccf(imp_kalman_kasese_na,ts_meantemp,type = "correlation",plot = TRUE)
ccf(imp_kalman_kasese_na,FULL_MOH_hum_na_ts,type = "correlation",plot = TRUE)

####*******************************************
#### SEASONALITY ANALYSIS UG AND KASESE
####*******************************************
myts_NAT12 <- ts(FULL_MOH_na_2016$Cases.in.Uganda, start=c(2013, 1), end=c(2017, 1), frequency=12,ts.eps = getOption("ts.eps"), is.ts(x)) ### NATIONAL AT
myts_DIS12 <- ts(FULL_MOH_na_2016$Cases.in.Kasese, start=c(2013, 1), end=c(2017, 1), frequency=12,ts.eps = getOption("ts.eps"), is.ts(x)) ### DISTRICT AT MOTHLY FREQUENCY
myts_RAIN12 <- ts(FULL_MOH_na_2016$TotalRain, start=c(2013, 1), end=c(2017, 1), frequency=12,ts.eps = getOption("ts.eps"), is.ts(x)) ### RAIN AT DISTRICT MONTHLY
myts_TEMP12 <- ts(FULL_MOH_na_2016$maxtempC, start=c(2013, 1), end=c(2017, 1), frequency=12,ts.eps = getOption("ts.eps"), is.ts(x)) ### TEMPERATURE AT DISTRICT MONTHLY
myts_HUM12<- ts(FULL_MOH_na_2016$meanhumidity, start=c(2013, 1), end=c(2017, 1), frequency=12,ts.eps = getOption("ts.eps"), is.ts(x)) ### HUMIDITY AT DISTRICT MONTHLY
# fit_NAT_1 <- ets(myts_NAT12)
# res <- cbind(Residuals = residuals(fit_NAT_1),
# Response.residuals = residuals(fit_NAT_1, type='response'))
# autoplot(res, facets=TRUE)
#
# fit_DIS_1 <- ets(myts_DIS12)
# res <- cbind(Residuals = residuals(fit_DIS_1),
# Response.residuals = residuals(fit_DIS_1, type='response'))
# autoplot(res, facets=T)
imp_kalman_myts_NAT12 <- na.kalman(myts_NAT12) ## National impute season
imp_kalman_myts_DIS12 <- na.kalman(myts_DIS12) ## Kasese impute season
R5<-ggseasonplot(imp_kalman_myts_NAT12, polar=TRUE) + theme_bw() + labs(title= "Typhoid fever in Uganda ")
R1<-ggseasonplot(imp_kalman_myts_DIS12, polar=TRUE) + theme_bw() + labs(title= "Typhoid fever in Kasese District")
R2<-ggseasonplot(myts_RAIN12, polar=TRUE) + theme_bw() + labs(title= "Rainfall in Kasese District")
R3<-ggseasonplot(myts_TEMP12, polar=TRUE) + theme_bw() + labs(title= "Temperature in Kasese District")
R4<-ggseasonplot(myts_HUM12, polar=TRUE) + theme_bw() + labs(title= "Humidity in Kasese District")
multiplot(R1,R2,R3,R4, cols = 2)

pdf("../../Figures/Seasonality_Kasese.pdf",width = 8,height = 6)
multiplot(R1,R2,R3,R4, cols = 2)
dev.off()
## quartz_off_screen
## 2
####**********************
#### TS MODEL FITTING
####**********************
fit_kas<-auto.arima(imp_kalman_kasese_na) ### Kasese typhoid data TS fitting using kalman imputed data
checkresiduals(fit_kas)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 73.2, df = 103, p-value = 0.9884
##
## Model df: 1. Total lags used: 104
fit_UG<-auto.arima(imp_kalman_Uganda_na) ### Kasese typhoid data TS fitting using kalman imputed data
checkresiduals(fit_UG)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)(0,0,1)[52]
## Q* = 97.095, df = 101, p-value = 0.5915
##
## Model df: 3. Total lags used: 104
####**********************
#### FORCASTING####
####**********************
Kas_for_17_intp <- forecast(object = imp_interpolation_kasese_na, h = 16, level=c(90,95))
Kas_for_17_kal <- forecast(object = imp_kalman_kasese_na, h = 16, level=c(90,95))
Kas_for_17_ma <- forecast(object = imp_ma_kasese_na, h = 16, level=c(90,95))
Ug_for_17_intp <- forecast(object = imp_interpolation_Uganda_na, h = 16, level=c(90,95))
Ug_for_17_kal <- forecast(object = imp_kalman_Uganda_na, h = 16, level=c(90,95))
Ug_for_17_ma <- forecast(object = imp_ma_Uganda_na, h = 16, level=c(90,95))
Plot_Kas_for_17<-autoplot(Kas_for_17_intp , series="interpolation 2013-2016") +
theme_bw() + labs(title=" A Forecast of Typhoid cases in Kasese district for the 1st quarter of 2017", y= "Typhoid cases") +
autolayer(Kas_for_17_kal, series="Forecast Kalman") +
autolayer(FULL_MOH_2017_ts, series="2017 DATA") +
autolayer(fitted(Kas_for_17_ma), series="Forecast Moving Average") +
autolayer(fitted(Kas_for_17_intp), series="Forecast Interpolation")+
autolayer(fitted(Kas_for_17_kal), series="fitted")
Plot_Ug_for_17<-autoplot(Ug_for_17_intp , series="interpolation 2013-2016") +
theme_bw() + labs(title=" A Forecast of Typhoid cases in Uganda for the 1st quarter of 2017", y= "Typhoid cases") +
autolayer(Ug_for_17_kal, series="Forecast Kalman") +
autolayer(FULL_MOH_2017_UG_ts, series="2017 DATA") +
autolayer(fitted(Ug_for_17_ma), series="Forecast Moving Average") +
autolayer(fitted(Ug_for_17_intp), series="Forecast Interpolation")+
autolayer(fitted(Ug_for_17_intp), series="fitted")
multiplot(Plot_Ug_for_17,Plot_Kas_for_17, cols = 1)

pdf("../../Figures/Forecasting.pdf",width = 12,height = 6)
multiplot(Plot_Ug_for_17,Plot_Kas_for_17, cols = 1)
dev.off()
## quartz_off_screen
## 2
#http://sma.epfl.ch/~lbelzile/math342/pract_sup.html
#======
#Non-Linear regession using R_INLA
#=======
# hist(FULL_MOH_na_2016$Cases.in.Kasese)
# hist(FULL_MOH_na_2016$MeanRain)
# plot(FULL_MOH_na_2016$Cases.in.Kasese, FULL_MOH_na_2016$MeanRain)
#
# #No seasonal affects
# formula <- Cases.in.Kasese ~ 1 + MeanRain
# model.linear_rain <- inla(formula, family="gaussian", data= FULL_MOH_na_2016, control.predictor = list(compute=TRUE))
# round(model.linear_rain$summary.fixed[,1:5],3) #Estimates of the marginal posterior mean
# plot(model.linear_rain$marginals.fixed$`(Intercept)`[,1], model.linear_rain$marginals.fixed$`(Intercept)`[,2]) #Posterior density plot for the intercept
# plot(model.linear_rain$marginals.fixed$`MeanRain`[,1], model.linear_rain$marginals.fixed$`MeanRain`[,2]) #Posterior density plot for the slope
# res.lin <- (FULL_MOH_na_2016$Cases.in.Kasese - model.linear_rain$summary.fitted.values$mean) / model.linear_rain$summary.fitted.values$sd #Standardized residuals - the model departs from the observations
# plot(res.lin,ylim=c(-15,20),main="",xlab="Weeks",ylab=expression((y[i] - hat(mu[i]))/hat(sigma[i])),xaxt="n") #Could possibly be a pattern??
#
#
#
# #Seaonal affects
# formula2 <- Cases.in.Kasese ~ 1 + MeanRain + Month
# model.linear_rain2 <- inla(formula2, family="gaussian", data= FULL_MOH_na_2016, control.predictor = list(compute=TRUE))
# res.lin2 <- (FULL_MOH_na_2016$Cases.in.Kasese - model.linear_rain2$summary.fitted.values$mean) / model.linear_rain2$summary.fitted.values$sd #Standardized residuals
# plot(res.lin2,ylim=c(-10,10),main="",xlab="Weeks",ylab=expression((y[i] - hat(mu[i]))/hat(sigma[i])),xaxt="n") #Could possibly be a pattern??
#
# #Seasonal affects with a random walk
# formula3 <- Cases.in.Kasese ~ 1 + MeanRain + Month + f(row.names(FULL_MOH_na_2016), model="rw2", hyper = list(prec = list(prior="loggamma", param=c(1,0.01))))
# model.linear_rain3 <- inla(formula3, family="gaussian", data= FULL_MOH_na_2016, control.predictor = list(compute=TRUE))
# res.lin3 <- (FULL_MOH_na_2016$Cases.in.Kasese - model.linear_rain3$summary.fitted.values$mean) / model.linear_rain3$summary.fitted.values$sd #Standardized residuals
# plot(res.lin3,ylim=c(-10,10),main="",xlab="Weeks",ylab=expression((y[i] - hat(mu[i]))/hat(sigma[i])),xaxt="n")