### Author: Frederico Brandão
### Date and location: November 2018, Belém, Brazil
### Data source: Own data collected in 2015
### Specifications: Calmaria settlement including non-participants, Agropalma and Biopalma participants
### Enjoy!!

### Import the data from a csv file, in this case a file named SurveysCalmaria.csv
### Please make sure there are no ç, à or ã characters

## Load the survey data. NOTE(review): setwd() with an absolute path ties the
## script to one machine; it is kept because code elsewhere may rely on the
## working directory.
setwd("C:/Users/Frederico Brandao/Desktop/R/Data")
mydata <- read.csv("SurveysCalmaria.csv", header = TRUE) # sets the dataset

## set up the variables for analysis
myvars <- c(
  "palm_pilot", "n_h_members", "hh_age", "hh_highest_education",
  "h_highest_education", "dependency_ratio", "hh_migrated",
  "n_activeh_members", "female_active_mem", "n_incomesources",
  "bolsafamilia", "land_totalsize", "asset_index", "asset_index_log",
  "any_h_worked_palmcompany", "year_plot", "plot_acquisition", "n_crops",
  "livestock", "MOIndex", "assoc_memb", "union_memb", "perceived_weelbeing",
  "set_years", "n_marketcrops", "n_consumptioncrops",
  "X1_income_source", "X2_income_source", "X3_income_source"
)

## Eliminate 2 observations (AG31 and AG32, obs 9 and 75) as they are different.
## This has to happen on mydata, because "obs" is not one of the selected
## columns, and BEFORE attach(), because attach() exposes a copy of the data
## frame: rows dropped afterwards would still be present in the attached
## columns used by the analysis below.
stopifnot("obs" %in% names(mydata))
keep_rows <- !(mydata$obs %in% c(9, 75))
descriptivedata <- mydata[keep_rows, myvars]
summary(descriptivedata)

attach(descriptivedata) # to use the dataset as default

## Work on a global copy of the grouping variable; it masks the attached column.
palm_pilot <- as.factor(palm_pilot)
## NOTE(review): assumes the raw codes sort in the order NP, BIO, AG - confirm
## against the questionnaire coding.
levels(palm_pilot) <- c("NP", "BIO", "AG") # sets new labels




#### Creating functions
## for descriptive stats
#' Descriptive statistics of one variable, split by group.
#'
#' @param group1 Numeric vector holding the variable to summarise.
#' @param group2 Grouping vector (factor or coercible to one), same length as
#'   `group1`.
#' @return A matrix with one row per group and the columns `mean`, `median`,
#'   `sd`, `min` and `max`. Missing values are dropped before each statistic;
#'   mean and median are rounded to 1 digit, sd to 3, min and max to 0.
desstats <- function(group1, group2) {
  # One statistic per group, ignoring NAs, rounded to the requested precision.
  # TRUE is spelled out (T can be reassigned) and no local variable reuses the
  # name of a summary function, so lookups of mean/median/sd/min/max always
  # resolve to the real functions.
  by_group <- function(stat_fun, digits) {
    round(tapply(group1, group2, stat_fun, na.rm = TRUE), digits = digits)
  }

  results <- cbind(
    mean = by_group(mean, 1),
    median = by_group(median, 1),
    sd = by_group(sd, 3),
    min = by_group(min, 0),
    max = by_group(max, 0)
  )
  return(results)
}
##for anova
#' Fit a one-way analysis of variance of `group1` explained by `group2`.
#'
#' NOTE(review): once defined, this global function is found before
#' `stats::anova` when `anova()` is called by name.
#'
#' @param group1 Response vector.
#' @param group2 Grouping vector used as the single explanatory term.
#' @return The fitted model object returned by `aov()`.
anova <- function(group1, group2) {
  fitted_model <- aov(group1 ~ group2)
  fitted_model
}

## run descriptive stats
# Each call is left as a bare top-level expression so that its result is
# auto-printed, one table per variable, with one row per palm_pilot group.

# Household composition and history
desstats(group1 = n_h_members, group2 = palm_pilot)
desstats(group1 = hh_age, group2 = palm_pilot)
desstats(group1 = hh_highest_education, group2 = palm_pilot)
desstats(group1 = h_highest_education, group2 = palm_pilot)
desstats(group1 = dependency_ratio, group2 = palm_pilot)
desstats(group1 = hh_migrated, group2 = palm_pilot)
desstats(group1 = set_years, group2 = palm_pilot)

# Labour, income sources and memberships
desstats(group1 = n_activeh_members, group2 = palm_pilot)
desstats(group1 = any_h_worked_palmcompany, group2 = palm_pilot)
desstats(group1 = female_active_mem, group2 = palm_pilot)
desstats(group1 = n_incomesources, group2 = palm_pilot)
desstats(group1 = bolsafamilia, group2 = palm_pilot)
desstats(group1 = assoc_memb, group2 = palm_pilot)
desstats(group1 = union_memb, group2 = palm_pilot)

# Land, crops and livestock
desstats(group1 = land_totalsize, group2 = palm_pilot)
desstats(group1 = n_crops, group2 = palm_pilot)
desstats(group1 = n_marketcrops, group2 = palm_pilot)
desstats(group1 = n_consumptioncrops, group2 = palm_pilot)
desstats(group1 = livestock, group2 = palm_pilot)

# Assets
desstats(group1 = asset_index, group2 = palm_pilot)

# One-way ANOVA of every variable against palm_pilot.
#
# Pattern used throughout:
#   aov_cont <- aov(<variable> ~ palm_pilot)   fit the model
#   summary(aov_cont)                          F value and p (significance)
# For some variables a Tukey HSD post hoc test follows, to see which pairs of
# groups differ:
#   tuk <- TukeyHSD(aov_cont); tuk             pairwise differences, adjusted p
#   plot(tuk)                                  visual check: a pair differs
#                                              significantly when its interval
#                                              does not overlap 0
# NOTE(review): the earlier notes named AG-BIO as the pair with p > 0.05 after
# every post hoc test; that remark was repeated verbatim for each variable, so
# re-check it against the actual output before quoting it.
# aov_cont and tuk are overwritten at each step; only the last fits remain.

# --- n_h_members
aov_cont <- aov(n_h_members ~ palm_pilot)
summary(aov_cont)

# --- hh_age
aov_cont <- aov(hh_age ~ palm_pilot)
summary(aov_cont)

# --- hh_highest_education
aov_cont <- aov(hh_highest_education ~ palm_pilot)
summary(aov_cont)

# --- h_highest_education
aov_cont <- aov(h_highest_education ~ palm_pilot)
summary(aov_cont)

# --- dependency_ratio
aov_cont <- aov(dependency_ratio ~ palm_pilot)
summary(aov_cont)

# --- hh_migrated
aov_cont <- aov(hh_migrated ~ palm_pilot)
summary(aov_cont)

# --- set_years (with post hoc test)
aov_cont <- aov(set_years ~ palm_pilot)
summary(aov_cont)
tuk <- TukeyHSD(aov_cont)
tuk
plot(tuk)

# --- n_activeh_members (with post hoc test)
aov_cont <- aov(n_activeh_members ~ palm_pilot)
summary(aov_cont)
tuk <- TukeyHSD(aov_cont)
tuk
plot(tuk)

# --- any_h_worked_palmcompany (with post hoc test)
aov_cont <- aov(any_h_worked_palmcompany ~ palm_pilot)
summary(aov_cont)
tuk <- TukeyHSD(aov_cont)
tuk
plot(tuk)

# --- female_active_mem
aov_cont <- aov(female_active_mem ~ palm_pilot)
summary(aov_cont)

# --- n_incomesources
aov_cont <- aov(n_incomesources ~ palm_pilot)
summary(aov_cont)

# --- bolsafamilia (with post hoc test)
aov_cont <- aov(bolsafamilia ~ palm_pilot)
summary(aov_cont)
tuk <- TukeyHSD(aov_cont)
tuk
plot(tuk)

# --- assoc_memb
aov_cont <- aov(assoc_memb ~ palm_pilot)
summary(aov_cont)

# --- union_memb
aov_cont <- aov(union_memb ~ palm_pilot)
summary(aov_cont)

# --- land_totalsize
aov_cont <- aov(land_totalsize ~ palm_pilot)
summary(aov_cont)

# --- n_crops (with post hoc test)
aov_cont <- aov(n_crops ~ palm_pilot)
summary(aov_cont)
tuk <- TukeyHSD(aov_cont)
tuk
plot(tuk)

# --- n_marketcrops (with post hoc test)
aov_cont <- aov(n_marketcrops ~ palm_pilot)
summary(aov_cont)
tuk <- TukeyHSD(aov_cont)
tuk
plot(tuk)

# --- n_consumptioncrops
aov_cont <- aov(n_consumptioncrops ~ palm_pilot)
summary(aov_cont)

# --- livestock
aov_cont <- aov(livestock ~ palm_pilot)
summary(aov_cont)

# --- asset_index_log (the log-transformed column is the one tested here)
aov_cont <- aov(asset_index_log ~ palm_pilot)
summary(aov_cont)