### Author: Frederico Brandão
### Date and location: November 2018, Belém, Brazil
### Data source: Own data collected in 2015
### Specifications: Calmaria settlement including non-participants,
###   Agropalma and Biopalma participants
### Enjoy!!

# Purpose: descriptive statistics, one-way ANOVA and Tukey HSD post hoc tests
# of household survey variables, compared across the palm_pilot groups.

#### Import data ----
## Import data from a csv file, in this case a file named SurveysCalmaria.
## Please make sure there are no ç, à or ã characters in the file.
# NOTE(review): setwd() with a machine-specific path is kept so that any code
# relying on the working directory keeps working; adjust the path locally.
setwd("C:/Users/Frederico Brandao/Desktop/R/Data")
mydata <- read.csv("SurveysCalmaria.csv", header = TRUE) # sets the dataset

#### Set up the variables for analysis ----
myvars <- c(
  "palm_pilot", "n_h_members", "hh_age", "hh_highest_education",
  "h_highest_education", "dependency_ratio", "hh_migrated",
  "n_activeh_members", "female_active_mem", "n_incomesources",
  "bolsafamilia", "land_totalsize", "asset_index", "asset_index_log",
  "any_h_worked_palmcompany", "year_plot", "plot_acquisition", "n_crops",
  "livestock", "MOIndex", "assoc_memb", "union_memb", "perceived_weelbeing",
  "set_years", "n_marketcrops", "n_consumptioncrops",
  "X1_income_source", "X2_income_source", "X3_income_source"
)

## Eliminate 2 observations (AG31 and AG32, obs 9 and 75) as they are
## different.
# The exclusion has to happen on `mydata`, BEFORE the columns are selected and
# before anything is attached: `obs` is not part of `myvars`, so filtering the
# reduced data frame on `obs` (as this script used to do) could never work and
# the analyses below silently ran on all observations.
# `%in%` is used instead of `!=` so that a missing `obs` value does not turn
# into an all-NA row.
excluded_obs <- c(9, 75)
if ("obs" %in% names(mydata)) {
  keep_rows <- !(mydata$obs %in% excluded_obs)
} else {
  warning(
    "Column 'obs' not found in SurveysCalmaria.csv; observations ",
    paste(excluded_obs, collapse = " and "),
    " were NOT excluded.",
    call. = FALSE
  )
  keep_rows <- rep(TRUE, nrow(mydata))
}

descriptivedata <- mydata[keep_rows, myvars, drop = FALSE]
summary(descriptivedata)

# attach() is kept only for backward compatibility with code that refers to
# the columns by their bare names; the analyses in this section pass the data
# explicitly. Detach a copy left over from a previous run first so that
# re-running the script does not stack stale copies on the search path.
if ("descriptivedata" %in% search()) {
  detach("descriptivedata", character.only = TRUE)
}
attach(descriptivedata) # to use the dataset as default

# Grouping factor used by every analysis below.
# NOTE(review): the labels are assigned to the sorted raw codes, i.e. this
# assumes the lowest code is NP, then BIO, then AG -- confirm against the
# survey codebook.
palm_pilot <- as.factor(descriptivedata$palm_pilot)
levels(palm_pilot) <- c("NP", "BIO", "AG") # sets new labels

# Data frame handed to aov(): same rows/columns as `descriptivedata`, but with
# the labelled factor in place of the raw palm_pilot codes.
analysis_data <- descriptivedata
analysis_data$palm_pilot <- palm_pilot

#### Creating functions ----

## for descriptive stats
# Summarise a variable within each level of a grouping variable.
#
# group1: vector of values to summarise.
# group2: grouping vector/factor of the same length as `group1`.
#
# Returns a matrix with one row per group and the columns mean, median
# (both rounded to 1 digit), sd (3 digits), min and max (0 digits).
# Missing values are dropped before each statistic is computed.
desstats <- function(group1, group2) {
  by_group <- function(stat, digits) {
    round(tapply(group1, group2, stat, na.rm = TRUE), digits = digits)
  }
  cbind(
    mean = by_group(mean, 1),
    median = by_group(median, 1),
    sd = by_group(sd, 3),
    min = by_group(min, 0),
    max = by_group(max, 0)
  )
}

## for anova
# Fit a one-way ANOVA of `group1` on `group2` and return the aov object.
# NOTE: this definition masks stats::anova() for the rest of the session; call
# stats::anova() explicitly if the model-comparison table is needed. The name
# is kept so that existing callers keep working.
anova <- function(group1, group2) {
  aov(group1 ~ group2)
}

#### Run descriptive stats ----
descriptive_vars <- c(
  "n_h_members", "hh_age", "hh_highest_education", "h_highest_education",
  "dependency_ratio", "hh_migrated", "set_years", "n_activeh_members",
  "any_h_worked_palmcompany", "female_active_mem", "n_incomesources",
  "bolsafamilia", "assoc_memb", "union_memb", "land_totalsize", "n_crops",
  "n_marketcrops", "n_consumptioncrops", "livestock", "asset_index"
)

for (var_name in descriptive_vars) {
  # Results are not auto-printed inside a loop, hence the explicit print().
  cat("\n## Descriptive statistics:", var_name, "by palm_pilot\n")
  print(desstats(analysis_data[[var_name]], palm_pilot))
}

#### Run ANOVA and post hoc tests ----
## run ANOVA to check F value and p (significance)
# Same variables as above, except that the log-transformed asset index is
# tested instead of the raw one.
anova_vars <- c(
  "n_h_members", "hh_age", "hh_highest_education", "h_highest_education",
  "dependency_ratio", "hh_migrated", "set_years", "n_activeh_members",
  "any_h_worked_palmcompany", "female_active_mem", "n_incomesources",
  "bolsafamilia", "assoc_memb", "union_memb", "land_totalsize", "n_crops",
  "n_marketcrops", "n_consumptioncrops", "livestock", "asset_index_log"
)

## Variables followed up with Tukey HSD post hoc tests to check differences
## between pairs of groups (chosen by the author where the ANOVA was
## significant).
# NOTE(review): the original comments stated for every one of these variables
# that the pair with p > 0.05 is AG-BIO; that looks copy-pasted -- verify per
# variable. Because the two excluded observations are now really removed,
# re-check that this list still matches the significant ANOVAs.
tukey_vars <- c(
  "set_years", "n_activeh_members", "any_h_worked_palmcompany",
  "bolsafamilia", "n_crops", "n_marketcrops"
)

for (var_name in anova_vars) {
  cat("\n## ANOVA:", var_name, "~ palm_pilot\n")
  # do.call() embeds the actual formula in the stored call, so the "Fit:" line
  # printed by TukeyHSD shows the variable instead of a placeholder name.
  aov_cont <- do.call(
    "aov",
    list(
      formula = reformulate("palm_pilot", response = var_name),
      data = quote(analysis_data)
    )
  )
  print(summary(aov_cont))

  if (var_name %in% tukey_vars) {
    tuk <- TukeyHSD(aov_cont)
    print(tuk)
    ## You can also look at it visually: a pair of groups differs
    ## significantly when its confidence interval does not overlap 0.
    plot(tuk)
  }
}