# 6_modelVsSurvey_baseline library(mgcv) library(ggplot2) library(ggpubr) rm(list=ls()) # get aligned data source('X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/1_alignData.R') # Baseline Model ----- cor(dat_civ[, c("tot_cassava_area","tot_cassava_area_w","tot_monoculture_area","tot_intercrop_area","Cass_Prod","Cass_HA","SPAM2010","MapSPAM_HA")], method="spearman", use="pairwise") cor(dat_civ[, c("tot_cassava_area","tot_cassava_area_w","tot_monoculture_area","tot_intercrop_area","Population")], method="spearman", use="pairwise") # Production in CIV c <- min(dat_civ$Cass_Prod[dat_civ$Cass_Prod > 0], na.rm=TRUE) / 2 m1 <- lm(tot_cassava_area ~ Cass_Prod, data=dat_civ) m2 <- lm(tot_cassava_area ~ log(Cass_Prod + c), data=dat_civ) m4 <- gam(tot_cassava_area ~ s(Cass_Prod), data=dat_civ) AIC(m1) AIC(m2) AIC(m4) par(mfrow=c(2,2)) plot(m2) summary(m2)$adj.r.squared g1 <- ggplot(dat_civ, aes(x=log(Cass_Prod + c), y=tot_cassava_area)) + geom_point() + labs(x="Predicted Cassva production (log)", y="Total Cassava Area") g2 <- ggplot(dat_civ, aes(x=log(Cass_Prod + c), y=tot_cassava_area_w)) + geom_point() + labs(x="Predicted Cassva production (log)", y="Total Cassava Area (weighted)") g3 <- ggplot(dat_civ, aes(x=log(Cass_Prod + c), y=tot_monoculture_area)) + geom_point() + labs(x="Predicted Cassva production (log)", y="Total Monoculture Area") g4 <- ggplot(dat_civ, aes(x=log(Cass_Prod + c), y=tot_intercrop_area)) + geom_point() + labs(x="Predicted Cassva production (log)", y="Total Intercropped Area") ggarrange(g1,g2,g3,g4) # Harvest Area in CIV c <- min(dat_civ$Cass_HA[dat_civ$Cass_HA > 0], na.rm=TRUE) / 2 m1 <- lm(tot_cassava_area ~ Cass_HA, data=dat_civ) m2 <- lm(tot_cassava_area ~ log(Cass_HA + c), data=dat_civ) m4 <- gam(tot_cassava_area ~ s(Cass_HA), data=dat_civ) AIC(m1) AIC(m2) AIC(m4) par(mfrow=c(2,2)) plot(m2) summary(m2)$adj.r.squared # Now looking at cassava presence g1 <- ggplot(dat_civ, aes(y=log(Cass_Prod + c), x=cass_area_presence)) + geom_boxplot() + labs(y="Predicted Cassva production (log)", x="Presence/absence of cassava production") + geom_point() g2 <- ggplot(dat_civ, aes(y=log(Cass_HA + c), x=cass_area_presence)) + geom_boxplot() + labs(y="Predicted Cassva harvest area (log)", x="Presence/absence of cassava production") + geom_point() g3 <- ggplot(dat_civ, aes(y=log(SPAM2010), x=cass_area_presence)) + geom_boxplot() + labs(y="Predicted Cassva production SPAM (log)", x="Presence/absence of cassava production") + geom_point() ggarrange(g1, g2, g3, nrow=1) offset <- min(dat_civ$Cass_Prod[dat_civ$Cass_Prod > 0], na.rm=TRUE)/2 t.test(log(Cass_Prod + offset) ~ cass_area_presence, data=dat_civ) offset <- min(dat_civ$Cass_HA[dat_civ$Cass_HA > 0], na.rm=TRUE)/2 t.test(log(Cass_HA + offset) ~ cass_area_presence, data=dat_civ) # Production in Uganda c <- min(dat_uga$Cass_Prod[dat_uga$Cass_Prod > 0], na.rm=TRUE) / 2 m1 <- lm(tot_cassava_area ~ Cass_Prod, data=dat_uga) m2 <- lm(tot_cassava_area ~ log(Cass_Prod + c), data=dat_uga) m4 <- gam(tot_cassava_area ~ s(Cass_Prod), data=dat_uga) AIC(m1) AIC(m2) AIC(m4) par(mfrow=c(2,2)) plot(m2) summary(m2)$adj.r.squared g1 <- ggplot(dat_uga, aes(x=log(Cass_Prod + c), y=tot_cassava_area)) + geom_point() + labs(x="Predicted Cassva production (log)", y="Total Cassava Area") g2 <- ggplot(dat_uga, aes(x=log(Cass_Prod + c), y=tot_cassava_area_w)) + geom_point() + labs(x="Predicted Cassva production (log)", y="Total Cassava Area (weighted)") g3 <- ggplot(dat_uga, aes(x=log(Cass_Prod + c), y=tot_monoculture_area)) + geom_point() + labs(x="Predicted Cassva production (log)", y="Total Monoculture Area") g4 <- ggplot(dat_uga, aes(x=log(Cass_Prod + c), y=tot_intercrop_area)) + geom_point() + labs(x="Predicted Cassva production (log)", y="Total Intercropped Area") ggarrange(g1,g2,g3,g4) # Harvest Area in Uganda c <- min(dat_uga$Cass_HA[dat_uga$Cass_HA > 0], na.rm=TRUE) / 2 m1 <- lm(tot_cassava_area ~ Cass_HA, data=dat_uga) m2 <- lm(tot_cassava_area ~ log(Cass_HA + c), data=dat_uga) m4 <- gam(tot_cassava_area ~ s(Cass_HA), data=dat_uga) AIC(m1) AIC(m2) AIC(m4) par(mfrow=c(2,2)) plot(m1) summary(m1)$adj.r.squared # Now looking at cassava presence g1 <- ggplot(dat_uga, aes(y=log(Cass_Prod + c), x=cass_area_presence)) + geom_boxplot() + labs(y="Predicted Cassva production (log)", x="Presence/absence of cassava production") + geom_point() g2 <- ggplot(dat_uga, aes(y=log(Cass_HA + c), x=cass_area_presence)) + geom_boxplot() + labs(y="Predicted Cassva harvest area (log)", x="Presence/absence of cassava production") + geom_point() g3 <- ggplot(dat_uga, aes(y=log(SPAM2010), x=cass_area_presence)) + geom_boxplot() + labs(y="Predicted Cassva production SPAM (log)", x="Presence/absence of cassava production") + geom_point() ggarrange(g1, g2, g3, nrow=1) offset <- min(dat_uga$Cass_Prod[dat_uga$Cass_Prod > 0], na.rm=TRUE)/2 t.test(log(Cass_Prod + offset) ~ cass_area_presence, data=dat_uga) offset <- min(dat_uga$Cass_HA[dat_uga$Cass_HA > 0], na.rm=TRUE)/2 t.test(log(Cass_HA + offset) ~ cass_area_presence, data=dat_uga)