# Round 2....
# CIV

library(ggplot2)
library(sf)
library("rnaturalearth")
library("rnaturalearthdata")
library(rgeos)
library(ggpubr)
library(lattice)
library(gstat)
library(sp)
library(mgcv)

# Panel-style helper: writes the pairwise correlation of `x` and `y` in the
# centre of the current plot region (presumably intended for pairs(); no call
# is visible in this section).
#   x, y     numeric vectors to correlate (pairwise-complete observations)
#   digits   significant digits used to format the correlation
#   prefix   text pasted in front of the formatted correlation
#   cex.cor  text size; when missing it is set to 0.8 / strwidth(txt)
#   ...      accepted and ignored
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  usr <- par("usr")
  # Restore the caller's user coordinates on exit. The setting must be passed
  # by name: par(usr) with a bare numeric vector does not restore anything.
  on.exit(par(usr = usr))
  par(usr = c(0, 1, 0, 1))
  r <- cor(x, y, use = "pairwise")
  # Formatting together with a long decimal forces a consistent number of
  # digits; only the first element (the correlation) is kept.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  text(0.5, 0.5, txt, cex = cex.cor)
}

civ <- ne_countries(scale = "medium", returnclass = "sf",
                    country = "Ivory Coast")

# area of individual plants in each study area
indplants <- read.csv("KLH_Civ_individual_plants_poly.csv")
head(indplants)
table(indplants$field_id)

# aggregate number of plants per field
# NOTE(review): the two pieces are bound column-wise, so this relies on
# aggregate() and table() returning the field ids in the same order.
plants <- data.frame(
  aggregate(area_m2 ~ field_id, FUN = sum, data = indplants),
  table(indplants$field_id)
)
names(plants) <- c("field_id", "tot_area_ind_plants", "x", "tot_ind_plants")
plants <- plants[, c("field_id", "tot_area_ind_plants", "tot_ind_plants")]

# number of buildings / inhabitants in each study area
inhabit <- read.csv("KLH_Civ_inhabited_pts.csv")
head(inhabit)
table(inhabit$Subcategor)
table(inhabit$FloorNo)
table(inhabit$field_id)

# aggregate number of buildings per field
temp <- data.frame(table(inhabit$field_id, inhabit$FloorNo))
names(temp) <- c("field_id", "FloorNo", "Freq")
temp2 <- reshape(temp, direction = "wide", v.names = "Freq",
                 timevar = "FloorNo", idvar = "field_id")
# assumes FloorNo only takes the values 1 and 2 (columns Freq.1 / Freq.2)
temp2$tot <- temp2$Freq.1 + temp2$Freq.2
buildings <- temp2
names(buildings) <- c("field_id", "1story_buildings", "2story_buildings",
                      "tot_buildings")
# field_id is a factor here (it comes from table()). Go through character so
# the original ids are recovered; as.numeric() on the factor itself would
# return the internal level codes 1..k and break the joins on field_id.
buildings$field_id <- as.numeric(as.character(buildings$field_id))

# cassava production models.
# also includes area of each study area
models <- read.csv("Civ_study_area_centre_sample_updated.csv")
head(models)

# cassava cultivation
cassava <- read.csv("KLH_Civ_cassava_fields_poly.csv")
head(cassava)
table(cassava$Name)
table(cassava$Name, cassava$Subcategor)
table(cassava$Density)
table(cassava$Density, cassava$field_id)
table(cassava$Density, cassava$Subcategor)

# Per-site summaries of the cassava polygons: total area, area under
# monoculture, area under intercropping, number of fields and the summed
# intercrop indicator columns. Bit dodgy due to intersecting polygons...
is_monoculture <- cassava$Name == "Cassava Monoculture"
is_intercrop <- cassava$Name == "Cassava"

c1 <- setNames(
  aggregate(area_m2 ~ field_id, data = cassava, FUN = sum),
  c("field_id", "tot_cassava_area")
)
c2 <- setNames(
  aggregate(area_m2 ~ field_id, data = cassava[is_monoculture, ], FUN = sum),
  c("field_id", "tot_monoculture_area")
)
c3 <- setNames(
  aggregate(area_m2 ~ field_id, data = cassava[is_intercrop, ], FUN = sum),
  c("field_id", "tot_intercrop_area")
)
c4 <- setNames(
  data.frame(table(cassava$field_id)),
  c("field_id", "no_fields")
)
# NOTE(review): `Scivr.cane` looks like `Sugar.cane` after a uga -> civ
# search/replace; confirm against the CSV header.
c5 <- aggregate(
  cbind(Banana, Pineapple, Palm.oil, Maize, Papaya, Avocado, Scivr.cane,
        Cacao, Ivory.Palm, Guava, Eggplant, Cashew, Coffee, Peanut, Okra,
        Pepper, Mango, Coconut, Rubber, Taro, Lemon, Chilli, Palm.Tree,
        Soursop, Popo) ~ field_id,
  data = cassava, FUN = sum, na.rm = TRUE, na.action = na.pass
)

# Full outer join of all per-site summaries on field_id, left to right.
fields <- Reduce(
  function(lhs, rhs) merge(lhs, rhs, by = "field_id", all = TRUE),
  list(c1, c2, c3, c4, c5)
)

# so, can we refine the estimate of cassava production from the study sites...

# issue 1. area sampled is different
# scale area by size sampled
areaSampled <- models$area_m2

# issue 2.
# different recorded densities
# look at how many field sampled per site
fields_per_site <- table(cassava$field_id)
# unit-width bins centred on the integers; extended past 35 when a site has
# more fields so hist() does not fail on out-of-range counts
hist(fields_per_site,
     breaks = seq(-0.5, max(35, max(fields_per_site)) + 0.5, by = 1),
     col = "grey",
     xlab = "Number of separate fields in Cassava producing sites",
     main = "")
as.data.frame(table(cassava$Density))

# issue 3. individual plants
barplot(table(plants$tot_ind_plants), xlab = "Number of individual plants",
        ylab = "Number of fields")
# `plants` has no monoculture column, so bring it in from `fields` by site
# before plotting individual-plant area against monoculture area.
ind_vs_mono <- merge(plants, fields[, c("field_id", "tot_monoculture_area")],
                     by = "field_id", all.x = TRUE)
plot(ind_vs_mono$tot_area_ind_plants, ind_vs_mono$tot_monoculture_area)

# issue 4. cropping
table(cassava$Name)
table(cassava$Name, cassava$field_id)
name_by_site <- table(cassava$Name, cassava$field_id)
# column-wise proportions: share of each cropping type within a site
# (no hard-coded number of cropping types or sites)
propTable <- prop.table(name_by_site, margin = 2)
levelplot(as.matrix(name_by_site), asp = 1, xlab = "", ylab = "Field site",
          scales = list(y = list(lab = NULL)), main = "Number of Fields")
levelplot(propTable, asp = 1, xlab = "", ylab = "Field site",
          scales = list(y = list(lab = NULL)), main = "Proportion of Fields",
          col.regions = heat.colors(50))
# "Cassava" (as opposed to "Cassava Monoculture") is the intercropped class
hist(propTable["Cassava", ], col = "grey", main = "",
     xlab = "Proportion of Intercropping Fields", breaks = 10)

# Weight each field's area by its recorded planting density. The labels are
# the raw (partly misspelled) values found in the Density column; any label
# not listed keeps the default weight of 1.
density_weights <- c(
  "dense" = 1.5,
  "dense..?" = 1.5,
  "densea" = 1.5,
  "regular/sparse" = 0.75,
  "sparce" = 0.5,
  "sparse" = 0.5,
  "sparse and uprooted" = 0.5,
  "sparse but" = 0.5,
  "sparse2aqqw1" = 0.5,
  "very dense" = 1.75,
  "very sparce" = 0.25,
  "very sparse" = 0.25
)
cassava$weights <- 1
has_weight <- cassava$Density %in% names(density_weights)
cassava$weights[has_weight] <-
  unname(density_weights[as.character(cassava$Density[has_weight])])
cassava$area_w <- cassava$area_m2 * cassava$weights

# density-weighted versions of the per-site area summaries
c1 <- aggregate(area_w ~ field_id, data = cassava, FUN = sum)
names(c1) <- c("field_id", "tot_cassava_area_w")
c2 <- aggregate(area_w ~ field_id,
                data = cassava[cassava$Name == "Cassava Monoculture", ],
                FUN = sum)
names(c2) <- c("field_id", "tot_monoculture_area_w")
c3 <- aggregate(area_w ~ field_id,
                data = cassava[cassava$Name == "Cassava", ], FUN = sum)
names(c3) <- c("field_id", "tot_intercrop_area_w")
fields_w <- merge(c1, c2, by = "field_id", all = TRUE)
fields_w <- merge(fields_w, c3, by = "field_id", all = TRUE)

# check field ids
plants$field_id[which(!plants$field_id %in% models$field_id)]
buildings$field_id[which(!buildings$field_id %in% models$field_id)]
fields$field_id[which(!fields$field_id %in% models$field_id)]
fields_w$field_id[which(!fields_w$field_id %in% models$field_id)]

dim(models)
dim(plants)
dat_civ <- merge(models, plants, by = "field_id", all = TRUE)
dim(dat_civ)
dim(buildings)
dat_civ <- merge(dat_civ, buildings, by = "field_id", all = TRUE)
dim(dat_civ)
dat_civ <- merge(dat_civ, fields, by = "field_id", all = TRUE)
dim(dat_civ)
dat_civ <- merge(dat_civ, fields_w, by = "field_id", all = TRUE)
dim(dat_civ)

# where no Cassava production was recorded, replace NAs by 0
# The row masks are computed on dat_civ itself: merge() sorts by field_id and
# can add rows, so row positions taken from `models` need not line up.
names(dat_civ)
no_plants <- !dat_civ$field_id %in% plants$field_id
which(no_plants)
which(is.na(dat_civ$tot_area_ind_plants))
dat_civ[no_plants, "tot_area_ind_plants"] <- 0
dat_civ[no_plants, "tot_ind_plants"] <- 0

no_buildings <- !dat_civ$field_id %in% buildings$field_id
which(no_buildings)
which(is.na(dat_civ$`1story_buildings`))
which(is.na(dat_civ$`2story_buildings`))
which(is.na(dat_civ$tot_buildings))
dat_civ[no_buildings, "1story_buildings"] <- 0
dat_civ[no_buildings, "2story_buildings"] <- 0
dat_civ[no_buildings, "tot_buildings"] <- 0

no_fields <- !dat_civ$field_id %in% fields$field_id
which(is.na(dat_civ$tot_cassava_area))
which(is.na(dat_civ$tot_intercrop_area))
which(is.na(dat_civ$tot_monoculture_area))
which(no_fields)
dat_civ[no_fields, c("tot_cassava_area", "tot_monoculture_area",
                     "tot_intercrop_area", "no_fields")] <- 0
# sites with only one cropping type still have NA for the other type
dat_civ[which(is.na(dat_civ$tot_intercrop_area)), "tot_intercrop_area"] <- 0
dat_civ[which(is.na(dat_civ$tot_monoculture_area)),
        "tot_monoculture_area"] <- 0
# NOTE(review): `Scivr.cane` looks like `Sugar.cane` after a uga -> civ
# search/replace; confirm against the CSV header.
intercrop_cols <- c("Banana", "Pineapple", "Palm.oil", "Maize", "Papaya",
                    "Avocado", "Scivr.cane", "Cacao", "Ivory.Palm", "Guava",
                    "Eggplant", "Cashew", "Coffee", "Peanut", "Okra",
                    "Pepper", "Mango", "Coconut", "Rubber", "Taro", "Lemon",
                    "Chilli", "Palm.Tree", "Soursop", "Popo")
dat_civ[no_fields, intercrop_cols] <- 0

no_fields_w <- !dat_civ$field_id %in% fields_w$field_id
dat_civ[no_fields_w, c("tot_cassava_area_w", "tot_monoculture_area_w",
                       "tot_intercrop_area_w")] <- 0
dat_civ[which(is.na(dat_civ$tot_intercrop_area_w)),
        "tot_intercrop_area_w"] <- 0
dat_civ[which(is.na(dat_civ$tot_monoculture_area_w)),
        "tot_monoculture_area_w"] <- 0

# total cassava area: individual plants + intercropped + monoculture
dat_civ$tot_cassava_area <- dat_civ$tot_area_ind_plants +
  dat_civ$tot_intercrop_area + dat_civ$tot_monoculture_area
# weighted total, with intercropped area further scaled by 0.75, expressed
# per m2 of study area
dat_civ$tot_cassava_area_w <- (dat_civ$tot_area_ind_plants +
  dat_civ$tot_intercrop_area_w * 0.75 +
  dat_civ$tot_monoculture_area_w) / dat_civ$area_m2

# create a Cassava presence/absence category
dat_civ$cass_area_presence <- factor(dat_civ$tot_cassava_area > 0)
table(dat_civ$cass_area_presence)
table(dat_civ$tot_cassava_area_w > 0)

ggplot(dat_civ, aes(x = tot_cassava_area, y = tot_cassava_area_w)) +
  geom_point()
hist(dat_civ$tot_cassava_area_w, col = "grey", breaks = 30,
     xlab = "Weighted Cassava Production (per m2)", main = "")

ggplot(civ) +
  geom_sf() +
  geom_point(data = dat_civ, aes(x = Centre_Lon, y = Centre_Lat)) +
  theme_bw() +
  labs(x = "Longitude", y = "Latitude")

# Spatial Cassava production ----
ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    colour = cass_area_presence)) +
  geom_point(size = 2) +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()

ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    size = tot_cassava_area_w,
                    colour = tot_cassava_area_w)) +
  geom_point() +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(colour = "none") +
  coord_fixed()

ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    size = tot_cassava_area_w,
                    colour = log(tot_cassava_area_w))) +
  geom_point() +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()

ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    size = tot_intercrop_area_w,
                    colour = log(tot_intercrop_area_w))) +
  geom_point() +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()

ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    size = tot_monoculture_area_w,
                    colour = log(tot_monoculture_area_w))) +
  geom_point() +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()

ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    size = tot_area_ind_plants,
                    colour = log(tot_area_ind_plants))) +
  geom_point() +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()

# Cassava production by region...
# half the smallest positive value, added before taking logs so zeros stay
# finite; na.rm guards against sites with a missing study-area size
offset <- min(dat_civ$tot_cassava_area_w[dat_civ$tot_cassava_area_w > 0],
              na.rm = TRUE) / 2
names(dat_civ)
# district and province
table(dat_civ$District)
table(dat_civ$Province)
table(dat_civ$admin_name)

ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat, colour = admin_name)) +
  geom_point(size = 2) +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()
ggplot(dat_civ, aes(x = admin_name, y = log(tot_cassava_area_w + offset),
                    colour = admin_name)) +
  geom_point()

# factor() keeps the region a categorical predictor even if admin_code is
# stored as a number; it is a no-op when the column is already categorical
a1 <- lm(log(tot_cassava_area_w + offset) ~ factor(admin_code),
         data = dat_civ)
anova(a1)
par(mfrow = c(2, 2))
plot(a1)

gm <- gam(log(tot_cassava_area_w + offset) ~ s(Centre_Lon) + s(Centre_Lat),
          data = dat_civ)
par(mfrow = c(2, 2))
gam.check(gm)
par(mfrow = c(1, 2))
plot(gm)

# Cassava production (modelled vs sampled) -----
table(dat_civ$cass_area_presence)
g1 <- ggplot(dat_civ, aes(x = cass_area_presence, y = log(Cass_HA))) +
  geom_boxplot()
g2 <- ggplot(dat_civ, aes(x = cass_area_presence, y = log(Cass_Prod))) +
  geom_boxplot()
g3 <- ggplot(dat_civ, aes(x = cass_area_presence, y = log(SPAM2010))) +
  geom_boxplot()
g4 <- ggplot(dat_civ, aes(x = cass_area_presence, y = log(MapSPAM_HA))) +
  geom_boxplot()
ggarrange(g1, g2, g3, g4)

# t.tests
offset <- min(dat_civ$Cass_HA[dat_civ$Cass_HA > 0], na.rm = TRUE) / 2
# do a welch t-test
t.test(log(Cass_HA + offset) ~ cass_area_presence, data = dat_civ)
offset <- min(dat_civ$Cass_Prod[dat_civ$Cass_Prod > 0], na.rm = TRUE) / 2
t.test(log(Cass_Prod + offset) ~ cass_area_presence, data = dat_civ)
t.test(log(SPAM2010) ~ cass_area_presence, data = dat_civ)
t.test(log(MapSPAM_HA) ~ cass_area_presence, data = dat_civ)

offset <- min(dat_civ$tot_cassava_area_w[dat_civ$tot_cassava_area_w > 0],
              na.rm = TRUE) / 2

g1 <- ggplot(dat_civ, aes(x = tot_cassava_area_w, y = Cass_HA)) +
  geom_point()
g2 <- ggplot(dat_civ, aes(x = tot_cassava_area_w, y = Cass_Prod)) +
  geom_point()
g3 <- ggplot(dat_civ, aes(x = tot_cassava_area_w, y = SPAM2010)) +
  geom_point()
g4 <- ggplot(dat_civ, aes(x = tot_cassava_area_w, y = MapSPAM_HA)) +
  geom_point()
ggarrange(g1, g2, g3, g4, nrow = 1, ncol = 4)

g1 <- ggplot(dat_civ, aes(x = tot_monoculture_area_w, y = Cass_HA)) +
  geom_point()
g2 <- ggplot(dat_civ, aes(x = tot_monoculture_area_w, y = Cass_Prod)) +
  geom_point()
g3 <- ggplot(dat_civ, aes(x = tot_monoculture_area_w, y = SPAM2010)) +
  geom_point()
g4 <- ggplot(dat_civ, aes(x = tot_monoculture_area_w, y = MapSPAM_HA)) +
  geom_point()
ggarrange(g1, g2, g3, g4, nrow = 1, ncol = 4)

g1 <- ggplot(dat_civ, aes(x = tot_intercrop_area_w, y = Cass_HA)) +
  geom_point()
g2 <- ggplot(dat_civ, aes(x = tot_intercrop_area_w, y = Cass_Prod)) +
  geom_point()
g3 <- ggplot(dat_civ, aes(x = tot_intercrop_area_w, y = SPAM2010)) +
  geom_point()
g4 <- ggplot(dat_civ, aes(x = tot_intercrop_area_w, y = MapSPAM_HA)) +
  geom_point()
ggarrange(g1, g2, g3, g4, nrow = 1, ncol = 4)

g1 <- ggplot(dat_civ, aes(x = tot_area_ind_plants, y = Cass_HA)) +
  geom_point()
g2 <- ggplot(dat_civ, aes(x = tot_area_ind_plants, y = Cass_Prod)) +
  geom_point()
g3 <- ggplot(dat_civ, aes(x = tot_area_ind_plants, y = SPAM2010)) +
  geom_point()
g4 <- ggplot(dat_civ, aes(x = tot_area_ind_plants, y = MapSPAM_HA)) +
  geom_point()
ggarrange(g1, g2, g3, g4, nrow = 1, ncol = 4)

# spatial smooths of the modelled products
offset <- min(dat_civ$Cass_HA[dat_civ$Cass_HA > 0], na.rm = TRUE) / 2
gmod1 <- gam(log(Cass_HA + offset) ~ s(Centre_Lon) + s(Centre_Lat),
             data = dat_civ)
par(mfrow = c(2, 2))
gam.check(gmod1)
par(mfrow = c(1, 2))
plot(gmod1)
title(main = "Cass HA", outer = TRUE, line = -2)
ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    size = log(Cass_HA + offset),
                    colour = log(Cass_HA + offset))) +
  geom_point() +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()

offset <- min(dat_civ$Cass_Prod[dat_civ$Cass_Prod > 0], na.rm = TRUE) / 2
gmod2 <- gam(log(Cass_Prod + offset) ~ s(Centre_Lon) + s(Centre_Lat),
             data = dat_civ)
par(mfrow = c(2, 2))
gam.check(gmod2)
par(mfrow = c(1, 2))
plot(gmod2)
title(main = "Cass Prod", outer = TRUE, line = -2)
ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    size = log(Cass_Prod + offset),
                    colour = log(Cass_Prod + offset))) +
  geom_point() +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()

gmod3 <- gam(log(SPAM2010) ~ s(Centre_Lon) + s(Centre_Lat), data = dat_civ)
par(mfrow = c(2, 2))
gam.check(gmod3)
par(mfrow = c(1, 2))
plot(gmod3)
title(main = "SPAM 2010", outer = TRUE, line = -2)
ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    size = log(SPAM2010), colour = log(SPAM2010))) +
  geom_point() +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()

gmod4 <- gam(log(MapSPAM_HA) ~ s(Centre_Lon) + s(Centre_Lat), data = dat_civ)
par(mfrow = c(2, 2))
gam.check(gmod4)
par(mfrow = c(1, 2))
plot(gmod4)
title(main = "MapSPAM_HA", outer = TRUE, line = -2)
ggplot(dat_civ, aes(x = Centre_Lon, y = Centre_Lat,
                    size = log(MapSPAM_HA), colour = log(MapSPAM_HA))) +
  geom_point() +
  labs(x = "Longitude", y = "Latitude") +
  theme_bw() +
  guides(size = "none") +
  coord_fixed()

# Cassava production and population ----
ggplot(dat_civ, aes(x = cass_area_presence, y = log10(Population))) +
  geom_boxplot()
# This t-test is known to fail (too little variability); catch the error so
# a sourced run carries on to the rank-based test below.
pop_ttest <- tryCatch(
  t.test(log10(Population) ~ factor(cass_area_presence), data = dat_civ),
  error = function(e) {
    message("t-test on log10(Population) failed: ", conditionMessage(e))
    NULL
  }
)
if (!is.null(pop_ttest)) {
  print(pop_ttest)
}
# do a mann.whitney instead
wilcox.test(log10(Population) ~ factor(cass_area_presence), data = dat_civ)

offset <- min(dat_civ$tot_cassava_area_w[dat_civ$tot_cassava_area_w > 0],
              na.rm = TRUE) / 2
ggplot(dat_civ, aes(x = log(tot_cassava_area_w + offset),
                    y = log10(Population))) +
  geom_point()
ggplot(dat_civ, aes(x = (tot_cassava_area_w), y = (Population))) +
  geom_point()
ggplot(dat_civ, aes(y = log(tot_cassava_area_w + offset),
                    x = tot_buildings)) +
  geom_point()

g1 <- ggplot(dat_civ, aes(x = Cass_HA, y = Population)) + geom_point()
g2 <- ggplot(dat_civ, aes(x = Cass_Prod, y = Population)) + geom_point()
g3 <- ggplot(dat_civ, aes(x = MapSPAM_HA, y = Population)) + geom_point()
g4 <- ggplot(dat_civ, aes(x = SPAM2010, y = Population)) + geom_point()
ggarrange(g1, g2, g3, g4)

ggplot(dat_civ, aes(x = tot_buildings, y = Population)) + geom_point()
ggplot(dat_civ, aes(x = tot_buildings, y = log(tot_cassava_area_w))) +
  geom_point()

# Re-express the weighted cassava share relative to the study area left after
# subtracting built-up ground.
# NOTE(review): 15 is presumably an assumed footprint per building in m2 -
# confirm the value and its source.
building_footprint_m2 <- 15
dat_civ$tot_cassava_area_w_build <- dat_civ$tot_cassava_area_w *
  dat_civ$area_m2 /
  (dat_civ$area_m2 - dat_civ$tot_buildings * building_footprint_m2)
ggplot(dat_civ, aes(x = tot_cassava_area_w, y = tot_cassava_area_w_build)) +
  geom_point()
ggplot(dat_civ, aes(x = (tot_cassava_area_w_build), y = Population)) +
  geom_point()