# an initial look at some study area summaries... library(ggplot2) library(sf) library("rnaturalearth") library("rnaturalearthdata") library(rgeos) library(ggpubr) panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- (cor(x, y, use="pairwise")) txt <- format(c(r, 0.123456789), digits = digits)[1] txt <- paste0(prefix, txt) if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) text(0.5, 0.5, txt, cex = cex.cor) } civ <- ne_countries(scale = "medium", returnclass = "sf", country="Ivory Coast") # area of individual plants in each study area indplants <- read.csv("KLH_Civ_individual_plants_poly.csv") head(indplants) table(indplants$field_id) # aggregate number of plants per field plants <- data.frame(aggregate(area_m2 ~ field_id, FUN=sum, data=indplants), table(indplants$field_id)) names(plants) <- c("field_id", "tot_area_ind_plants", "x", "tot_ind_plants") plants <- plants[, c("field_id", "tot_area_ind_plants", "tot_ind_plants")] # number of buildings / inhabitants in each study area inhabit <- read.csv("KLH_Civ_inhabited_pts.csv") head(inhabit) table(inhabit$Subcategor) table(inhabit$FloorNo) table(inhabit$field_id) # aggregate number of buildings per field temp <- data.frame(table(inhabit$field_id, inhabit$FloorNo)) names(temp) <- c("field_id", "FloorNo", "Freq") temp2 <- reshape(temp, direction="wide", v.names="Freq", timevar="FloorNo", idvar="field_id") temp2$tot <- temp2$Freq.1 + temp2$Freq.2 buildings <- temp2 names(buildings)<- c("field_id", "1story_buildings", "2story_buildings", "tot_buildings") # cassava production models. also includes area of each study area models <- read.csv("KLH_Civ_study_area_centre_sample.csv") head(models) # cassava cultivation cassava <- read.csv("KLH_Civ_cassava_fields_poly.csv") head(cassava) table(cassava$Name) table(cassava$Name, cassava$Subcategor) # aggregate to area in monoculture and area in intercropping per field, total cassava area, what is intercropped, number of fields - bit dodgy due to intersecting polygons... c1 <- aggregate(area_m2 ~ field_id, data=cassava, FUN=sum) names(c1) <- c("field_id", "tot_cassava_area") c2 <- aggregate(area_m2 ~ field_id, data=cassava[cassava$Name == "Cassava Monoculture", ], FUN=sum) names(c2) <- c("field_id", "tot_monoculture_area") c3 <- aggregate(area_m2 ~ field_id, data=cassava[cassava$Name == "Cassava", ], FUN=sum) names(c3) <- c("field_id", "tot_intercrop_area") c4 <- data.frame(table(cassava$field_id)) names(c4) <- c("field_id", "no_fields") c5 <- aggregate(cbind(Banana, Pineapple, Palm.oil, Maize, Papaya, Avocado, Sugar.cane, Cacao, Ivory.Palm, Guava, Eggplant, Cashew, Coffee, Peanut, Okra, Pepper, Mango, Coconut, Rubber, Taro, Lemon, Chilli, Palm.Tree, Soursop, Popo) ~ field_id, data=cassava, FUN=sum, na.rm=TRUE, na.action=na.pass) fields <- merge(c1, c2, by="field_id", all.x=TRUE, all.y=TRUE) fields <- merge(fields, c3, by="field_id", all.x=TRUE, all.y=TRUE) fields <- merge(fields, c4, by="field_id", all.x=TRUE, all.y=TRUE) fields <- merge(fields, c5, by="field_id", all.x=TRUE, all.y=TRUE) # check field ids plants$field_id[which(!plants$field_id %in% models$field_id)] buildings$field_id[which(!buildings$field_id %in% models$field_id )] fields$field_id[which(!fields$field_id %in% models$field_id )] dim(models) dim(plants) dat_civ <- merge(models, plants, by="field_id", all.x=TRUE, all.y=TRUE) dim(dat_civ) dim(buildings) dat_civ <- merge(dat_civ, buildings, by="field_id", all.x=TRUE, all.y=TRUE) dim(dat_civ) dat_civ <- merge(dat_civ, fields, by="field_id", all.x=TRUE, all.y=TRUE) dim(dat_civ) ggplot(civ) + geom_sf() + geom_point(data=dat_civ, aes(x=Centre_Lon, y=Centre_Lat)) + theme_bw() + labs(x="Longitude", y="Latitude") g1 <- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=area_m2, colour=area_m2)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() g2 <- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=Population, colour=Population)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() g1 # ggarrange(g1, g2, nrow=1, ncol=2) # look at population information ggplot(dat_civ, aes(x=tot_buildings, y=log10(Population))) + geom_point() + labs(x="Number of buildings per sample site", y="Estimated population from LandScan (log10)") ggplot(dat_civ, aes(x=`1story_buildings`, y=log10(Population))) + geom_point() + labs(x="Number of buildings per sample site", y="Estimated population from LandScan (log10)") # look at cassava production p1 <- ggplot(dat_civ, aes(x=tot_cassava_area)) + geom_histogram(bins=10, fill="grey",color="black") + labs(x="Total field area of\n cassava production (m2)") p2 <- ggplot(dat_civ, aes(x=tot_monoculture_area)) + geom_histogram(bins=10, fill="grey",color="black") + labs(x="Total field area of\n monoculture cassava production (m2)") p3 <- ggplot(dat_civ, aes(x=tot_intercrop_area)) + geom_histogram(bins=10, fill="grey",color="black") + labs(x="Total field area of\n intercropped cassava production (m2)") p4 <- ggplot(dat_civ, aes(x=tot_area_ind_plants)) + geom_histogram(bins=10, fill="grey",color="black") + labs(x="Total area of\n individual cassava plants (m2)") ggarrange(p1,p4,p2,p3,nrow=2,ncol=2) ggplot(dat_civ, aes(x=tot_monoculture_area, y=tot_intercrop_area)) + geom_point() pairs(dat_civ[, c("tot_cassava_area","tot_monoculture_area", "tot_intercrop_area", "tot_area_ind_plants", "Cass_Prod", "Cass_HA", "SPAM2010")],lower.panel = panel.cor) g3 <- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=Cass_Prod, colour=Cass_Prod)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() g4 <- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=Cass_HA, colour=Cass_HA)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() g5 <- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=SPAM2010, colour=SPAM2010)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() ggarrange(g3,g4,g5, nrow=1, ncol=3) q1 <- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=tot_cassava_area, colour=tot_cassava_area)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() q4 <- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=tot_area_ind_plants, colour=tot_area_ind_plants)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() q2 <- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=tot_monoculture_area, colour=tot_monoculture_area)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() q3 <- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=tot_intercrop_area, colour=tot_intercrop_area)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() ggarrange(q1,q4,q2,q3,nrow=2,ncol=2) # take a look at the intercropping... colSums(dat_civ[, c("Banana", "Pineapple", "Palm.oil", "Maize", "Papaya", "Avocado", "Sugar.cane", "Cacao", "Ivory.Palm", "Guava", "Eggplant", "Cashew", "Coffee", "Peanut", "Okra", "Pepper", "Mango", "Coconut", "Rubber", "Taro", "Lemon", "Chilli", "Palm.Tree", "Soursop", "Popo")], na.rm=TRUE) # cassava production vs population pairs(dat_civ[, c("tot_buildings","Population", "Cass_Prod", "Cass_HA", "SPAM2010")],lower.panel = panel.cor) pairs(dat_civ[, c("tot_buildings","Population", "tot_cassava_area","tot_monoculture_area", "tot_intercrop_area", "tot_area_ind_plants")],lower.panel = panel.cor)