# Script to look at large scale spatial trends through the raster buffers library(ggplot2) library(mgcv) library(data.table) library(car) library(predictmeans) rm(list=ls()) # get aligned data source('X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/1_alignData.R') dat_civ$Centre_Lon <- round(dat_civ$Centre_Lon, 7) dat_civ$Centre_Lat <- round(dat_civ$Centre_Lat, 7) dat_uga$Centre_Lon <- round(dat_uga$Centre_Lon, 7) dat_uga$Centre_Lat <- round(dat_uga$Centre_Lat, 7) # create different subsets # remove zeros dat_civ_pres <- dat_civ[dat_civ$tot_cassava_area >0, ] dat_uga_pres <- dat_uga[dat_uga$tot_cassava_area >0, ] # read in rasters cassava_prod <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/cassava_prod.txt") # cut down to CIV and UGA and then delete to free up space! range(dat_civ$Centre_Lon) range(dat_civ$Centre_Lat) ind_civ <- cassava_prod$Latitude > 5 & cassava_prod$Latitude < 8 & cassava_prod$Longitude > -6 & cassava_prod$Longitude < -3 civ_prod <- cassava_prod[ind_civ, ] range(dat_uga$Centre_Lon) range(dat_uga$Centre_Lat) ind_uga <- cassava_prod$Latitude > 0 & cassava_prod$Latitude < 3.5 & cassava_prod$Longitude > 31 & cassava_prod$Longitude < 34.5 uga_prod <- cassava_prod[ind_uga, ] ggplot() + geom_point(data=civ_prod, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point(data=dat_civ, aes(x=Centre_Lon, y=Centre_Lat), colour="red") ggplot() + geom_point(data=uga_prod, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point(data=dat_uga, aes(x=Centre_Lon, y=Centre_Lat), colour="red") rm(cassava_prod) # cassava_ha <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/cassava_ha.txt") # # ind_civ <- cassava_ha$Latitude > 5 & cassava_ha$Latitude < 8 & cassava_ha$Longitude > -6 & cassava_ha$Longitude < -3 # civ_ha <- cassava_ha[ind_civ, ] # # ind_uga <- cassava_ha$Latitude > 0 & cassava_ha$Latitude < 3.5 & cassava_ha$Longitude > 31 & cassava_ha$Longitude < 34.5 # uga_ha <- cassava_ha[ind_uga, ] # # ggplot() + geom_point(civ_ha, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point(dat_civ, aes(x=Centre_Lon, y=Centre_lat)) # ggplot() + geom_point(uga_ha, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point(dat_uga, aes(x=Centre_Lon, y=Centre_lat)) # # rm(cassava_ha) civ_HA_2000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/civ_ha_2000.txt") civ_HA_5000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/civ_ha_5000.txt") civ_HA_10000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/civ_ha_10000.txt") civ_prod_2000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/civ_prod_2000.txt") civ_prod_5000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/civ_prod_5000.txt") civ_prod_10000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/civ_prod_10000.txt") uga_HA_2000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/uga_ha_2000.txt") uga_HA_5000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/uga_ha_5000.txt") uga_HA_10000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/uga_ha_10000.txt") uga_prod_2000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/uga_prod_2000.txt") uga_prod_5000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/uga_prod_5000.txt") uga_prod_10000 <- read.csv("X:/Kirsty Hassall/Anna Szyniszewska/Cassava/Stats/raster_to_points/uga_prod_10000.txt") # Survey spaial trends (CIV) ----- ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, colour=cass_area_presence)) + geom_point(size=2) + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=tot_cassava_area_w, colour=tot_cassava_area_w)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none", colour=guide_legend(title="Total Cassava Area (weighted)")) + coord_fixed() ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=tot_intercrop_area_w, colour=log(tot_intercrop_area_w))) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=tot_monoculture_area_w, colour=log(tot_monoculture_area_w))) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, size=tot_area_ind_plants, colour=log(tot_area_ind_plants))) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() # Cassava production by region... offset <- min(dat_civ$tot_cassava_area_w[dat_civ$tot_cassava_area_w > 0]) / 2 # district and province table(dat_civ$District) table(dat_civ$Province) table(dat_civ$admin_name) ggplot(dat_civ, aes(x=Centre_Lon, y=Centre_Lat, colour=admin_name)) + geom_point(size=2) + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() + guides(colour=guide_legend(title="Administration")) ggplot(dat_civ, aes(x=admin_name, y=log(tot_cassava_area_w +offset), colour=admin_name)) + geom_point() a1 <- lm(sqrt(tot_cassava_area_w ) ~ admin_code, data=dat_civ) anova(a1) par(mfrow=c(2,2)) plot(a1) # marginal in all data, but what about only analysing presence... a1 <- lm(sqrt(tot_cassava_area_w ) ~ admin_code, data=dat_civ_pres) anova(a1) par(mfrow=c(2,2)) plot(a1) # nope # gam trends gm <- gam(log(tot_cassava_area_w + offset) ~s(Centre_Lon) + s(Centre_Lat), data=dat_civ) par(mfrow=c(2,2)) gam.check(gm) par(mfrow=c(1,2)) plot(gm) # make prettier plot(gm, select=1, xlab="Longitude", ylab="Predicted Smooth") plot(gm, select=2, xlab="Latitude", ylab="Predicted Smooth") # Raster spatial trends (CIV) ---- ggplot(civ_HA_2000, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point() ggplot(civ_HA_5000, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point() # g1 <- ggplot(civ_ha, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point() + theme_bw() + guides(colour=guide_legend(title="Harvest Area")) # g2 <- ggplot(civ_HA_10000, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point() + theme_bw() + guides(colour=guide_legend(title="Harvest Area")) # ggarrange(g1, g2) g1 <- ggplot() + geom_point(data=civ_prod, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point(data=dat_civ, aes(x=Centre_Lon, y=Centre_Lat), colour="red") + theme_bw() + guides(colour=guide_legend(title="Cassava production")) + coord_fixed() g2 <- ggplot(civ_prod_10000, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point() + theme_bw() + guides(colour=guide_legend(title="Cassava production")) + coord_fixed() ggarrange(g1, g2) # look at gam trends in the buffered data offset <- min(civ_prod_10000$GRID_CODE[civ_prod_10000$GRID_CODE > 0], na.rm=TRUE) / 2 gm_prod <- gam(log(GRID_CODE + offset) ~ s(Longitude) + s(Latitude), data=civ_prod_10000) par(mfrow=c(2,2)) gam.check(gm_prod) par(mfrow=c(1,2)) plot(gm_prod) plot(gm_prod, select=1, xlab="Longitude", ylab="Predicted Smooth") plot(gm_prod, select=2, xlab="Latitude", ylab="Predicted Smooth") # same for harvest area offset <- min(civ_HA_10000$GRID_CODE[civ_HA_10000$GRID_CODE > 0], na.rm=TRUE) / 2 gm_ha <- gam(log(GRID_CODE + offset) ~ s(Longitude) + s(Latitude), data=civ_HA_10000) par(mfrow=c(2,2)) gam.check(gm_ha) par(mfrow=c(1,2)) plot(gm_ha) # essentially the same # how does this change if we change the buffer offset <- min(civ_prod_2000$GRID_CODE[civ_prod_2000$GRID_CODE > 0], na.rm=TRUE) / 2 gm_prod <- gam(log(GRID_CODE + offset) ~ s(Longitude) + s(Latitude), data=civ_prod_2000) par(mfrow=c(2,2)) gam.check(gm_prod) par(mfrow=c(1,2)) plot(gm_prod) # not quite as good offset <- min(civ_prod_5000$GRID_CODE[civ_prod_5000$GRID_CODE > 0], na.rm=TRUE) / 2 gm_prod <- gam(log(GRID_CODE + offset) ~ s(Longitude) + s(Latitude), data=civ_prod_5000) par(mfrow=c(2,2)) gam.check(gm_prod) par(mfrow=c(1,2)) plot(gm_prod) # Survey spatial trends (UGA) ----- ggplot(dat_uga, aes(x=Centre_Lon, y=Centre_Lat, colour=cass_area_presence)) + geom_point(size=2) + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() ggplot(dat_uga, aes(x=Centre_Lon, y=Centre_Lat, size=tot_cassava_area_w, colour=tot_cassava_area_w)) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(colour="none") + coord_fixed() # colouring on log scale to better distinguish g1 <- ggplot(dat_uga, aes(x=Centre_Lon, y=Centre_Lat, size=tot_cassava_area_w, colour=log(tot_cassava_area_w))) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() + guides(colour=guide_legend(title="Total Cassava Area (weighted)")) g2 <- ggplot(dat_uga, aes(x=Centre_Lon, y=Centre_Lat, size=tot_intercrop_area_w, colour=log(tot_intercrop_area_w))) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() + guides(colour=guide_legend(title="Total Intercrop Area (weighted)")) g3 <- ggplot(dat_uga, aes(x=Centre_Lon, y=Centre_Lat, size=tot_monoculture_area_w, colour=log(tot_monoculture_area_w))) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() + guides(colour=guide_legend(title="Total Monoculture Area (weighted)")) ggarrange(g1,g2,g3, nrow=2, ncol=2) ggplot(dat_uga, aes(x=Centre_Lon, y=Centre_Lat, size=tot_area_ind_plants, colour=log(tot_area_ind_plants))) + geom_point() + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() # Cassava production by region... # admin_name , region table(dat_uga$admin_name) table(dat_uga$region) g4 <- ggplot(dat_uga, aes(x=Centre_Lon, y=Centre_Lat, colour=region)) + geom_point(size=2) + labs(x="Longitude", y="Latitude") + theme_bw() + guides(size="none") + coord_fixed() + guides(colour=guide_legend(title="Region")) ggarrange(g1,g2,g3, g4, nrow=2, ncol=2) ggplot(dat_uga, aes(x=region, y=log(tot_cassava_area_w +offset), colour=region)) + geom_boxplot() a1 <- lm(sqrt(tot_cassava_area_w +offset) ~ region, data=dat_uga) anova(a1) par(mfrow=c(2,2)) plot(a1) a1 <- lm(sqrt(tot_monoculture_area_w +offset) ~ region, data=dat_uga) anova(a1) a1 <- lm(sqrt(tot_intercrop_area_w +offset) ~ region, data=dat_uga) anova(a1) # Cassava spatial model... # how about fitting a gam gm <- gam(log(tot_cassava_area_w + offset) ~s(Centre_Lon) + s(Centre_Lat), data=dat_uga) par(mfrow=c(2,2)) gam.check(gm) par(mfrow=c(1,2)) plot(gm) anova(gm) plot(gm, select=1, xlab="Longitude", ylab="Predicted Smooth") plot(gm, select=2, xlab="Latitude", ylab="Predicted Smooth") # Raster spatial trends (UGA) ---- g1 <- ggplot() + geom_point(data=uga_prod, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point(data=dat_uga, aes(x=Centre_Lon, y=Centre_Lat), colour="red") + theme_bw() + guides(colour=guide_legend(title="Cassava production")) + coord_fixed() g2 <- ggplot(uga_prod_10000, aes(x=Longitude, y=Latitude, colour=GRID_CODE)) + geom_point() + theme_bw() + guides(colour=guide_legend(title="Cassava production")) + coord_fixed() ggarrange(g1, g2) # look at gam trends in the buffered data offset <- min(uga_prod_10000$GRID_CODE[uga_prod_10000$GRID_CODE > 0], na.rm=TRUE) / 2 gm_prod <- gam(log(GRID_CODE + offset) ~ s(Longitude) + s(Latitude), data=uga_prod_10000) par(mfrow=c(2,2)) gam.check(gm_prod) par(mfrow=c(1,2)) plot(gm_prod) plot(gm_prod, select=1, xlab="Longitude", ylab="Predicted Smooth") plot(gm_prod, select=2, xlab="Latitude", ylab="Predicted Smooth") # same for harvest area offset <- min(uga_HA_10000$GRID_CODE[uga_HA_10000$GRID_CODE > 0], na.rm=TRUE) / 2 gm_ha <- gam(log(GRID_CODE + offset) ~ s(Longitude) + s(Latitude), data=uga_HA_10000) par(mfrow=c(2,2)) gam.check(gm_ha) par(mfrow=c(1,2)) plot(gm_ha) # quite different # how does this change if we change the buffer offset <- min(uga_prod_2000$GRID_CODE[uga_prod_2000$GRID_CODE > 0], na.rm=TRUE) / 2 gm_prod <- gam(log(GRID_CODE + offset) ~ s(Longitude) + s(Latitude), data=uga_prod_2000) par(mfrow=c(2,2)) gam.check(gm_prod) par(mfrow=c(1,2)) plot(gm_prod) # not quite as good offset <- min(uga_prod_5000$GRID_CODE[uga_prod_5000$GRID_CODE > 0], na.rm=TRUE) / 2 gm_prod <- gam(log(GRID_CODE + offset) ~ s(Longitude) + s(Latitude), data=uga_prod_5000) par(mfrow=c(2,2)) gam.check(gm_prod) par(mfrow=c(1,2)) plot(gm_prod)