# Moth declines are most severe in broadleaf woodlands despite a net gain in habitat availability - Blumgart et al., 2021 # Script_5_species_specific_analysis # March 2021 # This script models the interaction between each trait in each region, habitat and a habitat*region # interaction. # Libraries and functions #### library(dplyr) library(ggplot2) library(gridExtra) library(lme4) library(DHARMa) library(emmeans) library(mgcv) library(poptrend) # Ben Bolker's code to get 95% CIs from mixed effects models. It excludes the random component of the # uncertainty. # Got from here: https://bbolker.github.io/mixedmodels-misc/ecostats_chap.html # - March 2021 easyPredCI <- function(model,newdata=NULL,alpha=0.05) { ## baseline prediction, on the linear predictor (logit) scale: pred0 <- predict(model,re.form=NA,newdata=newdata) ## fixed-effects model matrix for new data X <- model.matrix(formula(model,fixed.only=TRUE)[-2],newdata) beta <- fixef(model) ## fixed-effects coefficients V <- vcov(model) ## variance-covariance matrix of beta pred.se <- sqrt(diag(X %*% V %*% t(X))) ## std errors of predictions ## inverse-link function linkinv <- family(model)$linkinv ## construct 95% Normal CIs on the link scale and ## transform back to the response (probability) scale: crit <- -qnorm(alpha/2) linkinv(cbind(conf.low=pred0-crit*pred.se, conf.high=pred0+crit*pred.se)) } # The anaylis will go as follows: # 1. Species-specific trends - no traits # a) Habitat # b) Region # c) Hab*Region # 2. Feeding_guild # a) Feeding_guild # b) Feeding_guild*Habitat # c) Feeding_guild*Region # d) Feeding_guild*Region*Habitat # 3. Host_plant_light (Ellenberg value for light) # a) Host_plant_light # b) Host_plant_light*Habitat # c) Host_plant_light*Region # d) Host_plant_light*Region*Habitat # Due to sparsity of sites, Region*Habitat will only include broadleaf woodland. So this is # essentially testing whether there is an interaction between traits and region within broadleaf woodland. # # # # Data import #### # For species-specific trends I need the moth trends modeled separately (this is in the same # df as for habitat-specific trends as Habitat == "Overall) Habitat_species_trends <- read.csv("Data/Hab_trends.csv") # For region specific trends I need each species trend modeled in each region Region_species_trends <- read.csv("Data/Region_trends.csv") # For region*habitat interaction I need each species trend modeled in each habitat (only Broadleaf # woodland) in each region Hab_region_species_trends <- read.csv("Data/Hab_region_trends.csv") # Note: in the three data frames imported above, the outliers are already removed. The code # showing how I dealt with the outliers is below. To generate the data complete with outliers # you can run Script 4 to produce these three data frames. # I also need info on species traits Traits <- read.csv("Data/Moth_traits.csv") # Site_info Site_info <- read.csv("Data/Site_info.csv") # # # # Data prep #### # Prepare Habitat_species_trends and Traits for merging names(Habitat_species_trends) names(Traits) Habitat_species_trends$Common_name <- NULL Habitat_species_trends$Binomial <- NULL # Merge on traits Habitat_species_trends_2 <- merge(Habitat_species_trends, Traits, by = "RIS_code", all.x = TRUE) # Prepare Region_species_trends and Traits for merging names(Region_species_trends) Region_species_trends$Common_name <- NULL Region_species_trends$Binomial <- NULL Region_species_trends$X <- NULL # Merge on traits Region_species_trends_2 <- merge(Region_species_trends, Traits, by = "RIS_code", all.x = TRUE) # Prepare Hab_region_species_trends and Traits for merging names(Hab_region_species_trends) Hab_region_species_trends$Common_name <- NULL Hab_region_species_trends$Binomial <- NULL Hab_region_species_trends$X <- NULL # Merge on traits Hab_region_species_trends_2 <- merge(Hab_region_species_trends, Traits, by = "RIS_code", all.x = TRUE) # Swap out -100% trends for -99.99% in all three so that they can be transformed Habitat_species_trends_2$Poptrend_perc_change[Habitat_species_trends_2$Poptrend_perc_change == -100] <- -99.99 Region_species_trends_2$Poptrend_perc_change[Region_species_trends_2$Poptrend_perc_change == -100] <- -99.99 Hab_region_species_trends_2$Poptrend_perc_change[Hab_region_species_trends_2$Poptrend_perc_change == -100] <- -99.99 # Cut out all species*habitat, species*region or species*Hab_region combination with a total sample # size of less than 200. This is an arbitrary cutoff point, but those with very small sample sizes have # very unreliable trends. # Habitat Habitat_species_trends_2 <- Habitat_species_trends_2 %>% dplyr::filter(Sample_size >= 200) # Region Region_species_trends_2 <- Region_species_trends_2 %>% dplyr::filter(Sample_size >= 200) # Hab_region Hab_region_species_trends_2 <- Hab_region_species_trends_2 %>% dplyr::filter(Sample_size >= 200) # Solving outliers #### # Some of the species models resulted in very extreme percentage changes that have an overly strong # influence on the subsequent models. These come about when a species is not recorded at all # in the first part of the time series and they increase later in the time series. As an increase # from zero to any positive number equates to an infinite percentage growth, the resulting percentage # change estimated by the model gets silly. Where this has happened, I reran the model starting at half-way # through the time-series - i.e., 1993. # **YOU cAN SKIP THIS BIT AND GO STARIGHT TO LINE 751 ("Re-reading in data") # THE DATAFRAMES INCLUDED ALREADY HAVE THE OUTLIERS DEALT WITH** # Solving outliers for Habitat #### # What outliers are there? ggplot(Habitat_species_trends_2, aes(x = log(Poptrend_perc_change+100))) + geom_histogram(bins =200) + facet_wrap(~ Habitat) + geom_vline(xintercept = log(100), col = "red") # There is one noticable outlier: # 540 (Blair's Shoulder−knot) Arable # I will take the mid-point of the time-series (1992) and rerun models for Blair's Shoulder-knot # and substitute them back in. # Read in the raw data for Blair's Shoulder Knot BSK <- readRDS("Data/Sindex_files/540.rds") # Rename year BSK <- BSK %>% rename(Year = YEAR) # Merge on site-info BSK_plus_sites <- merge(BSK, Site_info, by = "SITE", all.x = TRUE) # Get rid of Channel Isles sites first and cut out anything pre-1993 BSK_plus_sites_B <- BSK_plus_sites %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Year >=1993) # Cut out any site-years with NM less than 0.5 BSK_plus_sites_C <- BSK_plus_sites_B %>% dplyr::filter(NM >= 0.5) # Make an actual_years variable to merge on Actual_years_ran <- BSK_plus_sites_C %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on BSK_plus_sites_D <- merge(BSK_plus_sites_C, Actual_years_ran, by = "SITE") # Cut out anything with Actual_years of less than 3 BSK_plus_sites_2 <- BSK_plus_sites_D %>% dplyr::filter(Actual_years >= 3) # Make Site_name a factor BSK_plus_sites_2$Site_name <- as.factor(BSK_plus_sites_2$Site_name) # Get a vector of habitats Hab_vec <- unique(BSK_plus_sites_2$Habitat_500m) # Run the model and impute BSK_all_sites <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), family = nb, data = BSK_plus_sites_2) # Extract the percentage change and round it to 2 decimal places BSK_all_sites_perc_cng <- as.numeric(change(BSK_all_sites, start = 1968, end = 2016, alpha = 0.05)[1]) BSK_all_sites_perc_cng <- round(BSK_all_sites_perc_cng, digits = 2) # -34.34. Curious # Now impute this into main df Habitat_species_trends_2$Poptrend_perc_change[Habitat_species_trends_2$RIS_code == "540" & Habitat_species_trends_2$Habitat == "Overall"] <- BSK_all_sites_perc_cng # Add back in the new sample size Habitat_species_trends_2$Sample_size[Habitat_species_trends_2$RIS_code == "540" & Habitat_species_trends_2$Habitat == "Overall"] <- sum(BSK_plus_sites_2$Raw_total) # and do the same for the other habitats so it is consistent for(i_hab in 1:length(Hab_vec)){ # Only run the model if the trend in this habitat could previous be modeled. # Else it stays as NA # Make an obect called Rerun which is the percentage change for this species in this habitat. Only rerun # the model if rerun both exists and is not equal to NA. Rerun <- Habitat_species_trends_2$Poptrend_perc_change[Habitat_species_trends_2$RIS_code == "540" & Habitat_species_trends_2$Habitat == Hab_vec[i_hab]] if(length(Rerun) != 0){ if(!is.na(Rerun)){ BSK_mod <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), family = nb, data = subset(BSK_plus_sites_2, Habitat_500m == Hab_vec[i_hab])) # Extract the percentage change and round it to 2 decimal places BSK_perc_cng <- as.numeric(change(BSK_mod, start = 1968, end = 2016, alpha = 0.05)[1]) BSK_perc_cng <- round(BSK_perc_cng, digits = 2) print(paste0("Perc change for ", Hab_vec[i_hab], " = ", BSK_perc_cng)) # Add back in the new sample size Habitat_species_trends_2$Sample_size[Habitat_species_trends_2$RIS_code == "540" & Habitat_species_trends_2$Habitat == Hab_vec[i_hab]] <- sum(subset(BSK_plus_sites_2, Habitat_500m == Hab_vec[i_hab])$Raw_total) # Now impute this into main df Habitat_species_trends_2$Poptrend_perc_change[Habitat_species_trends_2$RIS_code == "540" & Habitat_species_trends_2$Habitat == Hab_vec[i_hab]] <- BSK_perc_cng } } } # # # # Solving outliers for Region #### # What outliers are there? ggplot(Region_species_trends_2, aes(x = log(Poptrend_perc_change+100))) + geom_histogram(bins =200) + facet_wrap(~ N_S) + geom_vline(xintercept = log(100), col = "red") # Two outliers # 178 (Buff Footman) North # 940 (Satin Beauty) North # Rerun the model for these two # Buff_footman B_foot <- readRDS("Data/Sindex_files/178.rds") # Rename year B_foot <- B_foot %>% rename(Year = YEAR) # Merge on site-info B_foot_plus_sites <- merge(B_foot, Site_info, by = "SITE", all.x = TRUE) # Get rid of Channel Isles sites first and cut out anything pre-1993 B_foot_plus_sites_B <- B_foot_plus_sites %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Year >=1993) # Cut out any site-years with NM less than 0.5 B_foot_plus_sites_C <- B_foot_plus_sites_B %>% dplyr::filter(NM >= 0.5) # Make an actual_years variable to merge on Actual_years_ran <- B_foot_plus_sites_C %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on B_foot_plus_sites_D <- merge(BSK_plus_sites_C, Actual_years_ran, by = "SITE") # Cut out anything with Actual_years of less than 3 B_foot_plus_sites_2 <- B_foot_plus_sites_D %>% dplyr::filter(Actual_years >= 3) # Make Site_name a factor B_foot_plus_sites_2$Site_name <- as.factor(B_foot_plus_sites_2$Site_name) # Get a vector of N_S Reg_vec <- as.vector(unique(B_foot_plus_sites_2$N_S)) for(i_reg in 1:length(Reg_vec)){ # Run the model B_foot_mod <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), family = nb, data = subset(B_foot_plus_sites_2, N_S == Reg_vec[i_reg])) # Extract the percentage change and round it to 2 decimal places B_foot_perc_cng <- as.numeric(change(B_foot_mod, start = 1968, end = 2016, alpha = 0.05)[1]) B_foot_perc_cng <- round(B_foot_perc_cng, digits = 2) print(paste0("Perc change for ", Reg_vec[i_reg], " = ", B_foot_perc_cng)) # Now impute this into main df Region_species_trends_2$Poptrend_perc_change[Region_species_trends_2$RIS_code == "178" & Region_species_trends_2$N_S == Reg_vec[i_reg]] <- B_foot_perc_cng # Add back in the new sample size Region_species_trends_2$Sample_size[Region_species_trends_2$RIS_code == "178" & Region_species_trends_2$N_S == Reg_vec[i_reg]] <- sum(subset(B_foot_plus_sites_2, N_S == Reg_vec[i_reg])$Raw_total) } # Do the same for Satin Beauty (no. 940) S_beaut <- readRDS("Data/Sindex_files/940.rds") # Rename year S_beaut <- S_beaut %>% rename(Year = YEAR) # Merge on site-info S_beaut_plus_sites <- merge(S_beaut, Site_info, by = "SITE", all.x = TRUE) # Get rid of Channel Isles sites first and cut out anything pre-1993 S_beaut_plus_sites_B <- S_beaut_plus_sites %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Year >=1993) # Cut out any site-years with NM less than 0.5 S_beaut_plus_sites_C <- S_beaut_plus_sites_B %>% dplyr::filter(NM >= 0.5) # Make an actual_years variable to merge on Actual_years_ran <- S_beaut_plus_sites_C %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on S_beaut_plus_sites_D <- merge(BSK_plus_sites_C, Actual_years_ran, by = "SITE") # Cut out anything with Actual_years of less than 3 S_beaut_plus_sites_2 <- S_beaut_plus_sites_D %>% dplyr::filter(Actual_years >= 3) # Make Site_name a factor S_beaut_plus_sites_2$Site_name <- as.factor(S_beaut_plus_sites_2$Site_name) # Get a vector of N_S Reg_vec <- as.vector(unique(S_beaut_plus_sites_2$N_S)) for(i_reg in 1:length(Reg_vec)){ # Run the model S_beaut_mod <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), family = nb, data = subset(S_beaut_plus_sites_2, N_S == Reg_vec[i_reg])) # Extract the percentage change and round it to 2 decimal places S_beaut_perc_cng <- as.numeric(change(S_beaut_mod, start = 1968, end = 2016, alpha = 0.05)[1]) S_beaut_perc_cng <- round(S_beaut_perc_cng, digits = 2) print(paste0("Perc change for ", Reg_vec[i_reg], " = ", S_beaut_perc_cng)) # Now impute this into main df Region_species_trends_2$Poptrend_perc_change[Region_species_trends_2$RIS_code == "940" & Region_species_trends_2$N_S == Reg_vec[i_reg]] <- S_beaut_perc_cng # Add back in the new sample size Region_species_trends_2$Sample_size[Region_species_trends_2$RIS_code == "940" & Region_species_trends_2$N_S == Reg_vec[i_reg]] <- sum(subset(S_beaut_plus_sites_2, N_S == Reg_vec[i_reg])$Raw_total) } # # # # Solving outliers for Hab*region #### ggplot(Hab_region_species_trends_2, aes(x = log(Poptrend_perc_change+100))) + geom_histogram(bins =200) + facet_wrap(~ Hab_region) + geom_vline(xintercept = log(100), col = "red") # Another three outliers # 178 (Buff Footman) North_Deciduous_woodland # 455 (Slender Brindle) North_Deciduous_woodland # 940 (Satin Beauty) North_Deciduous_woodland # 178 (Buff Footman) B_foot <- readRDS("Data/Sindex_files/178.rds") # Rename year B_foot <- B_foot %>% rename(Year = YEAR) # Merge on site-info B_foot_plus_sites <- merge(B_foot, Site_info, by = "SITE", all.x = TRUE) # Get rid of Channel Isles sites first and cut out anything pre-1993 B_foot_plus_sites_B <- B_foot_plus_sites %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Year >=1993) # Cut out any site-years with NM less than 0.5 B_foot_plus_sites_C <- B_foot_plus_sites_B %>% dplyr::filter(NM >= 0.5) # Make an actual_years variable to merge on Actual_years_ran <- B_foot_plus_sites_C %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on B_foot_plus_sites_D <- merge(BSK_plus_sites_C, Actual_years_ran, by = "SITE") # Cut out anything with Actual_years of less than 3 B_foot_plus_sites_2 <- B_foot_plus_sites_D %>% dplyr::filter(Actual_years >= 3) # Make Site_name a factor B_foot_plus_sites_2$Site_name <- as.factor(B_foot_plus_sites_2$Site_name) # Actualy make a Hab_region variable B_foot_plus_sites_2$Hab_region <- paste0(B_foot_plus_sites_2$N_S,"_",B_foot_plus_sites_2$Habitat_500m) # And cut out only the two I need B_foot_plus_sites_2 <- B_foot_plus_sites_2 %>% dplyr::filter(Hab_region == "North_Deciduous_woodland" | Hab_region == "South_Deciduous_woodland") # Get a vector of Hab_region Habreg_vec <- as.vector(unique(B_foot_plus_sites_2$Hab_region)) for(i_habreg in 1:length(Habreg_vec)){ # Run the model B_foot_mod <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), family = nb, data = subset(B_foot_plus_sites_2, Hab_region == Habreg_vec[i_habreg])) # Extract the percentage change and round it to 2 decimal places B_foot_perc_cng <- as.numeric(change(B_foot_mod, start = 1968, end = 2016, alpha = 0.05)[1]) B_foot_perc_cng <- round(B_foot_perc_cng, digits = 2) print(paste0("Perc change for ", Habreg_vec[i_habreg], " = ", B_foot_perc_cng)) # Now impute this into main df Hab_region_species_trends_2$Poptrend_perc_change[Hab_region_species_trends_2$RIS_code == "178" & Hab_region_species_trends_2$Hab_region == Habreg_vec[i_habreg]] <- B_foot_perc_cng # Add back in the new sample size Hab_region_species_trends_2$Sample_size[Hab_region_species_trends_2$RIS_code == "178" & Hab_region_species_trends_2$Hab_region == Habreg_vec[i_habreg]] <- sum(subset(B_foot_plus_sites_2, Hab_region == Habreg_vec[i_habreg])$Raw_total) } # Buff footman could not be modeled in the north. Remove this one Hab_region_species_trends_2 <- Hab_region_species_trends_2 %>% dplyr::filter(!(RIS_code == "178" & Hab_region == "North_Deciduous_woodland")) # # # # Same for no. 455 (Slender Brindle) in North_Deciduous_woodland S_brin <- readRDS("Data/Sindex_files/455.rds") # Rename year S_brin <- S_brin %>% rename(Year = YEAR) # Merge on site-info S_brin_plus_sites <- merge(S_brin, Site_info, by = "SITE", all.x = TRUE) # Get rid of Channel Isles sites first and cut out anything pre-1993 S_brin_plus_sites_B <- S_brin_plus_sites %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Year >=1993) # Cut out any site-years with NM less than 0.5 S_brin_plus_sites_C <- S_brin_plus_sites_B %>% dplyr::filter(NM >= 0.5) # Make an actual_years variable to merge on Actual_years_ran <- S_brin_plus_sites_C %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on S_brin_plus_sites_D <- merge(BSK_plus_sites_C, Actual_years_ran, by = "SITE") # Cut out anything with Actual_years of less than 3 S_brin_plus_sites_2 <- S_brin_plus_sites_D %>% dplyr::filter(Actual_years >= 3) # Make Site_name a factor S_brin_plus_sites_2$Site_name <- as.factor(S_brin_plus_sites_2$Site_name) # Actualy make a Hab_region variable S_brin_plus_sites_2$Hab_region <- paste0(S_brin_plus_sites_2$N_S,"_",S_brin_plus_sites_2$Habitat_500m) # And cut out only the two I need S_brin_plus_sites_2 <- S_brin_plus_sites_2 %>% dplyr::filter(Hab_region == "North_Deciduous_woodland" | Hab_region == "South_Deciduous_woodland") # Get a vector of Hab_region Habreg_vec <- as.vector(unique(S_brin_plus_sites_2$Hab_region)) for(i_habreg in 1:length(Habreg_vec)){ # Run the model S_brin_mod <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), family = nb, data = subset(S_brin_plus_sites_2, Hab_region == Habreg_vec[i_habreg])) # Extract the percentage change and round it to 2 decimal places S_brin_perc_cng <- as.numeric(change(S_brin_mod, start = 1968, end = 2016, alpha = 0.05)[1]) S_brin_perc_cng <- round(S_brin_perc_cng, digits = 2) print(paste0("Perc change for ", Habreg_vec[i_habreg], " = ", S_brin_perc_cng)) # Now impute this into main df Hab_region_species_trends_2$Poptrend_perc_change[Hab_region_species_trends_2$RIS_code == "455" & Hab_region_species_trends_2$Hab_region == Habreg_vec[i_habreg]] <- S_brin_perc_cng # Add back in the new sample size Hab_region_species_trends_2$Sample_size[Hab_region_species_trends_2$RIS_code == "455" & Hab_region_species_trends_2$Hab_region == Habreg_vec[i_habreg]] <- sum(subset(S_brin_plus_sites_2, Hab_region == Habreg_vec[i_habreg])$Raw_total) } # Slender Brindle could not be modeled in the north. Remove this one Hab_region_species_trends_2 <- Hab_region_species_trends_2 %>% dplyr::filter(!(RIS_code == "455" & Hab_region == "North_Deciduous_woodland")) # # # Same for no. 940 (Satin Beauty) in North_Deciduous_woodland S_beaut <- readRDS("Data/Sindex_files/940.rds") # Rename year S_beaut <- S_beaut %>% rename(Year = YEAR) # Merge on site-info S_beaut_plus_sites <- merge(S_beaut, Site_info, by = "SITE", all.x = TRUE) # Get rid of Channel Isles sites first and cut out anything pre-1993 S_beaut_plus_sites_B <- S_beaut_plus_sites %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Year >=1993) # Cut out any site-years with NM less than 0.5 S_beaut_plus_sites_C <- S_beaut_plus_sites_B %>% dplyr::filter(NM >= 0.5) # Make an actual_years variable to merge on Actual_years_ran <- S_beaut_plus_sites_C %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on S_beaut_plus_sites_D <- merge(BSK_plus_sites_C, Actual_years_ran, by = "SITE") # Cut out anything with Actual_years of less than 3 S_beaut_plus_sites_2 <- S_beaut_plus_sites_D %>% dplyr::filter(Actual_years >= 3) # Make Site_name a factor S_beaut_plus_sites_2$Site_name <- as.factor(S_beaut_plus_sites_2$Site_name) # Actualy make a Hab_region variable S_beaut_plus_sites_2$Hab_region <- paste0(S_beaut_plus_sites_2$N_S,"_",S_beaut_plus_sites_2$Habitat_500m) # And cut out only the two I need S_beaut_plus_sites_2 <- S_beaut_plus_sites_2 %>% dplyr::filter(Hab_region == "North_Deciduous_woodland" | Hab_region == "South_Deciduous_woodland") # Get a vector of Hab_region Habreg_vec <- as.vector(unique(S_beaut_plus_sites_2$Hab_region)) for(i_habreg in 1:length(Habreg_vec)){ # Run the model S_beaut_mod <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), family = nb, data = subset(S_beaut_plus_sites_2, Hab_region == Habreg_vec[i_habreg])) # Extract the percentage change and round it to 2 decimal places S_beaut_perc_cng <- as.numeric(change(S_beaut_mod, start = 1968, end = 2016, alpha = 0.05)[1]) S_beaut_perc_cng <- round(S_beaut_perc_cng, digits = 2) print(paste0("Perc change for ", Habreg_vec[i_habreg], " = ", S_beaut_perc_cng)) # Now impute this into main df Hab_region_species_trends_2$Poptrend_perc_change[Hab_region_species_trends_2$RIS_code == "940" & Hab_region_species_trends_2$Hab_region == Habreg_vec[i_habreg]] <- S_beaut_perc_cng # Add back in the new sample size Hab_region_species_trends_2$Sample_size[Hab_region_species_trends_2$RIS_code == "940" & Hab_region_species_trends_2$Hab_region == Habreg_vec[i_habreg]] <- sum(subset(S_beaut_plus_sites_2, Hab_region == Habreg_vec[i_habreg])$Raw_total) } # Satin Beauty could not be modeled in the north either! Remove this one Hab_region_species_trends_2 <- Hab_region_species_trends_2 %>% dplyr::filter(!(RIS_code == "940" & Hab_region == "North_Deciduous_woodland")) # # # # Just check the histograms again to make sure there are no leftover outliers # Habitat ggplot(Habitat_species_trends_2, aes(x = log(Poptrend_perc_change+100))) + geom_histogram(bins =100) + facet_wrap(~ Habitat) + geom_vline(xintercept = log(100), col = "red") # Region ggplot(Region_species_trends_2, aes(x = log(Poptrend_perc_change+100))) + geom_histogram(bins =100) + facet_wrap(~ N_S) + geom_vline(xintercept = log(100), col = "red") # Hab_region ggplot(Hab_region_species_trends_2, aes(x = log(Poptrend_perc_change+100))) + geom_histogram(bins =100) + facet_wrap(~ Hab_region) + geom_vline(xintercept = log(100), col = "red") # Make a log(Sample_size) var that can be used as weights in the models Habitat_species_trends_2$Log_sample_size = log(Habitat_species_trends_2$Sample_size) Region_species_trends_2$Log_sample_size = log(Region_species_trends_2$Sample_size) Hab_region_species_trends_2$Log_sample_size = log(Hab_region_species_trends_2$Sample_size) # And make a transformed trend variable that will be the response Habitat_species_trends_2$Trend_trans <- log(Habitat_species_trends_2$Poptrend_perc_change + 100) Region_species_trends_2$Trend_trans <- log(Region_species_trends_2$Poptrend_perc_change + 100) Hab_region_species_trends_2$Trend_trans <- log(Hab_region_species_trends_2$Poptrend_perc_change + 100) # Make a new df for renaming and reordering Habitat_species_trends_3 <- Habitat_species_trends_2 Region_species_trends_3 <- Region_species_trends_2 Hab_region_species_trends_3 <- Hab_region_species_trends_2 # Rename habitats so I don't need to faff around in ggplot Habitat_species_trends_3$Habitat[Habitat_species_trends_3$Habitat == "Lowland_semi_natural"] <- "'Other semi-natural'" Habitat_species_trends_3$Habitat[Habitat_species_trends_3$Habitat == "Improved_grassland"] <- "Improved grassland" Habitat_species_trends_3$Habitat[Habitat_species_trends_3$Habitat == "Conifer_plantation"] <- "Conifer plantation" Habitat_species_trends_3$Habitat[Habitat_species_trends_3$Habitat == "Deciduous_woodland"] <- "Broadleaf woodland" # Rename Hab_regions so no faffing in ggplot Hab_region_species_trends_3$Hab_region[Hab_region_species_trends_3$Hab_region == "North_Deciduous_woodland"] <- "Broadleaf woodland (North)" Hab_region_species_trends_3$Hab_region[Hab_region_species_trends_3$Hab_region == "South_Deciduous_woodland"] <- "Broadleaf woodland (South)" # Get the N_S and Habitat variables back Hab_region_species_trends_3$Habitat[Hab_region_species_trends_3$Hab_region == "Broadleaf woodland (North)"] <- "Broadleaf woodland" Hab_region_species_trends_3$Habitat[Hab_region_species_trends_3$Hab_region == "Broadleaf woodland (South)"] <- "Broadleaf woodland" Hab_region_species_trends_3$N_S[Hab_region_species_trends_3$Hab_region == "Broadleaf woodland (North)"] <- "North" Hab_region_species_trends_3$N_S[Hab_region_species_trends_3$Hab_region == "Broadleaf woodland (South)"] <- "South" # Write these dfs to a csv # Note you will need to change the address and un-hash the code ##write.csv(Habitat_species_trends_3, "Data/Hab_trends_ready.csv", row.names = FALSE) ##write.csv(Region_species_trends_3, "Data/Region_trends_ready.csv", row.names = FALSE) ##write.csv(Hab_region_species_trends_3, "Data/Hab_region_trends_ready.csv", row.names = FALSE) # ~~~Re-reading in data #### # Only necessary to reread in data if you re-generated the data anew using Script 4 Habitat_species_trends_3 <- read.csv("Data/Hab_trends_ready.csv") Region_species_trends_3 <- read.csv("Data/Region_trends_ready.csv") Hab_region_species_trends_3 <- read.csv("Data/Hab_region_trends_ready.csv") # How many moths used overall? sum(Habitat_species_trends_3$Sample_size[Habitat_species_trends_3$Habitat == "Overall"]) # 10,963,959 moths # How many species? length(unique(Habitat_species_trends_3$Common_name)) # 427 # Reorder habitat levels so they come out in right order levels(as.factor(Habitat_species_trends_3$Habitat)) Habitat_species_trends_3$Habitat <- factor(Habitat_species_trends_3$Habitat, levels = c("Overall", "Urban", "Upland", "'Other semi-natural'", "Improved grassland", "Conifer plantation", "Broadleaf woodland", "Arable")) # Reorder region levels(as.factor(Region_species_trends_3$N_S)) Region_species_trends_3$N_S <- factor(Region_species_trends_3$N_S, levels = c("South", "North")) # Relevel Hab_region levels(as.factor(Hab_region_species_trends_3$Hab_region)) Hab_region_species_trends_3$Hab_region <- factor(Hab_region_species_trends_3$Hab_region, levels = c("Broadleaf woodland (South)", "Broadleaf woodland (North)")) # # # # ~~~ #### # i. Species-specific traits - overall - to compare to all others #### # Run a linear model with the transformed percentage change with an intercept only Species_spec_only <- lm(Trend_trans ~ 1, weights = Log_sample_size, data = subset(Habitat_species_trends_3, Habitat == "Overall")) # A simple t-test testing whether the mean trend differs from zero temp <- Habitat_species_trends_3 %>% dplyr::filter(Habitat == "Overall") t.test(temp$Trend_trans) #One Sample t-test ## data: temp$Trend_trans #t = 58.604, df = 426, p-value < 2.2e-16 #alternative hypothesis: true mean is not equal to 0 # 95 percent confidence interval: # 3.939682 4.213121 # Run the emmeans "comparisons" Species_spec_only_emmeans <- emmeans(Species_spec_only, pairwise ~ NULL) # Extract the dfs of contratsts and emmeans # Emmeans Species_spec_only_emmeans_df <- data.frame(Species_spec_only_emmeans$emmeans) # Get estimated percentage change for each group Species_spec_only_emmeans_df %>% dplyr::select(emmean) %>% dplyr::mutate(Perc_change = (exp(emmean)-100)) # the average moth has declined by 40% # emmean Perc_change # 1 4.089591 -40.28454 # How many species altogether? temp <- Habitat_species_trends_3 %>% dplyr::filter(Habitat == "Overall") length(unique(temp$Common_name)) # 427 # # # # Some summary stats #### # How many species? length(unique(Habitat_species_trends_3$Common_name)) # 427 # Check all the species look sensible Species_list <- Habitat_species_trends_3 %>% dplyr::select(RIS_code, Common_name, Binomial, Feeding_guild) %>% distinct() # It's ok, everything dodgy like 'unidentified macros' etc has been removed before this stage # ~~~ #### # 1. Species-specific trends - no traits #### # # ~~~a) Habitat #### # Run model Species_spec_1 <- lmer(Trend_trans ~ Habitat + (1|Common_name), weights = Log_sample_size, data = subset(Habitat_species_trends_3, Habitat != "Overall")) # check residuals plot(Species_spec_1) # Pretty good # Is the effect of habitat significant? drop1(Species_spec_1, test = "Chisq") # npar AIC LRT Pr(Chi) # 6916.8 # Habitat 6 7027.2 122.37 < 2.2e-16 *** # Yes, the effect of habitat is significant. Get the delta-AIC 7027.2 - 6916.8 # delta-AIC = 110.4 # Run the emmeans comparisons Species_spec_emmeans <- emmeans(Species_spec_1, pairwise ~ Habitat) # Extract the dfs of contratsts and emmeans # Contrasts if needed Species_spec_contrasts_df <- data.frame(Species_spec_emmeans$contrasts) # Emmeans Species_spec_emmeans_df <- data.frame(Species_spec_emmeans$emmeans) # Get estimated percentage change for each group Species_spec_emmeans_df %>% dplyr::select(Habitat, emmean) %>% dplyr::mutate(Perc_change = (exp(emmean)-100)) %>% dplyr::arrange(desc(Habitat)) # Habitat emmean Perc_change #1 Arable 4.635456 3.074915 #2 Broadleaf woodland 3.737133 -58.022543 #3 Conifer plantation 4.141887 -37.078566 #4 Improved grassland 4.070710 -41.401463 #5 'Other semi-natural' 3.875914 -51.773230 #6 Upland 4.540672 -6.246184 #7 Urban 3.646115 -61.674526 # Get number of species modelled in each group Habitat_species_trends_3 %>% group_by(Habitat) %>% summarise(Total_species = length(unique(Common_name))) %>% dplyr::arrange(desc(Habitat)) #Habitat Total_species # #1 Arable 297 #2 Broadleaf woodland 365 #3 Conifer plantation 211 #4 Improved grassland 380 #5 'Other semi-natural' 202 #6 Upland 109 #7 Urban 285 #8 Overall 427 # How many species altogether? temp <- Habitat_species_trends_3 %>% dplyr::filter(Habitat != "Overall") length(unique(temp$Common_name)) # 405 # Relevel for plotting Species_spec_emmeans_df$Habitat <- factor(Species_spec_emmeans_df$Habitat, levels = c("Urban", "Upland", "'Other semi-natural'", "Improved grassland", "Conifer plantation", "Broadleaf woodland", "Arable")) # Plot (Fig. S8b) ggplot(data = Species_spec_emmeans_df, aes(x = Habitat, y = emmean)) + geom_point() + #size = 2 geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = log(100), linetype = 2) + #, colour = "red" coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Percentage change 1968 - 2016") + ggtitle("Single-species trends") + xlab("Habitat type") + scale_y_continuous(breaks = c(log(-99+100), log(-75+100), log(-50+100), log(-25+100), log(33+100), log(100+100), log(300+100), log(9000+100)), labels = c(" -99%", "- 75%", " -50%", " -25%", " 33%", " 100%", " 300%", " 9000%"), limits = c(3.2, 6)) + theme(axis.text.x = element_text(angle = 35, hjust = 1)) # Saved as 5 x 6 inch pdf # # # # ~~~b) Region #### # Run model Species_region_1 <- lmer(Trend_trans ~ N_S + (1|Common_name), weights = Log_sample_size, data = Region_species_trends_3) # Check residuals plot(Species_region_1) # A positive trend in the residuals. I've tried everything and this is the best I # can do. This seems to be because the distribution of the data is almost a normal distribution but the # peak is too steep. Interpret p-values with caution, especially when they are near to the 0.05 cut-off # for significance. # Is the effect of region significant? drop1(Species_region_1, test = "Chisq") # npar AIC LRT Pr(Chi) # 2657.9 # N_S 1 2688.4 32.502 1.191e-08 *** # 2688.4-2657.9 # delta-AIC = 30.5 # Run the emmeans comparisons Species_spec_reg_emmeans <- emmeans(Species_region_1, pairwise ~ N_S) # Extract the dfs of contratsts and emmeans # Contrasts if needed Species_spec_reg_contrasts_df <- data.frame(Species_spec_reg_emmeans$contrasts) # Emmeans Species_spec_reg_emmeans_df <- data.frame(Species_spec_reg_emmeans$emmeans) # Get estimated percentage change for each group Species_spec_reg_emmeans_df %>% dplyr::select(N_S, emmean) %>% dplyr::mutate(Perc_change = (exp(emmean)-100)) # N_S emmean Perc_change #1 South 3.914436 -49.87919 #2 North 4.448264 -14.52159 # Get number of species modelled in each group Region_species_trends_3 %>% group_by(N_S) %>% summarise(Total_species = length(unique(Common_name))) # N_S Total_species #1 South 417 #2 North 309 # How many species altogether? length(unique(Region_species_trends_3$Common_name)) # 426 # Relevel for plotting Species_spec_reg_emmeans_df$N_S <- factor(Species_spec_reg_emmeans_df$N_S, levels = c("South", "North")) # Plot (Fig. S8a) ggplot(data = Species_spec_reg_emmeans_df, aes(x = N_S, y = emmean)) + geom_point() + #size = 2 geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = log(100), linetype = 2) + #, colour = "red" coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Percentage change 1968 - 2016") + ggtitle("Single-species trends") + xlab("Region") + scale_y_continuous(breaks = c(log(-99+100), log(-75+100), log(-50+100), log(-25+100), log(33+100), log(100+100), log(300+100), log(9000+100)), labels = c(" -99%", "- 75%", " -50%", " -25%", " 33%", " 100%", " 300%", " 9000%"), limits = c(3.7, 5.3)) + theme(axis.text.x = element_text(angle = 35, hjust = 1)) # Saved as 5 x 5 inch pdf # # # # ~~~c) Hab*Region #### # Run the model Species_hab_region_1 <- lmer(Trend_trans ~ N_S + (1|Common_name), weights = Log_sample_size, data = Hab_region_species_trends_3) # Check residuals plot(Species_hab_region_1) # Not great. See line 944. I don't know why some of the residuals are # correlated so neatly with the fitted values # Is the effect of region significant for woodlands? drop1(Species_hab_region_1, test = "Chisq") #npar AIC LRT Pr(Chi) # 2201.6 # N_S 1 2237.7 38.091 6.752e-10 *** # There is a significant difference in mean abundance trends between north and south in broadleaf woodland ## 2237.7 - 2201.6 # delta-AIC = 36.1 # Run the emmeans comparisons Species_spec_hab_reg_emmeans <- emmeans(Species_hab_region_1, pairwise ~ N_S) # Extract the dfs of contratsts and emmeans # Contrasts if needed Species_spec_hab_reg_contrasts_df <- data.frame(Species_spec_hab_reg_emmeans$contrasts) # Emmeans Species_spec_hab_reg_emmeans_df <- data.frame(Species_spec_hab_reg_emmeans$emmeans) # Get estimated percentage change for each group Species_spec_hab_reg_emmeans_df %>% dplyr::select(N_S, emmean) %>% dplyr::mutate(Perc_change = (exp(emmean)-100)) # N_S emmean Perc_change # 1 North 4.295322 -26.64416 # 2 South 3.445955 -68.62676 # Get number of species modelled in each group Hab_region_species_trends_3 %>% group_by(N_S) %>% summarise(Total_species = length(unique(Common_name))) # N_S Total_species #1 North 227 #2 South 335 # How many species altogether? length(unique(Region_species_trends_3$Common_name)) # 426 # Reorder North and South levels(as.factor(Species_spec_hab_reg_emmeans_df$N_S)) Species_spec_hab_reg_emmeans_df$N_S <- factor(Species_spec_hab_reg_emmeans_df$N_S, levels = c("South", "North")) # Plot (Fig. S8c) ggplot(data = Species_spec_hab_reg_emmeans_df, aes(x = N_S, y = emmean)) + geom_point() + #size = 2 geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = log(100), linetype = 2) + #, colour = "red" coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("lnPercentage change 1968 - 2016") + ggtitle("Single-species trends in Broadleaf woodland") + xlab("Region (broadleaf woodland only)") + scale_y_continuous(breaks = c(log(-70+100), log(-50+100), log(-25+100), log(33+100), log(100+100)), labels = c("-70%", " -50%", " -25%", " 33%", " 100%"), limits = c(3.2, 5.3)) + theme(axis.text.x = element_text(angle = 35, hjust = 1)) # Saved as 5 x 5 inch pdf # ~~~ #### # 2. Feeding_guild #### # Data prep # Get rid of the unused feeding guild categories (these have too few species in them) Habitat_species_trends_x_guild <- Habitat_species_trends_3 %>% dplyr::filter(Feeding_guild != "Other" & Feeding_guild != "Plants_low" & Feeding_guild != "Unknown" & Feeding_guild != "Woody" ) levels(as.factor(Habitat_species_trends_x_guild$Feeding_guild)) # Rename the feeding guilds to their final names Habitat_species_trends_x_guild$Feeding_guild[Habitat_species_trends_x_guild$Feeding_guild == "Coniferous"] <- "Conifers" Habitat_species_trends_x_guild$Feeding_guild[Habitat_species_trends_x_guild$Feeding_guild == "Deciduous"] <- "Broadleaf polyphagous" Habitat_species_trends_x_guild$Feeding_guild[Habitat_species_trends_x_guild$Feeding_guild == "Shrubs_decid"] <- "Broadleaf shrubs" Habitat_species_trends_x_guild$Feeding_guild[Habitat_species_trends_x_guild$Feeding_guild == "Trees_decid"] <- "Broadleaf trees" Habitat_species_trends_x_guild$Feeding_guild[Habitat_species_trends_x_guild$Feeding_guild == "Highly_polyphagous"] <- "Highly polyphagous" # Reorder them in the order I'd like them to appear in the plots Habitat_species_trends_x_guild$Feeding_guild <- factor(Habitat_species_trends_x_guild$Feeding_guild, levels = c("Forbs","Grasses","Broadleaf shrubs", "Broadleaf trees","Broadleaf polyphagous", "Conifers","Lichen","Highly polyphagous")) # ~~~a) Feeding_guild #### # Run the model Feeding_guild_mod <- lm(Trend_trans ~ Feeding_guild, weights = Log_sample_size, data = subset(Habitat_species_trends_x_guild, Habitat == "Overall")) # Check residuals plot(Feeding_guild_mod) # Pretty good # Is the effect of feeding guild significant? drop1(Feeding_guild_mod, test = "Chisq") # Df Sum of Sq RSS AIC Pr(>Chi) # 6840.0 1171.7 # Feeding_guild 7 592.53 7432.5 1191.9 1.618e-05 *** # 1191.9-1171.7 # delta-AIC = 20.2 # Run the emmeans comparisons Feeding_guild_emmeans <- emmeans(Feeding_guild_mod, pairwise ~ Feeding_guild) # Extract the dfs of contratsts and emmeans # Contrasts if needed Feeding_guild_contrasts_df <- data.frame(Feeding_guild_emmeans$contrasts) # Emmeans Feeding_guild_emmeans_df <- data.frame(Feeding_guild_emmeans$emmeans) # Get estimated percentage change for each group Feeding_guild_emmeans_df %>% dplyr::select(Feeding_guild, emmean) %>% dplyr::mutate(Perc_change = (exp(emmean)-100)) # Feeding_guild emmean Perc_change #1 Forbs 3.995358 -45.65470 #2 Grasses 4.136792 -37.39834 #3 Broadleaf shrubs 3.689909 -59.95881 #4 Broadleaf trees 4.156344 -36.16231 #5 Broadleaf polyphagous 4.058506 -42.11224 #6 Conifers 4.275910 -28.05441 #7 Lichen 6.055170 326.31121 #8 Highly polyphagous 3.885407 -51.31327 # Get number of species modelled in each group Habitat_species_trends_x_guild %>% group_by(Feeding_guild) %>% summarise(Total_species = length(unique(Common_name))) # Feeding_guild Total_species #1 Forbs 105 #2 Grasses 42 #3 Broadleaf shrubs 65 #4 Broadleaf trees 61 #5 Broadleaf polyphagous 58 #6 Conifers 15 #7 Lichen 13 #8 Highly polyphagous 52 # How many species altogether? length(unique(Habitat_species_trends_x_guild$Common_name)) # 411 # Flip order of Feeding guild levels(as.factor(Feeding_guild_emmeans_df$Feeding_guild)) Feeding_guild_emmeans_df$Feeding_guild <- factor(Feeding_guild_emmeans_df$Feeding_guild, levels = c("Highly polyphagous", "Lichen", "Conifers", "Broadleaf polyphagous", "Broadleaf trees", "Broadleaf shrubs", "Grasses", "Forbs")) # Plot (Fig. S9a) ggplot(data = Feeding_guild_emmeans_df, aes(x = Feeding_guild, y = emmean)) + geom_point() + #size = 2 geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = log(100), linetype = 2) + #, colour = "red" coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Percentage change 1968 - 2016") + xlab("Feeding guild") + scale_y_continuous(breaks = c(log(-75+100), log(-50+100), log(-25+100), log(25+100), log(100+100), log(300+100), log(900+100)), labels = c("-75%", "-50%", "-25%", "25%", "100%", "300%", "900%"), limits = c(3, 7)) + theme(axis.text.x = element_text(angle = 35, hjust = 1)) # Saved as 5 x 5 inch pdf # # # # ~~~b) Feeding_guild*Habitat #### # Run the model Hab_guild_mod <- lmer(Trend_trans ~ Habitat*Feeding_guild + (1|Common_name), weights = Log_sample_size, data = subset(Habitat_species_trends_x_guild, Habitat != "Overall")) # Residual checks. plot(Hab_guild_mod) # Good enough # Is there a significant interaction between feeding guild and habitat? drop1(Hab_guild_mod, test = "Chisq") # npar AIC LRT Pr(Chi) # 6581.6 #Habitat:Feeding_guild 42 6587.9 90.308 2.187e-05 *** # 6587.9-6581.6 # delta-AIC = 6.3 # Run the emmeans comparisons Guild_emmeans <- emmeans(Hab_guild_mod, pairwise ~ Habitat|Feeding_guild) # Note that running this # as Feeding_guild|Habitat gives exactly the same results in a different order # Try a contrasts test for everything, subset by Habitat joint_tests(Hab_guild_mod, by = "Habitat") # There's no effect of feeding guild in Improved grassland # Extract the dfs of contratsts and emmeans # Contrasts Guild_contrasts_df <- data.frame(Guild_emmeans$contrasts) # Emmeans Guild_emmeans_df <- data.frame(Guild_emmeans$emmeans) # Get estimated percentage change for each group Guild_emmeans_df %>% dplyr::select(Habitat, Feeding_guild, emmean) %>% dplyr::mutate(Perc_change = (exp(emmean)-100)) # First four rows: # Habitat Feeding_guild emmean Perc_change #1 Urban Forbs 3.502534 -66.800514 #2 Upland Forbs 4.477639 -11.973421 #3 'Other semi-natural' Forbs 3.317124 -72.419095 #4 Improved grassland Forbs 3.975250 -46.736557 # Flip the order of the Habitats so Arable comes first Guild_emmeans_df$Habitat <- factor(Guild_emmeans_df$Habitat, levels = c("Arable","Broadleaf woodland", "Conifer plantation", "Improved grassland", "'Other semi-natural'", "Upland", "Urban")) # Get the feeding guilds in the order in which I want them to appear Guild_emmeans_df$Feeding_guild <- factor(Guild_emmeans_df$Feeding_guild, levels = c("Highly polyphagous", "Lichen", "Conifers", "Broadleaf polyphagous", "Broadleaf trees", "Broadleaf shrubs", "Grasses", "Forbs")) # Run a model with no interaction effect to plot the results in the background of the main plot Hab_guild_mod_2 <- lmer(Trend_trans ~ Habitat + Feeding_guild + (1|Common_name), weights = Log_sample_size, data = subset(Habitat_species_trends_x_guild, Habitat != "Overall")) # Run the emmeans comparisons Guild_emmeans_2 <- emmeans(Hab_guild_mod_2, pairwise ~ Habitat|Feeding_guild) # Emmeans Guild_emmeans_df_2 <- data.frame(Guild_emmeans_2$emmeans) # Flip the order of the variables to match Guild_emmeans_df_2$Habitat <- factor(Guild_emmeans_df_2$Habitat, levels = c("Arable","Broadleaf woodland", "Conifer plantation", "Improved grassland", "'Other semi-natural'", "Upland", "Urban")) Guild_emmeans_df_2$Feeding_guild <- factor(Guild_emmeans_df_2$Feeding_guild, levels = c("Highly polyphagous", "Lichen", "Conifers", "Broadleaf polyphagous", "Broadleaf trees", "Broadleaf shrubs", "Grasses", "Forbs")) # Plot (Fig. 4) ggplot(data = Guild_emmeans_df_2, aes(x = Feeding_guild, y = emmean)) + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0, color = "grey80", size = 3) + geom_point(data = Guild_emmeans_df, aes(x = Feeding_guild, y = emmean)) + geom_errorbar(data = Guild_emmeans_df, aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + geom_hline(yintercept = log(100), linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + xlab("Feeding guild") + ylab("Percentage change 1968 - 2016") + facet_wrap(~Habitat, nrow = 3) + scale_y_continuous(breaks = c(log(-90+100), log(-50+100), log(100+100), log(900+100), log(9900+100)), labels = c("- 90%", " -50%", " 100%", " 900%", " 9900%"), limits = c(1.5, 12)) + theme(axis.text.x = element_text(angle = 35, hjust = 1)) # Saved as 8 x 8 inch pdf # How many species in each subset? Species_numbers <- Habitat_species_trends_x_guild %>% group_by(Feeding_guild, Habitat) %>% summarise(Total_species = length(unique(Common_name))) # # # # ~~~c) Feeding_guild*Region #### # Data prep # Get rid of the unused feeding guild categories (these have too few species in them) Region_species_trends_x_guild <- Region_species_trends_3 %>% dplyr::filter(Feeding_guild != "Other" & Feeding_guild != "Plants_low" & Feeding_guild != "Unknown" & Feeding_guild != "Woody" ) levels(as.factor(Region_species_trends_x_guild$Feeding_guild)) # Rename them to their final names Region_species_trends_x_guild$Feeding_guild[Region_species_trends_x_guild$Feeding_guild == "Coniferous"] <- "Conifers" Region_species_trends_x_guild$Feeding_guild[Region_species_trends_x_guild$Feeding_guild == "Deciduous"] <- "Broadleaf polyphagous" Region_species_trends_x_guild$Feeding_guild[Region_species_trends_x_guild$Feeding_guild == "Shrubs_decid"] <- "Broadleaf shrubs" Region_species_trends_x_guild$Feeding_guild[Region_species_trends_x_guild$Feeding_guild == "Trees_decid"] <- "Broadleaf trees" Region_species_trends_x_guild$Feeding_guild[Region_species_trends_x_guild$Feeding_guild == "Highly_polyphagous"] <- "Highly polyphagous" # Reorder them in the order I'd like them to appear Region_species_trends_x_guild$Feeding_guild <- factor(Region_species_trends_x_guild$Feeding_guild, levels = c("Forbs","Grasses","Broadleaf shrubs", "Broadleaf trees","Broadleaf polyphagous", "Conifers","Lichen","Highly polyphagous")) # Check levels(as.factor(Region_species_trends_x_guild$Feeding_guild)) # Run the model Region_guild_mod <- lmer(Trend_trans ~ N_S*Feeding_guild + (1|Common_name), weights = Log_sample_size, data = subset(Region_species_trends_x_guild)) # Residual checks. plot(Region_guild_mod) # Positive trend. See line 951 # Is there a significant interaction between feeding guild and region? drop1(Region_guild_mod, test = "Chisq") # npar AIC LRT Pr(Chi) # 2544.6 #N_S:Feeding_guild 7 2550.2 19.607 0.006484 ** # 2550.2-2544.6 # delta-AIC = 5.6 # Run the emmeans comparisons Region_guild_emmeans <- emmeans(Region_guild_mod, pairwise ~ N_S|Feeding_guild) # Try a contrasts test for everything, subset by N_S joint_tests(Region_guild_mod, by = "N_S") # Extract the dfs of contratsts and emmeans # Contrasts Region_guild_contrasts_df <- data.frame(Region_guild_emmeans$contrasts) # Emmeans Region_guild_emmeans_df <- data.frame(Region_guild_emmeans$emmeans) # Get estimated percentage change for each group Region_guild_emmeans_df %>% dplyr::select(N_S, Feeding_guild, emmean) %>% dplyr::mutate(Perc_change = (exp(emmean)-100)) # First four rows: # N_S Feeding_guild emmean Perc_change #1 South Forbs 3.838718 -53.53411 #2 North Forbs 4.227462 -31.45698 #3 South Grasses 4.234938 -30.94257 #4 North Grasses 4.055832 -42.26680 # Change order of feeding guild for plotting Region_guild_emmeans_df$Feeding_guild <- factor(Region_guild_emmeans_df$Feeding_guild, levels = c("Highly polyphagous", "Lichen", "Conifers", "Broadleaf polyphagous", "Broadleaf trees", "Broadleaf shrubs", "Grasses", "Forbs")) # change order of Region Region_guild_emmeans_df$N_S <- factor(Region_guild_emmeans_df$N_S, levels = c("North", "South")) # Run a model with no interaction effect to plot the results in the background of the main plot Region_guild_mod_2 <- lmer(Trend_trans ~ N_S + Feeding_guild + (1|Common_name), weights = Log_sample_size, data = subset(Region_species_trends_x_guild)) # Run the emmeans comparisons Region_guild_emmeans_2 <- emmeans(Region_guild_mod_2, pairwise ~ N_S|Feeding_guild) # Emmeans Region_guild_emmeans_df_2 <- data.frame(Region_guild_emmeans_2$emmeans) # Region_guild_emmeans_df_2$Feeding_guild <- factor(Region_guild_emmeans_df_2$Feeding_guild, levels = c("Highly polyphagous", "Lichen", "Conifers", "Broadleaf polyphagous", "Broadleaf trees", "Broadleaf shrubs", "Grasses", "Forbs")) # change order of Region Region_guild_emmeans_df_2$N_S <- factor(Region_guild_emmeans_df_2$N_S, levels = c("North", "South")) # Plot (Fig. S10a) ggplot(data = Region_guild_emmeans_df_2, aes(x = Feeding_guild, y = emmean)) + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0, color = "grey80", size = 3) + geom_point(data = Region_guild_emmeans_df, aes(x = Feeding_guild, y = emmean)) + geom_errorbar(data = Region_guild_emmeans_df, aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + geom_hline(yintercept = log(100), linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + xlab("") + ylab("Percentage change 1968 - 2016") + facet_wrap(~N_S, nrow = 1) + scale_y_continuous(breaks = c(log(-85+100), log(-50+100), log(100+100), log(566+100)), labels = c("- 85%", " -50%", " 100%", " 566%"), limits = c(2.7, 7.5)) + theme(axis.text.x = element_text(angle = 35, hjust = 1)) # Saved as 3 x 6 inch PDF # # # # ~~~d) Feeding_guild*Region*Habitat #### # Data prep # Get rid of the unused feeding guild categories (these have too few species in them) Hab_region_species_trends_x_guild <- Hab_region_species_trends_3 %>% dplyr::filter(Feeding_guild != "Other" & Feeding_guild != "Plants_low" & Feeding_guild != "Unknown" & Feeding_guild != "Woody" ) # Check levels(as.factor(Hab_region_species_trends_x_guild$Feeding_guild)) # Rename them to their final names Hab_region_species_trends_x_guild$Feeding_guild[Hab_region_species_trends_x_guild$Feeding_guild == "Coniferous"] <- "Conifers" Hab_region_species_trends_x_guild$Feeding_guild[Hab_region_species_trends_x_guild$Feeding_guild == "Deciduous"] <- "Broadleaf polyphagous" Hab_region_species_trends_x_guild$Feeding_guild[Hab_region_species_trends_x_guild$Feeding_guild == "Shrubs_decid"] <- "Broadleaf shrubs" Hab_region_species_trends_x_guild$Feeding_guild[Hab_region_species_trends_x_guild$Feeding_guild == "Trees_decid"] <- "Broadleaf trees" Hab_region_species_trends_x_guild$Feeding_guild[Hab_region_species_trends_x_guild$Feeding_guild == "Highly_polyphagous"] <- "Highly polyphagous" # Reorder them in the order I'd like them to appear Hab_region_species_trends_x_guild$Feeding_guild <- factor(Hab_region_species_trends_x_guild$Feeding_guild, levels = c("Forbs","Grasses","Broadleaf shrubs", "Broadleaf trees","Broadleaf polyphagous", "Conifers","Lichen","Highly polyphagous")) levels(as.factor(Hab_region_species_trends_x_guild$Feeding_guild)) # Run the model Hab_region_guild_mod <- lmer(Trend_trans ~ N_S*Feeding_guild + (1|Common_name), weights = Log_sample_size, data = subset(Hab_region_species_trends_x_guild)) # Residual checks. plot(Hab_region_guild_mod) # Not ideal. See line 982 # Is there a significant interaction between feeding guild and region within woodland? drop1(Hab_region_guild_mod, test = "Chisq") # npar AIC LRT Pr(Chi) # 2086.1 #N_S:Feeding_guild 7 2110.3 38.265 2.698e-06 *** # 2110.3 - 2086.1 # delta-AIC = 24.2 # Run the emmeans comparisons Hab_region_guild_emmeans <- emmeans(Hab_region_guild_mod, pairwise ~ N_S|Feeding_guild) # Extract the dfs of contratsts and emmeans # Contrasts Hab_region_guild_contrasts_df <- data.frame(Hab_region_guild_emmeans$contrasts) # Emmeans Hab_region_guild_emmeans_df <- data.frame(Hab_region_guild_emmeans$emmeans) # Get estimated percentage change for each group Hab_region_guild_emmeans_df %>% dplyr::select(N_S, Feeding_guild, emmean) %>% dplyr::mutate(Perc_change = (exp(emmean)-100)) # First four rows: # N_S Feeding_guild emmean Perc_change #1 North Forbs 3.908007 -50.20041 #2 South Forbs 3.653447 -61.39247 #3 North Grasses 3.472542 -67.78145 #4 South Grasses 3.661471 -61.08147 # Run a model with no interaction effect to plot the results in the background of the main plot Hab_region_guild_mod_2 <- lmer(Trend_trans ~ N_S + Feeding_guild + (1|Common_name), weights = Log_sample_size, data = subset(Hab_region_species_trends_x_guild)) # Get the emmeans Hab_region_guild_emmeans_2 <- emmeans(Hab_region_guild_mod_2, pairwise ~ N_S|Feeding_guild) Hab_region_guild_emmeans_df_2 <- data.frame(Hab_region_guild_emmeans_2$emmeans) # Reorder feeding guild levels(as.factor(Hab_region_guild_emmeans_df_2$Feeding_guild)) Hab_region_guild_emmeans_df_2$Feeding_guild <- factor(Hab_region_guild_emmeans_df_2$Feeding_guild, levels = c("Highly polyphagous", "Lichen", "Conifers", "Broadleaf polyphagous", "Broadleaf trees", "Broadleaf shrubs", "Grasses", "Forbs")) # Plot (Fig. S10b) ggplot(data = Hab_region_guild_emmeans_df_2, aes(x = Feeding_guild, y = emmean)) + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL ), width = 0, color = "grey80", size = 3) + geom_point(data = Hab_region_guild_emmeans_df, aes(x = Feeding_guild, y = emmean)) + geom_errorbar(data = Hab_region_guild_emmeans_df, aes(ymax = upper.CL, ymin = lower.CL ), width = 0.1) + geom_hline(yintercept = log(100), linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + xlab("Feeding guild") + ylab("Percentage change 1968 - 2016") + facet_wrap(~ N_S, nrow = 1) + scale_y_continuous(breaks = c(log(-85+100), log(-50+100), log(100+100), log(566+100)), labels = c("- 85%", " -50%", " 100%", " 566%"), limits = c(2, 8.2)) + theme(axis.text.x = element_text(angle = 35, hjust = 1)) + ggtitle("Broadleaf woodland sites only") # Save as an 3 x 6 inch pdf # # # # ~~~ #### # 3. Host_plant_light #### # Data prep # Cut all species that are too polyphagous (with more than three hostplants) Habitat_species_trends_x_Ellenberg <- Habitat_species_trends_3 %>% dplyr::filter(Use_Ellenbergs == "y") # # # # ~~~a) Host_plant_light #### # Run model Light_mod <- lm(Trend_trans ~ Ellenberg_mean_Light, weights = Log_sample_size, data = subset(Habitat_species_trends_x_Ellenberg, Habitat == "Overall")) # Check residuals plot(Light_mod) # Okish # Is there a significant effect of light-affinity? drop1(Light_mod, test = "F") # Df Sum of Sq RSS AIC F value Pr(>F) # 1559.8 287.33 #Ellenberg_mean_Light 1 36.65 1596.5 287.77 2.4201 0.1229 # 287.77-287.33 # delta-AIC = 0.44 # Get model predictions New_X <- expand.grid(Ellenberg_mean_Light = seq(from = min(Habitat_species_trends_x_Ellenberg$Ellenberg_mean_Light, na.rm = NA), to = max(Habitat_species_trends_x_Ellenberg$Ellenberg_mean_Light, na.rm = NA)), Log_sample_size = mean(Habitat_species_trends_x_Ellenberg$Log_sample_size)) # Predict means from data frame New_Y <- predict(Light_mod, newdata = New_X, type = "response", re.form = NA, se = TRUE) # Add together into a single data frame. New_df <- data.frame(New_X, New_Y) # Plot (Fig. S9b) ggplot(data = New_df, aes(x = Ellenberg_mean_Light, y = fit)) + geom_line() + geom_ribbon(aes(ymin = (fit - 1.96*se.fit), ymax = (fit + 1.96*se.fit)), alpha = 0.2) + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + geom_point(data = subset(Habitat_species_trends_x_Ellenberg, Habitat == "Overall"), aes(x = Ellenberg_mean_Light, y = Trend_trans, size = Sample_size), alpha = 0.5) + xlab("Mean Ellenberg light") + ylab("Percentage change 1968 - 2016") + geom_hline(yintercept = log(100), linetype = "dashed") + scale_y_continuous(breaks = c(log(-99.9+100), log(-95+100), log(-50+100), log(100+100), log(900+100)), labels = c("-99.9%", "-95%", "-50%", "100%", "900%"), limits = c(-4.65, 8)) # Saved as 4 x 5 inch pdf # # # # ~~~b) Host_plant_light*Habitat #### # Run model Hab_light_mod <- lmer(Trend_trans ~ Habitat*Ellenberg_mean_Light + (1|Common_name), weights = Log_sample_size, data = subset(Habitat_species_trends_x_Ellenberg, Habitat != "Overall")) # Check residuals plot(Hab_light_mod) # Okish # Is there a significant interaction between light-affinity and habitat? drop1(Hab_light_mod, test = "Chisq") # npar AIC LRT Pr(Chi) # 1276.9 #Habitat:Ellenberg_mean_Light 6 1272.0 7.1839 0.3042 # 1272.0-1276.9 # = -4.9 # Get model predictions temp <- Habitat_species_trends_x_Ellenberg %>% dplyr::filter(Habitat != "Overall") temp <- droplevels(temp) New_X <- expand.grid(Ellenberg_mean_Light = seq(from = min(Habitat_species_trends_x_Ellenberg$Ellenberg_mean_Light, na.rm = TRUE), to = max(Habitat_species_trends_x_Ellenberg$Ellenberg_mean_Light, na.rm = TRUE)), Habitat = levels(as.factor(temp$Habitat)), Log_sample_size = mean(Habitat_species_trends_x_Ellenberg$Log_sample_size)) # Predict means from data frame New_Y <- predict(Hab_light_mod, newdata = New_X, type = "response", re.form = NA) # Add together into a single data frame. New_df <- data.frame(New_X, New_Y) # Predict CIs from data frame cpred1.CI <- easyPredCI(model = Hab_light_mod, newdata = New_X) # Add it to the new data frame: New_df <- data.frame(New_df, cpred1.CI) # See which ones are sig Emtrends_df <- emtrends(Hab_light_mod, pairwise ~ Habitat, var = "Ellenberg_mean_Light") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) Emtrends_df_2 # Habitat Ellenberg_mean_Light.trend SE df lower.CL upper.CL #1 Arable -0.2146090 0.2879065 1989.260 -0.7792389 0.35002090 #2 Broadleaf woodland -0.3081562 0.2219184 1214.389 -0.7435423 0.12722987 #3 Conifer plantation -0.6157636 0.3916241 4442.556 -1.3835420 0.15201476 #4 Improved grassland -0.4111878 0.2162384 1066.438 -0.8354889 0.01311333 #5 'Other semi-natural' -0.3710155 0.4010770 4699.516 -1.1573144 0.41528342 #6 Upland 0.5147669 0.5960284 9734.781 -0.6535726 1.68310629 #7 Urban 0.1369274 0.2917668 2088.639 -0.4352566 0.70911137 # Only Improved grassland are sig diff from zero # Change order of habitats levels(New_df$Habitat) New_df$Habitat <- factor(New_df$Habitat, levels = c("Arable","Broadleaf woodland", "Conifer plantation","Improved grassland", "'Other semi-natural'","Upland", "Urban")) levels(as.factor(Habitat_species_trends_x_Ellenberg$Habitat)) Habitat_species_trends_x_Ellenberg$Habitat <- factor(Habitat_species_trends_x_Ellenberg$Habitat, levels = c("Arable","Broadleaf woodland", "Conifer plantation","Improved grassland", "'Other semi-natural'","Upland", "Urban", "Overall")) # Plot (not in paper) ggplot(data = New_df, aes(x = Ellenberg_mean_Light, y = New_Y)) + geom_line() + geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) + ylab("ln(Trend + 100") + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + geom_hline(yintercept = log(100), col = "red") + facet_wrap(~Habitat) + geom_point(data = subset(Habitat_species_trends_x_Ellenberg, Habitat != "Overall"), aes(x = Ellenberg_mean_Light, y = Trend_trans, size = Sample_size), alpha = 0.5) # # # # ~~~c) Host_plant_light*Region #### # Data prep # Cut all species that are too polyphagous (more than three hostplants) Region_species_trends_x_Ellenberg <- Region_species_trends_3 %>% dplyr::filter(Use_Ellenbergs == "y") # Run model Region_light_mod <- lmer(Trend_trans ~ N_S*Ellenberg_mean_Light + (1|Common_name), weights = Log_sample_size, data = Region_species_trends_x_Ellenberg) # Check residuals plot(Region_light_mod) # Okish # Is there as ignificant interaction between light-affinity and region? drop1(Region_light_mod, test = "Chisq") # npar AIC LRT Pr(Chi) # 592.09 #N_S:Ellenberg_mean_Light 1 591.13 1.0367 0.3086 # 591.13-592.09 # = -0.96 # Get model predictions New_X <- expand.grid(Ellenberg_mean_Light = seq(from = min(Region_species_trends_x_Ellenberg$Ellenberg_mean_Light, na.rm = TRUE), to = max(Region_species_trends_x_Ellenberg$Ellenberg_mean_Light, na.rm = TRUE)), N_S = levels(as.factor(Region_species_trends_x_Ellenberg$N_S)), Log_sample_size = mean(Region_species_trends_x_Ellenberg$Log_sample_size)) # Predict means from data frame New_Y <- predict(Region_light_mod, newdata = New_X, type = "response", re.form = NA) # Add together into a single data frame. New_df <- data.frame(New_X, New_Y) # Predict CIs from data frame cpred1.CI <- easyPredCI(model = Region_light_mod, newdata = New_X) # Add it to the new data frame: New_df <- data.frame(New_df, cpred1.CI) # Plot (not in paper) ggplot(data = New_df, aes(x = Ellenberg_mean_Light, y = New_Y)) + geom_line() + geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) + ylab("ln(Trend + 100") + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + geom_hline(yintercept = log(100), col = "red") + facet_wrap(~N_S) + geom_point(data = Region_species_trends_x_Ellenberg, aes(x = Ellenberg_mean_Light, y = Trend_trans, size = Sample_size), alpha = 0.5) # ~~~d) Host_plant_light*Region*Habitat #### # Data prep # Cut all species that are too polyphagous (more than three hostplants) Hab_region_species_trends_x_Ellenberg <- Hab_region_species_trends_3 %>% dplyr::filter(Use_Ellenbergs == "y") # Run model Hab_region_light_mod <- lmer(Trend_trans ~ N_S*Ellenberg_mean_Light + (1|Common_name), weights = Log_sample_size, data = Hab_region_species_trends_x_Ellenberg) # check residuals plot(Hab_region_light_mod) # Not great # Is there a significant interaction between light-affinity and region within woodlands? drop1(Hab_region_light_mod, test = "Chisq") # npar AIC LRT Pr(Chi) # 453.07 #N_S:Ellenberg_mean_Light 1 451.12 0.048141 0.8263 # 451.12-453.07 # delta-AIC = 1.95 # Get model predictions New_X <- expand.grid(Ellenberg_mean_Light = seq(from = min(Hab_region_species_trends_x_Ellenberg$Ellenberg_mean_Light, na.rm = TRUE), to = max(Hab_region_species_trends_3$Ellenberg_mean_Light, na.rm = TRUE)), N_S = levels(as.factor(Hab_region_species_trends_x_Ellenberg$N_S)), Habitat = levels(as.factor(Hab_region_species_trends_x_Ellenberg$Habitat)), Log_sample_size = mean(Hab_region_species_trends_x_Ellenberg$Log_sample_size)) # Predict means from data frame New_Y <- predict(Hab_region_light_mod, newdata = New_X, type = "response", re.form = NA) # Add together into a single data frame. New_df <- data.frame(New_X, New_Y) # Predict CIs from data frame cpred1.CI <- easyPredCI(model = Hab_region_light_mod, newdata = New_X) # Add it to the new data frame: New_df <- data.frame(New_df, cpred1.CI) # Plot (not in paper) ggplot(data = New_df, aes(x = Ellenberg_mean_Light, y = New_Y)) + geom_line() + geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) + ylab("ln(Trend + 100") + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + geom_hline(yintercept = log(100), col = "red") + facet_wrap(~N_S) + geom_point(data = Hab_region_species_trends_x_Ellenberg, aes(x = Ellenberg_mean_Light, y = Trend_trans, size = Sample_size), alpha = 0.5) # # # # ~~~ #### # # # # Bonus bit - hostplant*habitat #### # Make a new hostplant variable. I will create new factor levels for this to include species such as # thise that feed only on willos and poplars, etc. Habitat_species_trends_3$Hostplant <- NA # In every case where Foodplant_2 equals 'NA', I will make Hostplant equal to Foodplant_1, # as these are either strictly monophagous or are already grouped at the genus level. Habitat_species_trends_3$Hostplant[is.na(Habitat_species_trends_3$Foodplant_2)] <- Habitat_species_trends_3$Foodplant_1[is.na(Habitat_species_trends_3$Foodplant_2)] # Check it worked ##View(Habitat_species_trends_3[, c(35, 64:65, 101)]) # good # Now make a special case for poplars/willow Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Salix" & Habitat_species_trends_3$Foodplant_2 == "Populus"] <- "Salix/Populus" # And the other option Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Populus" & Habitat_species_trends_3$Foodplant_2 == "Salix"] <- "Salix/Populus" # Willowherbs Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Chamerion_angustifolium" & Habitat_species_trends_3$Foodplant_2 == "Epilobium_hirsutum"] <- "Willowherbs" # Heathers Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Calluna_vulgaris" & Habitat_species_trends_3$Foodplant_2 == "Erica"] <- "Heathers (calluna and Erica sp.)" # Silene Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Silene_dioica" & Habitat_species_trends_3$Foodplant_2 == "Silene_latifolia"] <- "Silene" # Deschampsia (wavy hair grass) Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Deschampsia_flexuosa" & Habitat_species_trends_3$Foodplant_2 == "Deschampsia_cespitosa"] <- "Deschampsia" # Small Angle-shades feeds mainly on bracken Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Pteridium_aquilinum" & Habitat_species_trends_3$Foodplant_2 == "Dryopteris_filix-mas"] <- "Pteridium_aquilinum (mainly)" # Rubus sp Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Rubus_fruticosus" & Habitat_species_trends_3$Foodplant_2 == "Rubus_idaea"] <- "Rubus" # Rubus sp Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Rubus_fruticosus"] <- "Rubus" # Gallium Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Galium_verum" & Habitat_species_trends_3$Foodplant_2 == "Galium_mollugo"] <- "Galium" Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Galium_palustre" & Habitat_species_trends_3$Foodplant_2 == "Galium_uliginosum"] <- "Galium" # Achillea Habitat_species_trends_3$Hostplant[Habitat_species_trends_3$Foodplant_1 == "Achillea_millefolium" & Habitat_species_trends_3$Foodplant_2 == "Achillea_ptarmica"] <- "Achillea" # Cut the data frame down to only include hostplants that I am interested in, and also to remove the # ones with very small smaple sizes. Get rid of the follwing: Habitat_species_trends_x_hostplant <- Habitat_species_trends_3 %>% dplyr::filter(Hostplant != "Achillea" & Hostplant != "Alchemilla_mollis" & Hostplant != "Brassicaceae" & Hostplant != "Cupressus" & Hostplant != "Eriophorum_angustifolium" & Hostplant != "Larix_decidua" & Hostplant != "Linaria_vulgaris" & Hostplant != "Odontites_verna" & Hostplant != "Phragmites_australis" & Hostplant != "Rhamnus_cathartica" & Hostplant != "Stellaria" & Hostplant != "Tilia" & Hostplant != "Vicia_cracca" & Hostplant != "Willowherbs" & Hostplant != "Silene_dioica" & Hostplant != "Acer_campestre" & Hostplant != "Ulmus" & Hostplant != "Ulmus_glabra" & Hostplant != "Deschampsia_cespitosa" & Hostplant != "Cytisus_scoparius" & Hostplant != "Epilobium" & Hostplant != "Rhinanthus_minor" & Hostplant != "Populus" & Hostplant != "Pinus_sylvestris" & Hostplant != "Jacobaea_vulgaris" & Hostplant != "Deschampsia" & Hostplant != "Juniperus_communis" & Hostplant != "Hypericum" & Hostplant != "Euonymus_europaeus" & Hostplant != "Digitalis_purpurea") levels(as.factor(Habitat_species_trends_x_hostplant$Hostplant)) # 20 hostplants. # Now do the models and plot the interaction plots Hostplant_mod <- lmer(Trend_trans ~ Habitat*Hostplant + (1|Common_name), weights = Log_sample_size, data = subset(Habitat_species_trends_x_hostplant, Habitat != "Overall")) # Residual checks. plot(Hostplant_mod) # ok # Is there a significant interaction between hostplant and habitat? drop1(Hostplant_mod, test = "Chisq") # Trend_trans ~ Habitat * Hostplant + (1 | Common_name) # npar AIC LRT Pr(Chi) # 1313.2 #Habitat:Hostplant 90 1280.6 147.42 0.0001291 *** # # Sig interaction # Run the emmeans comparisons Hostplant_emmeans <- emmeans(Hostplant_mod, pairwise ~ Habitat|Hostplant) # Extract the dfs of contratsts and emmeans # Emmeans Hostplant_emmeans_df <- data.frame(Hostplant_emmeans$emmeans) # Get estimated percentage change for each group Hostplant_emmeans_df %>% dplyr::select(Habitat, Hostplant, emmean) %>% dplyr::mutate(Perc_change = (exp(emmean)-100)) # First four rows #Habitat Hostplant emmean Perc_change #1 Arable Alnus_glutinosa 4.676708 7.415876 #2 Broadleaf woodland Alnus_glutinosa 4.685057 8.316398 #3 Conifer plantation Alnus_glutinosa 4.352856 -22.299920 #4 Improved grassland Alnus_glutinosa 5.183141 78.241857 # Flip the order of the Habitats so Arable comes first Hostplant_emmeans_df$Habitat <- factor(Hostplant_emmeans_df$Habitat, levels = c("Arable","Broadleaf woodland", "Conifer plantation", "Improved grassland", "'Other semi-natural'", "Upland", "Urban")) # Run a model with no interaction to plot in the background of the main plot Hostplant_mod_3 <- lmer(Trend_trans ~ Habitat + Hostplant + (1|Common_name), weights = Log_sample_size, data = subset(Habitat_species_trends_x_hostplant, Habitat != "Overall")) # Run the emmeans comparisons Hostplant_emmeans_2 <- emmeans(Hostplant_mod_3, pairwise ~ Habitat|Hostplant) # Emmeans Hostplant_emmeans_df_2 <- data.frame(Hostplant_emmeans_2$emmeans) # Flip the order of the Habitats so Arable comes first Hostplant_emmeans_df_2$Habitat <- factor(Hostplant_emmeans_df_2$Habitat, levels = c("Arable","Broadleaf woodland", "Conifer plantation", "Improved grassland", "'Other semi-natural'", "Upland", "Urban")) # The plants for which no interaction could be made are not showing up... Fix this with some merging Hostplant_emmeans_df_B <- na.omit(Hostplant_emmeans_df) Hostplant_emmeans_df_B <- merge(Hostplant_emmeans_df_B, Hostplant_emmeans_df_2, by = c("Habitat", "Hostplant"), all.x = TRUE) # Get the sample size for each habitat-hostplant interaction Sample_sizes <- Habitat_species_trends_x_hostplant %>% dplyr::select(RIS_code, Habitat, Hostplant) %>% dplyr::distinct() %>% dplyr::group_by(Habitat, Hostplant) %>% dplyr::summarise(Sample_size = length(unique(RIS_code))) # Merge this on to df for figure Hostplant_emmeans_df_B <- merge(Hostplant_emmeans_df_B, Sample_sizes, by = c("Hostplant", "Habitat"), all.x = TRUE) # Plot (Fig. S13) ggplot(data = Hostplant_emmeans_df_B, aes(x = Hostplant, y = emmean.y)) + geom_errorbar(aes(ymax = upper.CL.y, ymin = lower.CL.y), width = 0, color = "grey80", size = 3) + geom_point(data = Hostplant_emmeans_df_B, aes(x = Hostplant, y = emmean.x)) + geom_errorbar(data = Hostplant_emmeans_df_B, aes(ymax = upper.CL.x, ymin = lower.CL.x), width = 0.1) + geom_hline(yintercept = log(100), linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + #theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + xlab("") + ylab("ln(trend + 100)") + ylab("Percentage change 1968 - 2016") + facet_wrap(~Habitat, nrow = 3) + # geom_text(aes(label = Sample_size)) + # Unhash ths line to print sample sizes on top of bars scale_y_continuous(breaks = c(log(-99+100), log(-90+100), log(-50+100), log(100+100), log(900+100), log(9900+100)), labels = c("-99%", "-90%", " -50%", " 100%", " 900%", " 9900%")) + theme(axis.text.x = element_text(angle = 35, hjust = 1)) # Saved as 8 x 8 inch pdf # # # # ~~~ #### # # # # Making a map of sites and summarising site sample sizes #### # Calculating how many sites were used in this analysis and previous (general) analysis is not # simple as each species used a different set of sites and the completeness of each site-year differs # between species. The general analysis (script 2) used only 'complete' site-years. As the headline # figures from my paper used the analysis with 'complete' site-years only, it is more honest to present # these sites as the main number of sites (although the number is substantially smaller). # I will map to the 266 sites that contributed to the general trends (total abundance, biomass, richness # and diversity). I will also tabulate the sample sizes of traps with the complete years as the main number # and the maximum number of sites used in species-specific trends in brackets. # Read in the complete dataset Annual_species_totals <- read.csv("Data/All_years_all_species_all_sites.csv") # and summarise it down to one value per site-year Annual_aggregate_totals <- Annual_species_totals %>% dplyr::group_by(CalYear, RIS.TrapCode) %>% dplyr::summarise(Annual_count = sum(Annual_count)) # And site info Site_info <- read.csv("Data/Site_info.csv") # And Site_year_completeness Site_year_completeness <- read.csv("Data/Site_year_info.csv") # All I need from this is Paul's complete codes (these are determined manually by the Rothamsted Insect # Survey) Site_year_completeness <- Site_year_completeness %>% dplyr::select(SITE, Year, Complete_type_Paul) # Prepare to merge on Site_info Site_info_2 <- Site_info Site_info_2$Site_name <- NULL Site_year_completeness_2 <- Site_year_completeness Site_year_completeness_2$Site_name_db <- NULL Site_year_completeness_2$Site_name_db_nogaps <- NULL # Some renaming Annual_aggregate_totals <- Annual_aggregate_totals %>% dplyr::rename(Year = CalYear, SITE = RIS.TrapCode) # Merge Annual_aggregate_totals_B <- merge(Annual_aggregate_totals, Site_info_2, by = c("SITE")) Annual_aggregate_totals_C <- merge(Annual_aggregate_totals_B, Site_year_completeness, by = c("SITE", "Year")) # Now cut down data to the site-years that were used in the final general analysis # Get rid of anything outside the 1968 - 2016 range and also the Ireland and Channel Isles sites # and keep only "Complete" site-years Annual_aggregate_totals_D <- Annual_aggregate_totals_C %>% dplyr::filter(Year >= 1968 & Year <= 2016) %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Region != "Ireland") %>% dplyr::filter(Complete_type_Paul == "Complete") # Make a new variables for Actual_years Actual_years_ran <- Annual_aggregate_totals_D %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on Annual_aggregate_totals_E <- merge(Annual_aggregate_totals_D, Actual_years_ran, by = "SITE") # And cut it down to sites with at least three Actual_years running Annual_aggregate_totals_F <- Annual_aggregate_totals_E %>% dplyr::filter(Actual_years >= 3) # How many sites? length(unique(Annual_aggregate_totals_F$SITE)) # 266 # How many sites in the north/south Annual_aggregate_totals_F %>% dplyr::group_by(N_S) %>% dplyr::summarise(No_of_sites = length(unique(SITE))) # N_S No_of_sites # North 89 # South 177 # How many sites in each habitat? Annual_aggregate_totals_F %>% dplyr::group_by(Habitat_500m) %>% dplyr::summarise(No_of_sites = length(unique(SITE))) # Habitat_500m No_of_sites # Arable 60 # Conifer_plantation 11 # Deciduous_woodland 42 # Improved_grassland 76 # Lowland_semi_natural 12 # Upland 12 # Urban 53 # How many sites in each Region/Habitat? Annual_aggregate_totals_F %>% dplyr::group_by(Habitat_500m, N_S) %>% dplyr::summarise(No_of_sites = length(unique(SITE))) # Habitat_500m N_S No_of_sites # Arable North 10 # Arable South 50 # Conifer_plantation North 7 # Conifer_plantation South 4 # Deciduous_woodland North 18 # Deciduous_woodland South 24 # Improved_grassland North 29 # Improved_grassland South 46 # Lowland_semi_natural North 6 # Lowland_semi_natural South 6 # Upland North 10 # Upland South 2 # Urban North 9 # Urban South 44 # How many sites with each dominant land-use type (LCM2015)? print(Annual_aggregate_totals_F %>% dplyr::group_by(Dom_land_use_500m, Up_lowland, N_S) %>% dplyr::summarise(No_of_sites = length(unique(SITE))), n = 28) # Dom_land_use_500m Up_lowland N_S No_of_sites # 1 Acid_grassland_2015_500m Lowland North 1 # 2 Acid_grassland_2015_500m Upland North 5 # 3 Arable_2015_500m Lowland North 10 # 4 Arable_2015_500m Lowland South 50 # 5 Bog_2015_500m Upland North 2 # 6 Calcareous_grassland_2015_500m Lowland South 1 # 7 Conifer_plantation_2015_500m Lowland North 7 # 8 Conifer_plantation_2015_500m Lowland South 4 # 9 Conifer_plantation_2015_500m Upland North 1 # 10 Conifer_plantation_2015_500m Upland South 1 # 11 Deciduous_woodland_2015_500m Lowland North 18 # 12 Deciduous_woodland_2015_500m Lowland South 24 # 13 Heather_2015_500m Lowland North 1 # 14 Heather_2015_500m Lowland South 1 # 15 Heather_grassland_2015_500m Lowland North 4 # 16 Improved_grassland_2000_500m Lowland North 2 (*These are on Isle of man where 2015 data was not available) # 17 Improved_grassland_2015_500m Lowland North 27 # 18 Improved_grassland_2015_500m Lowland South 47 # 19 Improved_grassland_2015_500m Upland North 2 # 20 Improved_grassland_2015_500m Upland South 1 # 21 Littoral_sediment_2015_500m Lowland South 1 # 22 Neutral_grassland_2015_500m Lowland South 1 # 23 Saltmarsh_2015_500m Lowland South 1 # 24 Suburban_2015_500m Lowland North 8 # 25 Suburban_2015_500m Lowland South 39 # 26 Supra_littoral_rock_2015_500m Lowland South 1 # 27 Urban_2015_500m Lowland North 1 # 28 Urban_2015_500m Lowland South 5 # Get one row per site ready for plotting Annual_aggregate_totals_G <- Annual_aggregate_totals_F %>% dplyr::select(Improved_longitude, Improved_latitude, Actual_years, Habitat_500m) %>% distinct() # Get coastline UK_coast <- read.csv("Data/UK_coastline_R.csv") # Make better names Hab_names <- c("Arable", "Conifer plantation", "Broadleaf woodland", "Improved grassland", "'Other semi-natural'", "Upland", "Urban") names(Hab_names) <- c("Arable", "Conifer_plantation", "Deciduous_woodland", "Improved_grassland", "Lowland_semi_natural", "Upland", "Urban") # Plot (Fig S1) ggplot(data = Annual_aggregate_totals_G, aes(x = Improved_longitude, y = Improved_latitude)) + geom_path(data = UK_coast, aes(x = Location.longitude, y = Location.latitude), size = 0.2) + geom_point(aes(size = Actual_years, colour = Habitat_500m), alpha = 0.5) + facet_wrap(~Habitat_500m, labeller = labeller(Habitat_500m = Hab_names)) + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + xlab("Longitude") + ylab("Latitude") + geom_hline(yintercept = 53.9, linetype = 2) + scale_size_continuous(breaks = c(3, 10, 20, 30, 40)) # Saved as 8 x 7.5 inch pdf # To get the maximum number of sites per individual species, I just need to rerun the above code without # cutting down to only complete site-years Annual_aggregate_totals_D_2 <- Annual_aggregate_totals_C %>% dplyr::filter(Year >= 1968 & Year <= 2016) %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Region != "Ireland") # Make a new variables for Actual_years Actual_years_ran <- Annual_aggregate_totals_D_2 %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on Annual_aggregate_totals_E_2 <- merge(Annual_aggregate_totals_D_2, Actual_years_ran, by = "SITE") # And cut it down to sites with at least three Actual_years running Annual_aggregate_totals_F_2 <- Annual_aggregate_totals_E_2 %>% dplyr::filter(Actual_years >= 3) # How many sites? length(unique(Annual_aggregate_totals_F_2$SITE)) # 384 # How many sites in the north/south Annual_aggregate_totals_F_2 %>% dplyr::group_by(N_S) %>% dplyr::summarise(No_of_sites = length(unique(SITE))) # N_S No_of_sites #1 North 119 #2 South 265 # How many sites in each habitat? Annual_aggregate_totals_F_2 %>% dplyr::group_by(Habitat_500m) %>% dplyr::summarise(No_of_sites = length(unique(SITE))) # Habitat_500m No_of_sites #1 Arable 76 #2 Conifer_plantation 12 #3 Deciduous_woodland 51 #4 Improved_grassland 129 #5 Lowland_semi_natural 19 #6 Upland 15 #7 Urban 82 # How many sites in each Region/Habitat? Annual_aggregate_totals_F_2 %>% dplyr::group_by(Habitat_500m, N_S) %>% dplyr::summarise(No_of_sites = length(unique(SITE))) # Habitat_500m N_S No_of_sites #1 Arable North 11 #2 Arable South 65 #3 Conifer_plantation North 8 #4 Conifer_plantation South 4 #5 Deciduous_woodland North 22 #6 Deciduous_woodland South 29 #7 Improved_grassland North 44 #8 Improved_grassland South 85 #9 Lowland_semi_natural North 8 #10 Lowland_semi_natural South 11 #11 Upland North 11 #12 Upland South 4 #13 Urban North 15 #14 Urban South 67 # How many sites with each dominant land-use type (LCM2015)? print(Annual_aggregate_totals_F_2 %>% dplyr::group_by(Dom_land_use_500m, Up_lowland, N_S) %>% dplyr::summarise(No_of_sites = length(unique(SITE))), n = 31) # Dom_land_use_500m Up_lowland N_S No_of_sites # 1 Acid_grassland_2015_500m Lowland North 1 # 2 Acid_grassland_2015_500m Lowland South 1 # 3 Acid_grassland_2015_500m Upland North 5 # 4 Acid_grassland_2015_500m Upland South 1 # 5 Arable_2015_500m Lowland North 11 # 6 Arable_2015_500m Lowland South 65 # 7 Bog_2015_500m Upland North 2 # 8 Calcareous_grassland_2015_500m Lowland South 1 # 9 Conifer_plantation_2015_500m Lowland North 8 # 10 Conifer_plantation_2015_500m Lowland South 4 # 11 Conifer_plantation_2015_500m Upland North 1 # 12 Conifer_plantation_2015_500m Upland South 1 # 13 Deciduous_woodland_2015_500m Lowland North 22 # 14 Deciduous_woodland_2015_500m Lowland South 29 # 15 Heather_2015_500m Lowland North 2 # 16 Heather_2015_500m Lowland South 2 # 17 Heather_grassland_2015_500m Lowland North 5 # 18 Improved_grassland_2000_500m Lowland North 2 # 19 Improved_grassland_2015_500m Lowland North 42 # 20 Improved_grassland_2015_500m Lowland South 85 # 21 Improved_grassland_2015_500m Upland North 3 # 22 Improved_grassland_2015_500m Upland South 2 # 23 Littoral_sediment_2015_500m Lowland South 1 # 24 Neutral_grassland_2015_500m Lowland South 1 # 25 Saltmarsh_2015_500m Lowland South 3 # 26 Suburban_2015_500m Lowland North 14 # 27 Suburban_2015_500m Lowland South 58 # 28 Supra_littoral_rock_2015_500m Lowland South 1 # 29 Supra_littoral_sediment_2015_500m Lowland South 1 # 30 Urban_2015_500m Lowland North 1 # 31 Urban_2015_500m Lowland South 9 # End of script