-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
How to automate the script across all species #19
Comments
@SarahVray yr <- unique(pheno[order(M_YEAR),][!is.na(NM), as.numeric(as.character(M_YEAR))])
if(length(yr) < 1) next() ## skip species[i] and move to next
if("trimWEEKNO" %in% names(pheno)){
plot(unique(pheno[M_YEAR == yr[1], .(trimWEEKNO, NM)]), type = 'l', ylim = c(0, max(pheno[!is.na(NM), ][, NM])), xlab = 'Monitoring Week', ylab = 'Relative Abundance', lwd=3, main=paste(species.names[i], "flight curve"))
} else {
plot(unique(pheno[M_YEAR == yr[1], .(trimDAYNO, NM)]), type = 'l', ylim = c(0, max(pheno[!is.na(NM), ][, NM])), xlab = 'Monitoring Day', ylab = 'Relative Abundance', lwd=3, main=paste(species.names[i], "flight curve"))
}
if(length(yr) > 1) {
j <- 2
for(y in yr[-1]){
if("trimWEEKNO" %in% names(pheno)){
pointsunique(pheno[M_YEAR == y , .(trimWEEKNO, NM)]), type = 'l', col = i, lwd=3)
} else {
points(unique(pheno[M_YEAR == y , .(trimDAYKNO, NM)]), type = 'l', col = i, lwd=3)
}
j <- j + 1
}}
legend('topright', legend = c(yr), col = c(seq_along(c(yr))), lty = 1, bty = 'o', lwd = 3, title="Year")
} |
Thank you very much @RetoSchmucki, it works for plotting the curves. However I still have an issue when calculating the species trends (from "workshop_functions.R", https://butterfly-monitoring.github.io/bms_workshop/species_trends.html). It efficiently skips species without any year available, but it gets stuck at some point, when it gets to the species Argynnis adippe_collated index.csv for which only the flight curve of 2021 could be fitted. species.names <- unique(LUBMS_count[order(SPECIES), SPECIES])
for(i in 1:length(species.names)){
co_index <- readRDS(file.path("K:/BIODIV/BIOLYS/3_Working area/WP2 - butterfly trends/Working directory/1-ALL transects/co_index_boot", paste( gsub(" ", "_", species.names[i]), "co_index_boot.rds", sep="_")))
yr <- unique(co_index[order(M_YEAR),][!is.na(NSITE), as.numeric(as.character(M_YEAR))])
if(length(yr) < 1) next() ## skip species[i] and move to next
# Convert to log 10 collated indices (TRMOBS):
co_index[, LOGDENSITY:= log(COL_INDEX)/log(10)][, TRMOBS := LOGDENSITY - mean(LOGDENSITY) + 2, by = .(BOOTi)]
# add the trims information
# Estimate and classify species trends with a CI based on the bootstraps:
sp_trend <- estimate_boot_trends(co_index)
trend_path <- file.path("K:","BIODIV","BIOLYS","3_Working area","WP2 - butterfly trends","Working directory","1-ALL transects", "trend",paste(species.names[i], "trend.csv", sep="_"))
write.csv(sp_trend, file=trend_path, row.names=T)
} And the error message I get: Is it because we need a minimum number of years (3 if I remember well) with a flight curve to be able to calculate the trend? |
Hi @SarahVray If you run co_index[, LOGDENSITY:= log(COL_INDEX)/log(10)][, TRMOBS := LOGDENSITY - mean(LOGDENSITY) + 2, by = .(BOOTi)] You will have NA or -Inf in your TRMOBS, since Here is your fix: source("workshop_functions.R")
co_index <- data.table::fread("Argynnis.adippe_collated.index.csv")
## Only use COL_INDEX larger then 0 for calculation or logdensity and trmobs
co_index[COL_INDEX > 0, LOGDENSITY := log(COL_INDEX) / log(10)][
COL_INDEX > 0, TRMOBS := LOGDENSITY - mean(LOGDENSITY, rm.na = TRUE) + 2,
by = .(BOOTi)]
## ==============
## trend_func()
## find first and last year without zeros
## collind_df[!is.na(TRMOBS), min(M_YEAR)]
## collind_df[!is.na(TRMOBS), max(M_YEAR)]
## ==============
# Underlying function for (linear) trend calculation
trend_func <- function(collind_df, bycols = NULL, minyear = NULL, maxyear = NULL) {
setDT(collind_df)
if (is.null(minyear)) minyear <- collind_df[!is.na(TRMOBS), min(M_YEAR)]
if (is.null(maxyear)) maxyear <- collind_df[!is.na(TRMOBS), max(M_YEAR)]
if (!is.null(minyear)) collind_df <- collind_df[M_YEAR >= minyear]
if (!is.null(maxyear)) collind_df <- collind_df[M_YEAR <= maxyear]
if (nrow(collind_df) > 0) {
# Fit linear model
lm_obj <- try(lm(TRMOBS ~ M_YEAR, collind_df), silent = TRUE)
trend_df <- data.frame(
BOOTi = collind_df$BOOTi[1],
data_from = collind_df[!is.na(TRMOBS), min(M_YEAR)],
data_to = collind_df[!is.na(TRMOBS), max(M_YEAR)],
data_nyears = length(collind_df$M_YEAR),
minyear = minyear,
maxyear = maxyear,
n = length(minyear:maxyear),
rate_lt = ifelse(class(lm_obj) != "try-error",
exp(coef(lm_obj)[2] * 2.303), NA
),
pc1_lt = ifelse(class(lm_obj) != "try-error",
100 * (exp(coef(lm_obj)[2] * 2.303) - 1), NA
),
pcn_lt = ifelse(class(lm_obj) != "try-error",
100 * (exp(coef(lm_obj)[2] * 2.303)^(
maxyear - minyear) - 1), NA
)
)
# Fit for last 10 years only if data have more than 10 years
if (length(minyear:maxyear) > 10) {
collind_df10 <- collind_df[M_YEAR >= (maxyear - 9)]
lm_obj10 <- try(lm(TRMOBS ~ M_YEAR, collind_df10), silent = TRUE)
trend_df$rate_10y <- ifelse(class(lm_obj10) != "try-error",
exp(coef(lm_obj10)[2] * 2.303), NA
)
trend_df$pc1_10y <- ifelse(class(lm_obj10) != "try-error",
100 * (exp(coef(lm_obj10)[2] * 2.303) - 1), NA
)
trend_df$pcn_10y <- ifelse(class(lm_obj10) != "try-error",
100 * (exp(coef(lm_obj10)[2] * 2.303)^(
9) - 1), NA
)
}
if (!is.null(bycols)) {
for (i in 1:length(bycols)) {
trend_df[, bycols[i]] <- collind_df[1, get(bycols[i])]
}
}
return(trend_df)
} else {
return(NULL)
}
}
## calculate trend
sp_trend <- estimate_boot_trends(co_index, minyear = 2017) Try this out and let me know if it works. I will update |
Hi @RetoSchmucki, thanks very much for your reply. Indeed this species was not recorded before 2017 (not because it colonized our monitoring sites and started to spread but because we started new transects in dry grasslands in 2016 and then in 2021, so it is clearly a bias that we will have to fix). I ran your code (with the modification in workshop_function) and it seems to work, although it gives me an "uncertain" trend, potentially because 6 years are not enough (?): data_from data_to data_nyears minyear maxyear n nboot_lt rate_lt rate_lt_low rate_lt_upp pcn_lt TrendClass_lt
0 2017 2022 6 2017 2022 6 1000 1.139927 0.7021053 19.6853 92.47969 Uncertain But then as it creates NAs in |
@SarahVray and for the plot, you will need to add co_index0_mean <- mean(co_index[BOOTi == 0]$LOGDENSITY, na.rm = TRUE)
# Derive interval from quantiles
co_index_ci <- merge(co_index[BOOTi == 0, .(M_YEAR, TRMOBS)],
co_index[BOOTi != 0, .(
LOWER = quantile(LOGDENSITY - co_index0_mean + 2, 0.025, na.rm = TRUE),
UPPER = quantile(LOGDENSITY - co_index0_mean + 2, 0.975, na.rm = TRUE)
), by = .(M_YEAR)],
by = c("M_YEAR")
)
spp <- unique(co_index[,SPECIES])
ggplot(co_index_ci, aes(M_YEAR, TRMOBS)) +
theme(text = element_text(size = 14)) +
geom_line() +
geom_point() +
geom_ribbon(aes(ymin = LOWER, ymax = UPPER), alpha = .3) +
geom_smooth(method = "lm", se = FALSE, color = "red") +
xlab("Year") +
ylab(expression("log "["(10)"] * " Collated Index")) +
ggtitle(paste("Collated index for", gsub("_", " ", spp))) Looking at this figure, you see why the trend from 2017 to 2022 was classified as uncertain. |
@RetoSchmucki, thanks it works :-). I get: data_from data_to data_nyears minyear maxyear n nboot_lt rate_lt rate_lt_low rate_lt_upp pcn_lt TrendClass_lt
0 2015 2022 8 2015 2022 8 1000 13.68359 5.504991 34.75508 8982546761 Strong increase Yes indeed I can see the plot now. Thanks again! |
Hi @RetoSchmucki, testing this new code on a bigger dataset, I get another error message at the same step, when estimating the species trends with: source("workshop_functions.R")
species.names <- unique(LUBMS_count[order(SPECIES), SPECIES])
for(i in 1:length(species.names)){
co_index <- readRDS(file.path("K:/BIODIV/BIOLYS/3_Working area/WP2 - butterfly trends/Working directory/1-ALL transects/co_index_boot", paste( gsub(" ", "_", species.names[i]), "co_index_boot.rds", sep="_")))
# BOOTi: 0 when raw data
yr <- unique(co_index[order(M_YEAR),][!is.na(NSITE), as.numeric(as.character(M_YEAR))])
if(length(yr) < 1) next() ## skip species[i] and move to next
## Only use COL_INDEX larger then 0 for calculation of logdensity and trmobs
co_index[COL_INDEX > 0, LOGDENSITY := log(COL_INDEX) / log(10)][
COL_INDEX > 0, TRMOBS := LOGDENSITY - mean(LOGDENSITY, rm.na = TRUE) + 2,
by = .(BOOTi)]
# Estimate and classify species trends with a CI based on the bootstraps:
# fitting all the regressions of bootstraps separately and aggregate them
sp_trend <- estimate_boot_trends(co_index)
trend_path <- file.path("K:","BIODIV","BIOLYS","3_Working area","WP2 - butterfly trends","Working directory","1-ALL transects", "trend",paste(species.names[i], "trend.csv", sep="_"))
write.csv(sp_trend, file=trend_path, row.names=T)
# Plot the species log collated index with bootstrapped 95% CI and linear trend line (in red):
# Calculate mean log index for original data
co_index0_mean <- mean(co_index[BOOTi == 0]$LOGDENSITY, na.rm = TRUE)
# Derive interval from quantiles
co_index_ci <- merge(co_index[BOOTi == 0, .(M_YEAR, TRMOBS)],
co_index[BOOTi != 0, .(LOWER = quantile(LOGDENSITY - co_index0_mean + 2, 0.025, na.rm = TRUE),
UPPER = quantile(LOGDENSITY - co_index0_mean + 2, 0.975, na.rm = TRUE)),
by = .(M_YEAR)],
by=c("M_YEAR"))
# Save plot as jpeg:
collated_index_path <- file.path("K:","BIODIV","BIOLYS","3_Working area","WP2 - butterfly trends","Working directory","1-ALL transects","collated index","Plot version 2",paste(species.names[i], "collated index.jpg", sep="_"))
jpeg(file=collated_index_path, width=800, height=600,units="px",bg="white",res=100,pointsize = 12)
col_plot <- ggplot(co_index_ci, aes(M_YEAR, TRMOBS))+
theme(text = element_text(size = 14))+
geom_line()+
geom_point()+
geom_ribbon(aes(ymin = LOWER, ymax = UPPER), alpha = .3)+
geom_smooth(method="lm", se=FALSE, color="red")+
xlab("Year")+ylab(expression('log '['(10)']*' Collated Index'))+
ggtitle(paste("Collated index for", gsub("_", " ", species.names[i]), "in LUBMS"))
print(col_plot + scale_x_continuous(limits = c(2010, 2022), n.breaks=7))
dev.off()
} And with the new code you made and that I copy-pasted in I get this error message: Error in rbind(deparse.level, ...) :
numbers of columns of arguments do not match
In addition: Warning messages:
1: Removed 5 rows containing non-finite values (stat_smooth).
2: Removed 5 row(s) containing missing values (geom_path).
3: Removed 5 rows containing missing values (geom_point). Do you see where it could come from? Thank you again for your help. |
Hi @SarahVray |
Hi @RetoSchmucki,
Do you know where these issues could come from? |
I don't know what is causing these issues but will have a look and try to reply soon. |
Dear Reto @RetoSchmucki,
I am trying to automate your script from https://retoschmucki.github.io/rbms/articles/Get_Started_1.html and https://retoschmucki.github.io/rbms/articles/Get_Started_2.html to run it across all species at once.
For this I am creating a "for" loop at each step of the script, e.g (with LUBMS_count = the count table):
species.names <- unique(LUBMS_count[order(SPECIES), SPECIES])
for(i in 1:length(species.names)){
ts_season_count <- rbms::ts_monit_count_site(ts_season_visit, LUBMS_count, sp = species.names[i])
saveRDS(ts_season_count, file.path("K:/BIODIV/BIOLYS/3_Working area/WP2 - butterfly trends/Working directory/1-ALL transects/ts_season_count", paste(gsub(" ", "_", species.names[i]), "ts_season_count.rds", sep = "_")))
}
This command works fine but not for all steps. For example, when it comes to plot the flight curve, the NAs from the field NM are causing an error message ending the loop, and I don't know how to handle this.
for(i in 1:length(species.names)){
ts_flight_curve <- readRDS(file.path("K:/BIODIV/BIOLYS/3_Working area/WP2 - butterfly trends/Working directory/1-ALL transects/flight curve", paste(gsub(" ", "_", species.names[i]), "pheno.rds", sep = "_")))
pheno <- ts_flight_curve$pheno
yr <- unique(pheno[order(M_YEAR), as.numeric(as.character(M_YEAR))])
if("trimWEEKNO" %in% names(pheno)){
plot(pheno[M_YEAR == yr[1], trimWEEKNO], pheno[M_YEAR == yr[1], NM], type = 'l', ylim = c(0, max(pheno[, NM])), xlab = 'Monitoring Week', ylab = 'Relative Abundance', lwd=3, main=paste(species.names[i], "flight curve"))
} else {
plot(pheno[M_YEAR == yr[1], trimDAYNO], pheno[M_YEAR == yr[1], NM], type = 'l', ylim = c(0, max(pheno[, NM])), xlab = 'Monitoring Day', ylab = 'Relative Abundance', lwd=3, main=paste(species.names[i], "flight curve"))
}
if(length(yr) > 1) {
j <- 2
for(y in yr[-1]){
if("trimWEEKNO" %in% names(pheno)){
points(pheno[M_YEAR == y , trimWEEKNO], pheno[M_YEAR == y, NM], type = 'l', col = i, lwd=3)
} else {
points(pheno[M_YEAR == y, trimDAYNO], pheno[M_YEAR == y, NM], type = 'l', col = i, lwd=3)
}
j <- j + 1
}}
legend('topright', legend = c(yr), col = c(seq_along(c(yr))), lty = 1, bty = 'o', lwd = 3, title="Year")
}
=> Error in plot.window(...) : need finite 'ylim' values
There are two cases:
I have this issue at three steps in the script:
Would you know how to adapt the script to resolve this issue? I guess the statement "if next" would be helpful here but I have no clue on how to correctly apply it as my loop is built on species names and not on NM values, I cannot find examples on the web similar to the case here.
Thank you very much in advance for your help !
Sarah
The text was updated successfully, but these errors were encountered: