#######################################################################
#######################################################################
## This script was created by Dr. Jen Cruz as part of ##
## the Applied Population Ecology Class ###
## ##
## Here we import our cleaned data for the time-series of point count #
# observations for Piute ground squirrels at the NCA and run ##
## robust population N-mixture analyses. The models are hierarchical #
# with: (1) an ecological submodel linking abundance to            #
## environmental predictors; (2) an observation submodel linking ##
## detection probability to relevant predictors. ##
## ##
# We will focus on linking predictors to gains (recruitment) rather  #
# than survival. Female Piute ground squirrels give birth to an      #
# average of 5-10 young. Reproduction is likely more successful when #
# the active season lasts longer, so warmer Feb temperatures (an     #
# earlier end to hibernation) should increase recruitment. If        #
# Apr-May temperatures are too hot, food availability for young may  #
# decrease. Areas with more sagebrush may provide more food sources  #
# for young.                                                         #
#                                                                    #
# Detection may be related to observer effects and to time of day,   #
# with higher detection expected in the middle of the day, when      #
# squirrels are most active (a quadratic effect; here we start with  #
# a linear term).                                                    #
#######################################################################
##### Set up your workspace and load relevant packages -----------
# Clean your workspace to reset your R environment. #
rm( list = ls() )
# Check that you are in the right project folder
getwd()
# load packages:
library( tidyverse )#includes dplyr, tidyr and ggplot2
library( unmarked ) #analyze count and occupancy data
library( AICcmodavg) #gof tests (Duarte et al. 2018)
library( nmixgof ) #more gof tests (Knape et al. 2018)
## end of package load ###############
###################################################################
#### Load or create data -----------------------------------------
# set directory where your data are:
datadir <- paste( getwd(), "/Data/", sep = "" )
# load our cleaned data
opendf <- read.csv( file = paste( datadir, "open_counts.csv", sep = ""),
header = TRUE )
#view
head( opendf ); dim( opendf )
#### End of data load -------------
####################################################################
##### Ready data for analysis --------------
#create your response dataframe:
yy <- opendf %>%
select( o.sites, year, count.j1, count.j2, count.j3 ) %>%
pivot_wider( names_from = year, #which variable defines the wide columns
values_from = c("count.j1", "count.j2", "count.j3"), #which columns we include
#this renames the new columns so that year comes first
names_glue = "{year}_{.value}" )
#check: did we get the right number of dimensions?
head( yy ); dim( yy )
#now we sort columns by year instead of survey
yy <- yy %>% dplyr::select( str_sort( colnames(yy), numeric = TRUE ) ) %>%
# we remove site id
select( -o.sites )
#now create siteXyear dataframes:
#for sagebrush:
sagebrush <- opendf %>% select( o.sites, year, sagebrush ) %>%
spread( key = year, value = sagebrush ) %>%
select( -o.sites )
#check
head(sagebrush); dim(sagebrush)
#for Feb temperature:
Feb.minT <- opendf %>% select( o.sites, year, Feb.minT ) %>%
spread( key = year, value = Feb.minT ) %>%
select( -o.sites )
#check
head(Feb.minT); dim(Feb.minT)
#for Apr-May temperature:
AprMay.maxT <- opendf %>% select( o.sites, year, AprMay.maxT ) %>%
spread( key = year, value = AprMay.maxT ) %>%
select( -o.sites )
#check
head(AprMay.maxT); dim(AprMay.maxT)
#create observer level dataframes
#starting with observer effect:
observer <- opendf %>%
select( o.sites, year, observer.j1, observer.j2, observer.j3 ) %>%
pivot_wider( names_from = year, #which variable defines the wide columns
values_from = c("observer.j1", "observer.j2", "observer.j3"), #which columns we include
#this renames the new columns so that year comes first
names_glue = "{year}_{.value}" )
#check: did we get the right number of dimensions?
head( observer ); dim( observer )
#now we sort columns by year instead of survey
observer <- observer %>% dplyr::select( str_sort( colnames(observer), numeric = TRUE ) ) %>%
# we remove site id
select( -o.sites )
#now for time of day:
Time <- opendf %>%
select( o.sites, year, time.j1, time.j2, time.j3 ) %>%
pivot_wider( names_from = year, #which variable defines the wide columns
values_from = c("time.j1", "time.j2", "time.j3"), #which columns we include
#this renames the new columns so that year comes first
names_glue = "{year}_{.value}" )
#check: did we get the right number of dimensions?
head( Time ); dim( Time )
#now we sort columns by year instead of survey
Time <- Time %>% dplyr::select( str_sort( colnames(Time), numeric = TRUE ) ) %>%
# we remove site id
select( -o.sites )
#lastly, calculate the number of primary periods (years)
#note: naming this T masks R's shorthand for TRUE; fine in this short script
T <- length( unique( opendf$year ) )
#now combine into unmarked dataframe:
umf <- unmarkedFramePCO( y = as.matrix(yy),
#combine yearXsite dataframes into a list
yearlySiteCovs = list( sagebrush = sagebrush,
Feb.minT =Feb.minT,
AprMay.maxT = AprMay.maxT),
#combine observer-level dataframes into list
obsCovs = list( observer = observer,
time = Time ),
#provide number of primary periods
numPrimary = T )
# Check
summary( umf )
#now scale ecological predictors:
sc <- apply( yearlySiteCovs(umf), MARGIN = 2, FUN = scale )
# We replace the predictors in our unmarked dataframe with the scaled values:
yearlySiteCovs( umf ) <- sc
#now scale time of day, the only continuous observation-level predictor
#(column 2; observer is an identifier and is left unscaled):
osc <- as.vector( scale( obsCovs(umf)[2] ) )
#replace with scaled values:
obsCovs(umf)[2] <- osc
#recheck
summary( umf )
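# Optional quick sanity check: after scaling, the ecological predictors
# should have mean ~0 and sd ~1:
round( colMeans( yearlySiteCovs(umf), na.rm = TRUE ), 2 )
round( apply( yearlySiteCovs(umf), MARGIN = 2, FUN = sd, na.rm = TRUE ), 2 )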
### end data prep -----------
########################################################################
### Analyze data ------------------------------------------
# The pcountOpen() function allows a wide variety of model parameterizations. #
# With more choices comes potential confusion about how to implement the      #
# different options adequately. Models are also very computationally          #
# intensive and can take several hours (or days) to run, depending on your    #
# computer. If you can provide initial values, do so. Keep K small in the     #
# first run and start without estimating standard errors.                     #
# This will allow you to check that you have specified the model correctly.   #
# Then build up from there.                                                   #
# Be on the lookout for strange results, such as negative log-likelihoods     #
# that fail to decrease when you add parameters, or NaNs when se = TRUE.      #
# Here we start with a description of some of the available options. Please   #
# check the unmarked manual for further details.                              #
# Initial abundance is always: N[i,1] ~ Poisson( lambda )                      #
# Survivors are modeled with a binomial process:                               #
# S[i,t+1] ~ Binomial( N[i,t], omega ), where omega is apparent survival.      #
# If dynamics = "autoreg":                                                     #
# recruits depend on previous abundance (density-dependent recruitment):       #
# R[i,t+1] ~ Poisson( N[i,t] * gamma ), where gamma is per-capita recruitment. #
# Note that under this option, extinct sites (N[i,t] = 0) cannot experience    #
# recruitment (i.e., cannot be rescued).                                       #
# OR:                                                                          #
# If dynamics = "constant":                                                    #
# R[i,t+1] ~ Poisson( gamma ), where gamma is a constant recruitment rate.     #
#                                                                              #
# If dynamics = "notrend": gamma is fixed at lambda * (1 - omega), so there    #
# is no recruitment parameter to estimate and no temporal trend.               #
# If dynamics = "trend": population growth is modeled as exponential growth:   #
# N[i,t] = N[i,t-1] * gamma, where gamma is the finite rate of increase        #
# (normally referred to as lambda).                                            #
# Dynamics can also be modeled as density-dependent in two other ways,         #
# where gamma is the maximum instantaneous population growth rate (r) and      #
# omega is the equilibrium abundance (normally termed K):                      #
# (1) Ricker: N[i,t] = N[i,t-1] * exp( gamma * (1 - N[i,t-1] / omega) ), OR    #
# (2) Gompertz: N[i,t] = N[i,t-1] * exp( gamma * ( 1 - log( N[i,t-1] + 1 ) /   #
# log( omega + 1 ) ) )                                                         #
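# To build intuition for these dynamics, here is a toy simulation of a single
# 'autoreg' transition (illustrative only; the parameter values are made up):
set.seed( 1 )
N.t <- 20 #abundance at site i, time t
omega.sim <- 0.6 #apparent survival
gamma.sim <- 0.5 #per-capita recruitment
S.sim <- rbinom( n = 1, size = N.t, prob = omega.sim ) #survivors
R.sim <- rpois( n = 1, lambda = N.t * gamma.sim ) #recruits
N.t1 <- S.sim + R.sim #abundance at time t+1
N.t1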
# Let's start with a simple model
fm.0 <- pcountOpen( #lambda formula for initial abundance:
lambdaformula = ~1,
#gamma is per-capita recruitment under 'autoreg' dynamics
gammaformula = ~1,
#omega is apparent survival
omegaformula = ~1,
#detection formula:
pformula = ~1,
#recruitment depends on previous abundance
dynamics = 'autoreg',
#upper bound of the integration over N; must exceed the maximum count
K = 80,
# don't calculate standard errors, which makes it run faster:
se = FALSE, #useful for the first run
# set distribution as Poisson:
mixture = "P", #NB or ZIP also possible
# set to TRUE if you want to separate immigration from in-situ recruitment:
immigration = FALSE,
# provide data
data = umf,
# set control parameters so that you can see progress:
control = list( trace = TRUE, REPORT = 1) )
# View model results:
fm.0
# We now run the dynamic count model of Dail & Madsen (2011) with constant recruitment. #
fm.dyn0 <- pcountOpen( #lambda formula for abundance in year 1 only:
lambdaformula = ~1,
#gamma is recruitment rate
gammaformula = ~1,
#omega is survival probability
omegaformula = ~1,
#detection formula:
pformula = ~1, #
#constant recruitment rate
dynamics = 'constant',
#upper bound of discrete integration. Should be higher than the maximum
#observed count and high enough that it does not affect #
#parameter estimates
K = 80,
#for the first run, turn off calculation of standard errors
se = FALSE,
#more complicated models allow immigration to be split
#from in-situ births; we don't enable that here
immigration = FALSE,
#here you provide your unmarked dataframe
data = umf,
control = list( trace = TRUE, REPORT = 1) )
#view
fm.dyn0
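# The two intercept-only models can be compared via AIC (stored in the @AIC
# slot of unmarked fits; available even when se = FALSE):
cbind( autoreg = fm.0@AIC, constant = fm.dyn0@AIC )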
# We now rerun the model above, adding predictors. These models are hard #
# to fit, which is why we started with the intercept-only version; we #
# again hold off on calculating standard errors. #
fm.dyn <- pcountOpen( #lambda formula for initial abundance:
lambdaformula = ~1,
gammaformula = ~1 + sagebrush + Feb.minT + AprMay.maxT,
omegaformula = ~1,
pformula = ~1 + time,
dynamics = 'constant',
K = 80,
se = FALSE,
immigration = FALSE,
data = umf,
control = list( trace = TRUE, REPORT = 1) )
# View model results:
fm.dyn
# Next we rerun the model with se = TRUE, using the estimates from the #
# previous run as initial values for the new model
#define coefficients from the previous model as initial values for the next run
inits <- coef( fm.dyn )
#run model
fm.dynSE <- pcountOpen( lambdaformula = ~1,
gammaformula = ~1 + sagebrush + Feb.minT + AprMay.maxT,
omegaformula = ~1, pformula = ~1 + time,
dynamics = 'constant', #same dynamics as fm.dyn (the default)
K = 80, se = TRUE, data = umf,
control = list( trace = TRUE, REPORT = 1), starts = inits )
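# Before interpreting output, it is worth checking that the optimizer
# reported convergence (optim() returns 0 on success) and that the
# standard errors are finite (NaNs suggest problems):
fm.dynSE@opt$convergence
sqrt( diag( vcov( fm.dynSE ) ) )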
#define which model you want to view
fm <- fm.dynSE
#view results
summary(fm)
#backtransform parameter estimates
exp(coef(fm, type = "lambda"))
#depending on your dynamics, the back-transformation is plogis (inverse logit) or exp:
plogis(coef(fm, type = "omega"))
#exp(coef(fm, type = "omega"))
#depending on your dynamics, gamma may be absent (e.g., in the 'notrend' model)
exp( coef( fm, type = "gamma"))
plogis( coef( fm, type = "det" ) )
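# For intercept-only submodels (here, lambda and omega), unmarked's
# backTransform() should also return the back-transformed estimate with a SE:
backTransform( fm, type = "lambda" )
backTransform( fm, type = "omega" )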
# We can also estimate confidence intervals for coefficients in #
# ecological submodel:
# 1st season mean abundance:
confint( fm, type = "lambda" )
# omega is apparent survival under 'constant' or 'autoreg' dynamics,
# and equilibrium abundance (K) under Ricker or Gompertz:
confint( fm, type = "omega" )
# gamma is recruitment here; note there is no gamma in the 'notrend' model:
confint( fm, type = "gamma" )
# coefficients for detection submodel:
confint( fm, type = 'det' )
#
# Based on the overlap of the 95% CIs for your predictor coefficients, #
# can you suggest which may be important to each of your responses? #
# Answer:
#
#############end full model ###########
##########################################################################
# Model fit and evaluation -----------------------------------------------
# We start with the goodness-of-fit (GoF) test outlined by Duarte et al. 2018 #
# Ecological Modelling 374:51-59, available via the AICcmodavg package. #
# The Nmix.gof.test relies on a Pearson chi-square to assess the fit of #
# N-mixture models. The approach uses bootstrapping to estimate the p-value. #
# The test also estimates c-hat, a measure of overdispersion, calculated as #
# the observed test statistic divided by the mean of the simulated statistics. #
# Let's compute the observed chi-square, assess significance, and estimate c-hat:
gof.boot <- Nmix.gof.test( fm, nsim = 100, print.table = TRUE )
#view
gof.boot
# Remember that higher chi-squared values represent worse fit
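# The nmixgof package loaded above offers the residual-based checks of
# Knape et al. 2018. A sketch (these functions were written for N-mixture
# fits; check ?chat to confirm they accept your model object):
# chat( fm, type = "marginal" ) #overdispersion metric
# residqq( fm, type = "site-sum" ) #randomized-quantile residual QQ plot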
#########################################################################
##### Summarizing model output ##############
# We now check the effects that our predictors are having on abundance #
# by plotting partial prediction plots for our ecological submodels #
# Here I focus only on those with 95% CIs not overlapping zero:
# We start by creating our datasets to predict over
# how many values do we use:
n <- 100
# we use the observed values to define our range:
sage <- seq( min( opendf[,"sagebrush"]),max( opendf[,"sagebrush"]),
length.out = n )
#standardize them (caution: scale() uses this sequence's own mean/sd,
#which only approximates the scaling applied to the fitting data)
sage.std <- scale( sage )
#combine standardized predictor into a new dataframe to predict partial relationship
sageData <- data.frame( sagebrush = sage.std, AprMay.maxT = 0, Feb.minT = 0 )
#predict partial relationship
pred.sage <- predict( fm, type = "gamma", newdata = sageData,
appendData = TRUE )
#view
head( pred.sage ); dim( pred.sage )
#Feb.minT
minT <- seq( min( opendf[,"Feb.minT"]),max( opendf[,"Feb.minT"]),
length.out = n )
#standardize them
minT.std <- scale( minT )
#combine standardized predictor into a new dataframe to predict partial relationship
minData <- data.frame( sagebrush = 0, Feb.minT = minT.std, AprMay.maxT = 0)
#predict partial relationship
pred.minT <- predict( fm, type = "gamma", newdata = minData,
appendData = TRUE )
#AprMay.maxT
maxT <- seq( min( opendf[,"AprMay.maxT"]),max( opendf[,"AprMay.maxT"]),
length.out = n )
#standardize them
maxT.std <- scale( maxT )
#combine standardized predictor into a new dataframe to predict partial relationship
maxData <- data.frame( sagebrush = 0, Feb.minT = 0, AprMay.maxT = maxT.std )
#predict partial relationship
pred.maxT <- predict( fm, type = "gamma", newdata = maxData,
appendData = TRUE )
# create plots for ecological submodels
sagep <- cbind( pred.sage[,c("Predicted", "lower", "upper") ], sage ) %>%
# define x and y values
ggplot(., aes( x = sage, y = Predicted ) ) +
#choose preset look
theme_bw( base_size = 15 ) +
# add labels
labs( x = "Sagebrush (%)", y = "Recruitment" ) +
# add band of confidence intervals
geom_smooth( aes(ymin = lower, ymax = upper ),
stat = "identity",
size = 1.5, alpha = 0.5, color = "grey" ) +
# add mean line on top
geom_line( size = 2 )
#view
sagep
# How do you interpret this relationship?
# Answer:
#
minTp <- cbind( pred.minT[,c("Predicted", "lower", "upper") ], minT ) %>%
# define x and y values
ggplot(., aes( x = minT, y = Predicted ) ) +
#choose preset look
theme_bw( base_size = 15 ) +
# add labels
labs( x = "Minimum temperature (Feb)", y = "Recruitment" ) +
# add band of confidence intervals
geom_smooth( aes(ymin = lower, ymax = upper ),
stat = "identity",
size = 1.5, alpha = 0.5, color = "grey" ) +
# add mean line on top
geom_line( size = 2 )
#view
minTp
maxTp <- cbind( pred.maxT[,c("Predicted", "lower", "upper") ], maxT ) %>%
# define x and y values
ggplot(., aes( x = maxT, y = Predicted ) ) +
#choose preset look
theme_bw( base_size = 15 ) +
# add labels
labs( x = "Maximum temperature (Apr-May)", y = "Recruitment" ) +
# add band of confidence intervals
geom_smooth( aes(ymin = lower, ymax = upper ),
stat = "identity",
size = 1.5, alpha = 0.5, color = "grey" ) +
# add mean line on top
geom_line( size = 2 )
#view
maxTp
# Add plots for detection
# Answer:
#
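# One possible sketch, mirroring the recruitment plots above. Note that time
# was scaled inside umf, so we predict over its standardized range:
times <- seq( min( obsCovs(umf)$time, na.rm = TRUE ),
max( obsCovs(umf)$time, na.rm = TRUE ), length.out = n )
timeData <- data.frame( time = times )
#predict partial relationship for detection
pred.time <- predict( fm, type = "det", newdata = timeData,
appendData = TRUE )
timep <- cbind( pred.time[,c("Predicted", "lower", "upper") ], times ) %>%
ggplot(., aes( x = times, y = Predicted ) ) +
theme_bw( base_size = 15 ) +
labs( x = "Time of day (standardized)", y = "Detection probability" ) +
geom_smooth( aes(ymin = lower, ymax = upper ),
stat = "identity",
size = 1.5, alpha = 0.5, color = "grey" ) +
geom_line( size = 2 )
#view
timep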
############################################################################
################## Save your data and workspace ###################
# Save workspace:
save.image( "RobCountResults.RData" )
#save the plot objects you need for your presentation
#start by calling the file where you will save it
# tiff( 'Data/SageXGam.tiff',
# height = 10, width = 12, units = 'cm',
# compression = "lzw", res = 400 )
# #call the plot
# sagep
# #end connection
# dev.off()
########## End of saving section ##################################
############# END OF SCRIPT #####################################