-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathREADME.Rmd
390 lines (296 loc) · 12.2 KB
/
README.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
---
output: github_document
---
<!-- README.md is generated from README.Rmd. Please edit that file -->
```{r, include = FALSE}
# Chunk defaults applied to every code chunk rendered in this README.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)
#############
# Load the datasets used by the helper functions defined below.
# The paths are relative to this file (knitr evaluates chunks from the
# directory that contains the document), so the README can be knitted from
# any checkout instead of only from one machine-specific absolute path.
load(file = file.path("data", "taxon.rda")) # expected to provide `taxon`
load(file = file.path("data", "colmap.rda")) # expected to provide `colmap`
load(file = file.path("data", "distribution.rda")) # expected to provide `distribution`
############
# Search a vector of species names in the `taxon` table using exact and
# fuzzy matching.
#
# Arguments:
#   splist       Character vector (or factor) of species names to look up.
#   max_distance Maximum edit distance accepted for a match. A value strictly
#                between 0 and 1 is read as a fraction of the length of each
#                submitted name; any other value is used as given.
#
# Returns a data frame with one row per unique, standardized submitted name
# that was matched: the submitted name, the columns of `taxon` (minus its
# first column, the id) and the edit distance to the matched scientific name.
# Names without a match are dropped. Repeated names are reported through a
# message and searched only once.
search_mammalcol <- function(splist, max_distance = 0.2) {
  # Defensive: factors would otherwise be handled as integer codes
  if (is.factor(splist)) {
    splist <- as.character(splist)
  }

  # Fix species names and report duplicates
  splist_st <- standardize_names(splist)
  dupes_splist_st <- find_duplicates(splist_st)
  if (length(dupes_splist_st) != 0) {
    message(
      "The following names are repeated in the 'splist': ",
      paste(dupes_splist_st, collapse = ", ")
    )
  }
  splist_std <- unique(splist_st)

  # Output container: submitted name + every taxon column + distance.
  # The width is derived from `taxon` so the function keeps working if the
  # table gains or loses columns.
  n_taxon_cols <- ncol(taxon)
  output_matrix <- matrix(
    nrow = length(splist_std),
    ncol = n_taxon_cols + 2L
  )
  colnames(output_matrix) <- c(
    "name_submitted",
    names(taxon),
    "Distance"
  )
  # Placeholder row used when a name has no acceptable match
  no_match <- rep("nill", n_taxon_cols)

  for (i in seq_along(splist_std)) {
    # Standardise max distance value (fraction -> number of edits)
    if (max_distance < 1 && max_distance > 0) {
      max_distance_fixed <- ceiling(nchar(splist_std[i]) * max_distance)
    } else {
      max_distance_fixed <- max_distance
    }

    # Fuzzy and exact match against the scientific names
    matches <- agrep(splist_std[i],
      taxon$scientificName,
      max.distance = max_distance_fixed,
      value = TRUE
    )

    row_data <- no_match
    dis_value_1 <- "nill"
    if (length(matches) > 0) {
      dis_value <- as.numeric(utils::adist(splist_std[i], matches))
      keep <- dis_value <= max_distance_fixed
      if (any(keep)) {
        # Several candidates can pass the filter; keep only the closest one
        # so that exactly one row is written for each submitted name.
        best <- matches[keep][which.min(dis_value[keep])]
        row_data <- as.matrix(
          taxon[match(best, taxon$scientificName), , drop = FALSE]
        )
        dis_value_1 <- min(dis_value[keep])
      }
    }

    output_matrix[i, ] <- c(splist_std[i], row_data, dis_value_1)
  }

  output <- as.data.frame(output_matrix)
  output <- output[, -2] # delete the id column (first column of taxon)
  output[output$scientificName != "nill", ]
}
# Normalise species names before they are searched.
#
# The input is trimmed and re-cased with simple_cap(), the qualifiers "cf."
# and "aff." are removed, underscores become spaces, the "x" hybrid marker is
# dropped (with a warning listing the affected inputs) and runs of whitespace
# are collapsed. Returns a character vector of cleaned names.
standardize_names <- function(splist) {
  cleaned <- simple_cap(trimws(splist)) # lower case, first letter upper case

  # Drop the open-nomenclature qualifiers
  for (qualifier in c("cf\\.", "aff\\.")) {
    cleaned <- gsub(qualifier, "", cleaned)
  }

  # Trim again, then turn names separated by "_" into space-separated names
  cleaned <- gsub("_", " ", trimws(cleaned))

  # Hybrids: remove a standalone "x" at the start, end or middle of the name
  without_x <- gsub("(^x )|( x$)|( x )", " ", cleaned)
  had_hybrid_mark <- cleaned != without_x
  if (any(had_hybrid_mark)) {
    sp_hybrids <- splist[had_hybrid_mark]
    quoted_names <- paste(paste0("'", sp_hybrids, "'"), collapse = ", ")
    warning(
      paste(
        "The 'x' sign indicating hybrids have been removed in the",
        "following names before search:",
        quoted_names
      ),
      immediate. = TRUE, call. = FALSE
    )
  }

  # Merge multiple spaces and strip leading/trailing whitespace
  gsub("(?<=[\\s])\\s*|^\\s+|\\s+$", "", without_x, perl = TRUE)
}
# Lower-case every string, collapse runs of whitespace between words into a
# single space, and upper-case only the first character of each string
# (e.g. "tapirus  BAIRDII" becomes "Tapirus bairdii").
#
# x: character vector. Returns a character vector of the same length.
# vapply() is used instead of sapply() so the result is always a character
# vector; with sapply() an empty input produced a list and the function failed.
simple_cap <- function(x) {
  # Split each string into words, convert to lower case and re-join
  lowered <- vapply(
    strsplit(x, "\\s+"),
    function(parts) paste(tolower(parts), collapse = " "),
    character(1)
  )
  # Upper-case the first character of each string
  vapply(
    strsplit(lowered, ""),
    function(chars) {
      if (length(chars) > 0) {
        chars[1] <- toupper(chars[1])
      }
      paste(chars, collapse = "")
    },
    character(1)
  )
}
# Return the values that occur more than once in `vector`.
# The result is the character labels of the repeated values, in the order
# produced by table().
find_duplicates <- function(vector) {
  counts <- table(vector) # frequency of each distinct value
  repeated <- counts > 1 # TRUE for values seen more than once
  names(counts[repeated])
}
################
# Draw a map of Colombia in which the departamentos where `species` occurs
# are filled in blue and all the others in white.
#
# Arguments:
#   species  A single scientific name, written exactly as in
#            taxon$scientificName (use search_mammalcol() to find it).
#   legend   Logical; TRUE (default) draws the departamento legend.
#
# Returns a ggplot object. Stops with an error when the arguments are
# invalid, when ggplot2 or sf is not installed, or when the species is not in
# the taxon table. Reads the objects `taxon` and `colmap`.
mammalmap <- function(species, legend = TRUE) {
  # Validate the arguments before doing any work
  if (missing(species)) {
    stop("Argument species was not included")
  }
  if (!is.character(species)) {
    stop(paste0(
      "Argument species must be a character, not ",
      paste(class(species), collapse = "/")
    ))
  }
  if (length(species) != 1) {
    stop("Argument species must be a single species name")
  }
  if (!is.logical(legend)) {
    stop(paste0(
      "Argument legend must be logical, not ",
      paste(class(legend), collapse = "/")
    ))
  }

  # Plotting dependencies: fail with a clear message instead of installing
  # packages on the user's machine from inside a function.
  for (pkg in c("ggplot2", "sf")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(
        paste0(
          "Package '", pkg, "' is required by mammalmap(). ",
          "Install it with install.packages(\"", pkg, "\")"
        ),
        call. = FALSE
      )
    }
  }

  sp_id <- which(taxon$scientificName == species)
  # if species is not in the table, sp_id is integer(0)
  if (length(sp_id) == 0) {
    stop(paste0("The species should be in the list. Make sure you use the function search_mammalcol first. ", species, " is not a species present in Colombia"))
  }
  sp_id <- sp_id[1] # guard against a name listed more than once

  # Departamentos where the species occurs: the distribution field is a
  # "|"-separated list; trimws() removes spaces around each name
  present <- trimws(strsplit(taxon$distribution[sp_id], "\\|")[[1]])

  # One fill colour per departamento. The vector is named so that
  # scale_fill_manual() matches colours to departamentos by name instead of
  # relying on the row order of `colmap`.
  depto_names <- unique(as.character(colmap$NAME_1))
  fill_values <- stats::setNames(
    ifelse(depto_names %in% present, "blue", "white"),
    depto_names
  )

  show_legend <- isTRUE(legend)

  # make the map
  mapa <- ggplot2::ggplot(colmap) +
    ggplot2::geom_sf(
      ggplot2::aes(fill = NAME_1),
      show.legend = if (show_legend) NA else FALSE # NA is the geom default
    ) +
    ggplot2::scale_fill_manual(values = fill_values) +
    ggplot2::labs(subtitle = taxon$scientificName[sp_id]) +
    ggplot2::theme(
      plot.subtitle = ggplot2::element_text(face = "italic") # italic name
    )

  if (show_legend) {
    mapa <- mapa +
      ggplot2::theme(
        legend.position = "right", # location legend
        legend.title = ggplot2::element_blank(),
        legend.text = ggplot2::element_text(size = 8) # text depto size
      )
  }

  mapa
} # end mammalmap
##################
# List the species recorded in one or more Colombian departamentos.
#
# Arguments:
#   states  Character vector with one or more departamento names.
#   type    "any":  species present in at least one of `states`;
#           "only": species whose distribution is exactly `states`;
#           "all":  species present in every one of `states`.
#   taxa    Optional character vector of orders; when given, only species
#           whose `order` is in `taxa` are returned.
#
# Returns a data frame with scientificName, family, order and locality, or NA
# when no record matches. Reads the objects `distribution` and `taxon`.
#
# Departamento names are compared as whole names against the "|"-separated
# entries of `locality`, not as regular expressions, so "Santander" does not
# match "Norte de Santander" and the order of names inside `locality` does
# not matter.
sp_by_depto <- function(states, type = c("any", "only", "all"), taxa = NULL) {
  if (length(states) == 0) stop("Please provide at least one Colombian Departamento")
  type <- match.arg(type)
  states <- unique(trimws(as.character(states)))

  # Split every locality string into its departamento names
  localities <- lapply(
    strsplit(as.character(distribution$locality), "|", fixed = TRUE),
    trimws
  )

  keep <- vapply(
    localities,
    function(deptos) {
      switch(type,
        any = any(states %in% deptos),
        only = setequal(states, deptos),
        all = all(states %in% deptos)
      )
    },
    logical(1)
  )
  res <- distribution[keep, , drop = FALSE]

  if (nrow(res) == 0) {
    return(NA)
  }

  taxon_cols <- c("scientificName", "family", "order", "id")
  taxon_rows <- if (is.null(taxa)) {
    rep(TRUE, nrow(taxon))
  } else {
    taxon$order %in% taxa
  }
  # [, -1] removes the id column used for the join
  merge(taxon[taxon_rows, taxon_cols], res[, c("id", "locality")], by = "id")[, -1]
}
```
# mammalcol <img src="man/figures/logo.png" align="right" height="139" alt="" />
<!-- badges: start -->
[![R-CMD-check](https://github.com/dlizcano/mammalcol/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/dlizcano/mammalcol/actions/workflows/R-CMD-check.yaml)
[![Codecov test coverage](https://codecov.io/gh/dlizcano/mammalcol/branch/main/graph/badge.svg)](https://app.codecov.io/gh/dlizcano/mammalcol?branch=main)
[![CRAN status](https://www.r-pkg.org/badges/version/mammalcol)](https://CRAN.R-project.org/package=mammalcol)
[![lifecycle](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable)
[![size](https://img.shields.io/github/languages/code-size/dlizcano/mammalcol.svg)](https://github.com/dlizcano/mammalcol)
<!-- badges: end -->
The goal of mammalcol is to allow easy access to the List of Mammal Species of Colombia.
Researchers can explore Colombia's amazing variety of mammals using the mammalcol R package. This package includes information on 548 mammal species, making Colombia one of the world leaders in mammal diversity. The data comes from the latest "Checklist of the mammals (Mammalia) of Colombia" by [Ramírez-Chaves et al. (2021)](https://doi.org/10.47603/mano.v7n2.253). However, mammal taxonomy changes constantly, so the information in this package has been updated with the checklist [Mamíferos de Colombia](https://doi.org/10.15472/kl1whs), which is published and updated regularly by the Colombian Mammal Society.
## Installation
You can install the development version of mammalcol from [GitHub](https://github.com/dlizcano/mammalcol) with:
``` r
# install.packages("devtools")
devtools::install_github("dlizcano/mammalcol")
```
## Load library
```{r}
library(mammalcol)
```
## Example
### Search database
This is a basic example which shows you how to search mammal names in Colombia using a vector of mammal names.
```{r example1, eval=TRUE, echo=TRUE}
# define a vector with species to search for
# (it contains repeated names: search_mammalcol() reports them in a message
# and searches each unique name only once)
splist <- c(
"Tapirus bairdii", "Tapirus pinchaque", "Tapirus terrestris",
"Tapirus terrestris", "Pudu mephistophiles", "Tapirus bairdii"
)
# search in database; names without a match are dropped from the result
search_mammalcol(splist)
```
#### Search database with typos
`mammalcol` can identify and correct minor typos and lower-case genus names. Correcting typos or variations in species names through fuzzy matching is a crucial aspect of data management. It ensures precise retrieval by identifying and accommodating minor differences in the input names, which improves the reliability of analyses conducted on diverse, inconsistent, or assembled datasets.
```{r example2, eval=TRUE, echo=TRUE}
# vector with species names and intentional typos
# (misspelled words and a lower-case genus)
splist <- c("Tapiru terrestre", "pudu mephistophiles", "tapirus bairdii")
# search in database: names are standardized first and then fuzzy matched
# with the default max_distance = 0.2
search_mammalcol(splist)
```
### Produce a map
The function `mammalmap` produces a basic distribution map at the "Departamento" level for a single species.
```{r example3, eval=TRUE, echo=TRUE}
# pass a single species name, written exactly as in taxon$scientificName,
# to map it; the departamento legend is drawn by default (legend = TRUE)
mammalmap("Tapirus pinchaque")
```
#### Remove the legend
Use the argument `legend = FALSE` to draw the map without the legend.
```{r example3b, eval=TRUE, echo=TRUE}
# same map as above, drawn without the departamento legend
mammalmap("Tapirus pinchaque", legend=FALSE)
```
### See the full taxon table
```{r example4}
# show the first rows of the taxon table
head(taxon)
```
### Search mammals present by departamento
Use a departamento name or a vector of departamentos to find out which mammal species are present. The argument `type = "any"` retrieves mammals present in any of those departamentos.
```{r}
# species recorded in Arauca or in Norte de Santander (at least one of them)
occ.any <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "any")
head(occ.any)
```
The argument type = "only" retrieves species present only in that departamento and in no other departamento.
```{r}
# species whose recorded distribution is Norte de Santander and nothing else
occ.only <- sp_by_depto(c("Norte de Santander"), type = "only")
head(occ.only)
```
The argument `type = "all"` retrieves species present in both departamentos. The argument `taxa` limits the search to the given order. `occ.bats` has the bats present in Arauca and Norte de Santander.
```{r}
# species recorded in both Arauca and Norte de Santander
occ.all <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "all")
# same search restricted to the order Chiroptera (bats)
occ.bats <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "all", taxa = "Chiroptera")
head(occ.bats)
```
### Suggested citation
```{r eval=TRUE}
# print the citation information registered for the mammalcol package
citation("mammalcol")
```
Lizcano, DJ. (2024). mammalcol: Access to the List of Mammal Species of Colombia. R package version 0.1.0