Skip to content

Commit

Permalink
Merge pull request #264 from AtlasOfLivingAustralia/dev
Browse files Browse the repository at this point in the history
release version 2.1.1
  • Loading branch information
mjwestgate authored Feb 7, 2025
2 parents 9c6b0fa + a09285f commit 40130d4
Show file tree
Hide file tree
Showing 72 changed files with 1,368 additions and 635 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:

name: R-CMD-check.yaml

permissions: read-all

jobs:
R-CMD-check:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
64 changes: 0 additions & 64 deletions .travis.yml

This file was deleted.

5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: galah
Type: Package
Title: Biodiversity Data from the GBIF Node Network
Version: 2.1.0
Version: 2.1.1
Authors@R:
c(person(given = "Martin",
family = "Westgate",
Expand Down Expand Up @@ -44,7 +44,8 @@ Imports:
tibble,
tidyr,
tidyselect,
utils
utils,
xml2
Suggests:
covr,
gt,
Expand Down
11 changes: 1 addition & 10 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ export(galah_select)
export(geolocate)
export(group_by)
export(identify)
export(read_zip)
export(request_data)
export(request_files)
export(request_metadata)
Expand Down Expand Up @@ -163,9 +164,6 @@ importFrom(purrr,pluck_depth)
importFrom(purrr,rate_backoff)
importFrom(purrr,rate_delay)
importFrom(purrr,rate_sleep)
importFrom(readr,cols)
importFrom(readr,read_csv)
importFrom(readr,read_tsv)
importFrom(rlang,.data)
importFrom(rlang,abort)
importFrom(rlang,as_label)
Expand Down Expand Up @@ -221,10 +219,3 @@ importFrom(stringr,str_to_title)
importFrom(stringr,str_trim)
importFrom(tibble,as_tibble)
importFrom(tibble,tibble)
importFrom(tidyr,drop_na)
importFrom(tidyr,unnest_longer)
importFrom(tidyselect,eval_select)
importFrom(utils,URLdecode)
importFrom(utils,URLencode)
importFrom(utils,adist)
importFrom(utils,unzip)
16 changes: 16 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# galah 2.1.1

### Minor improvements
* New vignette to demonstrate methods that support reproducibility
* New function `read_zip()` to reimport downloaded files
* Support `group_by()` in occurrence queries to allow facet downloads by any variable (#195, #258)
* `atlas_citation()` now gives clearer output

### Bug fixes
* Improved documentation to use `galah_filter()` to specify a `taxon_concept_id` rather than `galah_identify()` (#245)
* Adding a `field` without data no longer breaks occurrence downloads (#248)
* Queries that filter using both `!` and `%in%` parse correctly (#251)
* `show_all(lists)` no longer truncates results to first 500 rows (#252)
* `atlas_counts()` no longer errors when `group_by()` is set but record count = 0 (#254)
* Empty tibbles returned by `atlas_species()` now have the same column names as those returned by queries that produce a result (#255)

# galah 2.1.0

### Image downloads
Expand Down
130 changes: 96 additions & 34 deletions R/atlas_citation.R
Original file line number Diff line number Diff line change
@@ -1,47 +1,109 @@
#' Generate a citation for occurrence data
#'
#' If a `data.frame` was generated using [atlas_occurrences()],
#' and the `mint_doi` argument was set to `TRUE`, the DOI associated
#' with that dataset is appended to the resulting `data.frame` as an
#' attribute. This function simply formats that DOI as a citation that can be
#' included in a scientific publication. Please also consider citing this
#' package, using the information in `citation("galah")`.
#' @param data data.frame: occurrence data generated by
#' [atlas_occurrences()]
#' @return A string containing the citation for that dataset.
#' If a `tibble` containing occurrences was generated using galah (either via
#' \code{\link[=collect.data_request]{collect()}} or [atlas_occurrences()]), it
#' will usually contain associated metadata stored in `attributes()` that can be
#' used to build a citation for that dataset. This function simply extracts that
#' information, formats it, then both invisibly returns the formatted citation
#' and prints it to the console.
#' @param data A `tibble` generated by [atlas_occurrences()] or similar
#' @return Invisibly returns a string containing the citation for that dataset.
#' Primarily called for the side-effect of printing this string to the console.
#' @examples \dontrun{
#' atlas_citation(doi)
#' x <- galah_call() |>
#' identify("Heleioporus") |>
#' filter(year == 2022) |>
#' collect()
#' atlas_citation(x)
#' }
#' @export

atlas_citation <- function(data) {
doi <- attributes(data)$doi
if (is.null(doi)) {
bullets <- c(
"This data does not have a DOI attached.",
i = "Did you set `atlas_occurrences(mint_doi = TRUE)`?",
i = "`atlas_citation` extracts this citation info when present."
)
warn(bullets, call = caller_env())
glue("Please consider citing R & galah. To do so, call:
citation()
citation('galah')")
}else{
current_date <- format(Sys.Date(), "%e %B %Y") |>
# get basic information from file
modified_date <- attributes(data)$modified_date
if(is.null(modified_date)){
modified_date <- Sys.Date() |>
format("%e %B %Y") |>
trimws()
}
doi <- attributes(data)$doi
citation <- attributes(data)$citation
# search_url <- attributes(data)$search_url

# get existing citation info
r_citation <- citation() |>
print(style = 'text') |>
utils::capture.output() |>
glue_collapse(sep = " ")
galah_citation <- c("Westgate M, Kellie D, Stevenson M & Newman P (2025):",
"_galah: Biodiversity Data from the GBIF Node Network_.",
"R package version 2.1.1.",
"doi: 10.32614/CRAN.package.galah") |>
glue_collapse(sep = " ")

# ask users to cite galah and R
suffix_text <- glue("
Please consider citing R & galah, in addition to your dataset:
{r_citation}
{galah_citation}")


# set case when DOI is missing
if(!is.null(doi)) {
# ALA
if(grepl("10.26197/ala.", doi)){
org_text <- "Atlas of Living Australia"
description <- "Occurrence download"
result <- glue("
The citation for this dataset is:
Atlas of Living Australia ({modified_date}) Occurrence download {doi}
{suffix_text}
")
cli::cli_text(result)
invisible(result)
# GBIF
}else if(grepl("10.15468/dl.", doi)){
result <- glue("
The citation for this dataset is:
GBIF.org ({modified_date}) GBIF Occurrence Download {doi}
{suffix_text}
")
cli::cli_text(result)
invisible(result)
# Unknown
}else{
org_text <- "GBIF.org"
description <- "GBIF Occurrence Download"
bullets <- c(
"The supplied DOI was not recognized.",
i = "Please consider checking the atlas in question for their citation guidelines"
)
cli::cli_warn(bullets)
invisible(bullets)
}
}else{
if(!is.null(citation)){
result <- glue("
The citation for this dataset is:
{citation}
{suffix_text}
")
cli::cli_text(result)
invisible(result)
}else{
bullets <- c(
"This dataset does not have any citation information attached.",
i = "Please consider checking the atlas in question for their citation guidelines"
)
cli::cli_warn(bullets)
invisible(bullets)
}
glue("
{org_text} ({current_date}) {description} {doi}
Please consider citing R & galah, in addition to your dataset. To do so, call:
citation()
citation('galah')
")
}
}
3 changes: 1 addition & 2 deletions R/atlas_media.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#' @importFrom purrr pluck
#' @importFrom rlang abort
#' @importFrom stringr str_remove
#' @importFrom tidyr unnest_longer
#' @export
atlas_media <- function(request = NULL,
identify = NULL,
Expand Down Expand Up @@ -94,7 +93,7 @@ atlas_media <- function(request = NULL,
# get occurrences
occ <- query_collapse |>
collect(wait = TRUE) |>
unnest_longer(col = any_of(present_fields))
tidyr::unnest_longer(col = any_of(present_fields))

if(!any(colnames(occ) == "all_image_url")){
occ$media_id <- build_media_id(occ)
Expand Down
18 changes: 9 additions & 9 deletions R/atlas_occurrences.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#' Retrieve a database query
#'
#' @description
#' An alternative to using `collect()` at the end of a query pipe is to
#' call a function with the `atlas_` prefix. These solutions are basically
#' synonymous, but `atlas_` functions differ in two ways:
#' An alternative to using \code{\link[=collect.data_request]{collect()}} at the
#' end of a query pipe is to call a function with the `atlas_` prefix. These
#' solutions are basically synonymous, but `atlas_` functions differ in two ways:
#'
#' * They have the ability to accept `filter`, `select` etc as arguments,
#' rather than within a pipe, but **only** when using the `galah_` forms of
#' those functions (e.g. `galah_filter()`).
#' rather than within a pipe; but **only** when using the `galah_` forms of
#' those functions (e.g. [galah_filter()]).
#' * `atlas_` functions do not require you to specify the `method` or `type`
#' arguments to `galah_call()`, as they are more specific in what data are
#' arguments to [galah_call()], as they are more specific in what data are
#' being requested.
#'
#' @name atlas_
Expand Down Expand Up @@ -76,7 +76,7 @@
#' # Get a list of species within genus "Heleioporus"
#' # (every row is a species with associated taxonomic data)
#' galah_call() |>
#' galah_identify("Heleioporus") |>
#' identify("Heleioporus") |>
#' atlas_species()
#'
#' # Download Regent Honeyeater records with multimedia attached
Expand All @@ -88,8 +88,8 @@
#'
#' # Get a taxonomic tree of *Chordata* down to the class level
#' galah_call() |>
#' galah_identify("chordata") |>
#' galah_filter(rank == class) |>
#' identify("chordata") |>
#' filter(rank == class) |>
#' atlas_taxonomy()
#' }
#' @export
Expand Down
13 changes: 11 additions & 2 deletions R/build.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,20 @@ build_single_fq <- function(query){
# ensure all arguments from galah_filter are enclosed in brackets
# EXCEPT for assertions
fq <- query$fq
missing_brackets <- !grepl("^\\(", fq) & !grepl("assertions", fq)
missing_brackets <-
!grepl("^\\(", fq) & # already has brackets
!grepl("assertions", fq) & # assertions don't need additional brackets
!grepl("^-\\(", fq) # negative query already has brackets
if(any(missing_brackets)){
fq[missing_brackets] <- paste0("(", fq[missing_brackets], ")")
}
fq_single <- paste(fq, collapse = "AND")
# add brackets to non-negative AND statements
# (adding additional brackets to negative statements breaks them)
if(any(!grepl("^-\\(", fq))) {
fq_single <- glue::glue_collapse(glue("{fq}"), "AND")
} else {
fq_single <- glue::glue_collapse(glue("({fq})"), "AND")
}
c(fq = fq_single, query[names(query) != "fq"])
}else{
query
Expand Down
Loading

0 comments on commit 40130d4

Please sign in to comment.