Merge pull request #250 from AtlasOfLivingAustralia/dev

Dev
AtlasOfLivingAustralia · Nov 19, 2024 · 42fdded · 42fdded
2 parents 3597579 + 8cb2b9e
commit 42fdded
Show file tree

Hide file tree

Showing 122 changed files with 4,010 additions and 3,749 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,19 +1,19 @@
 Package: galah
 Type: Package
 Title: Biodiversity Data from the GBIF Node Network
-Version: 2.0.2
+Version: 2.1.0
 Authors@R: 
     c(person(given = "Martin",
              family = "Westgate",
              email = "[email protected]",
              role = c("aut", "cre")),
-      person(given = "Matilda",
-             family = "Stevenson",
-             role = "aut"),
       person(given = "Dax",
              family = "Kellie",
              email = "[email protected]",
              role = "aut"),
+      person(given = "Matilda",
+             family = "Stevenson",
+             role = "aut"),
       person(given = "Peggy",
              family = "Newman",
              email = "[email protected]",
@@ -26,7 +26,7 @@ Description: The Global Biodiversity Information Facility
     Australia ('ALA', <https://www.ala.org.au>). 'galah' enables the R community 
     to directly access data and resources hosted by 'GBIF' and its partner nodes.
 Depends:
-    R (>= 4.1.0)
+    R (>= 4.3.0)
 Imports:
     cli,
     crayon,
@@ -61,6 +61,6 @@ BugReports: https://github.com/AtlasOfLivingAustralia/galah-R/issues
 Maintainer: Martin Westgate <[email protected]>
 LazyLoad: yes
 VignetteBuilder: knitr
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
@@ -51,14 +51,14 @@ export(galah_apply_profile)
 export(galah_bbox)
 export(galah_call)
 export(galah_config)
-export(galah_down_to)
 export(galah_filter)
 export(galah_geolocate)
 export(galah_group_by)
 export(galah_identify)
 export(galah_polygon)
 export(galah_radius)
 export(galah_select)
+export(geolocate)
 export(group_by)
 export(identify)
 export(request_data)
@@ -213,7 +213,7 @@ importFrom(sf,st_is_simple)
 importFrom(sf,st_is_valid)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
-importFrom(stringr,str_extract_all)
+importFrom(stringr,str_remove)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_split)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,20 @@
+# galah 2.1.0
+
+### Image downloads
+galah now supports media downloads for all atlases. The only exceptions are GBIF and France, for which these APIs are not (yet) supported.
+
+### Minor improvements
+* Reorganise help files for improved clarity, largely following `dplyr` syntax
+* Support data profiles for Sweden and Spain
+* Species downloads (via `atlas_species()`) now work for Sweden, France, and Spain (#234)
+* `select()` now works for species downloads (i.e. via `atlas_species()`; #185, #227)
+* Temporarily remove Estonian atlas (https://elurikkus.ee) during system upgrades
+
+### Bug fixes
+* Fix bugs in `filter`, `group_by` etc. not recognising fields (#237)
+* Swap to new APIs for Australia (#163) and Austria (#231, #242)
+* Re-add taxonomic help under `?taxonomic_searches` (#241)
+
 # galah 2.0.2
 
 ### Minor improvements
@@ -111,7 +128,7 @@ The current implementation is experimental and back-end changes are expected in
 
 ### Minor improvements
 * `galah_config()` gains a `print` function, and now uses fuzzy matching for the `atlas` field to match to region, organisation or acronym (as defined by `show_all(atlases)`). An example use case is to match to organisations via acronyms, e.g. `galah_config(atlas = "ALA")`.
-* Improved support for data from Spain via [gbif.es](https://www.gbif.es) (name-matching, lists, spatial)
+* Improved support for data from Spain via [gbif.es](https://gbif.es) (name-matching, lists, spatial)
 * Swapped provider for data from France; formerly [gbif.fr](http://www.gbif.fr), now [OpenObs](https://openobs.mnhn.fr), as per advice from maintainers
 * Reading data from disk now uses `readr::read_csv` in place of `utils::read.csv` for improved speed
 * `show_all` (and associated sub-functions) gain a `limit` argument, set to NULL (i.e. no limit) by default

diff --git a/R/arrange.R b/R/arrange.R
@@ -1,24 +1,20 @@
-#' Arrange rows of a query
+#' Order rows using column values
 #' 
 #' @description
 #' `r lifecycle::badge("experimental")`  
 #' 
 #' `arrange.data_request()` arranges rows of a query on the server side, meaning 
-#' that prior to sending a query, the query is constructed in such a way that 
-#' information will be arranged when the query is processed. Any data that is 
-#' then returned by the query will have rows already pre-arranged.
-#' 
-#' The benefit of using `arrange()` within a `galah_call()` is that it is faster 
-#' to process arranging rows on the server side than arranging rows locally on 
-#' downloaded data, 
-#' especially if the dataset is large or complex.
-#' 
-#' `arrange()` can be used within a `galah_call()` pipe, but only  
-#' for queries of  `type = "occurrences-count"`. The `galah_call()` pipe must 
-#' include `count()` and finish with `collect()` (see examples). 
-#' 
+#' that the query is constructed in such a way that information will be arranged 
+#' when the query is processed. This only has an effect when used in combination
+#' with \code{\link[=count.data_request]{count()}} and 
+#' \code{\link[=group_by.data_request]{group_by()}}. Using `arrange()` within 
+#' a `galah_call()` pipe is useful when data should be delivered in a 
+#' non-default order, particularly if
+#' \code{\link[=slice_head.data_request]{slice_head()}} is also called.
 #' @param .data An object of class `data_request`
-#' @param ... Either `count` or `index`
+#' @param ... A variable to arrange the resulting tibble by. Should be one of 
+#' the variables also listed in \code{\link[=group_by.data_request]{group_by()}}.
+#' @returns An amended `data_request` with a completed `arrange` slot.
 #' @examples \dontrun{
 #' 
 #' # Arrange grouped counts by ascending year
@@ -49,7 +45,7 @@
 #'   collect()
 #' }
 #' @importFrom dplyr bind_cols
-#' @rdname arrange
+#' @name arrange.data_request
 #' @export
 arrange.data_request <- function(.data, ...){
   dots <- enquos(..., .ignore_empty = "all")
@@ -66,6 +62,6 @@ arrange.data_request <- function(.data, ...){
   return(.data)
 }
 
-#' @rdname arrange
+#' @rdname arrange.data_request
 #' @export
 arrange.metadata_request <- arrange.data_request
diff --git a/R/atlas_counts.R b/R/atlas_counts.R
@@ -1,54 +1,14 @@
-#' Return a count of records
-#'
-#' Prior to downloading data it is often valuable to have some estimate of how
-#' many records are available, both for deciding if the query is feasible,
-#' and for estimating how long it will take to download. Alternatively, for some kinds
-#' of reporting, the count of observations may be all that is required, for example
-#' for understanding how observations are growing or shrinking in particular
-#' locations, or for particular taxa. To this end, `atlas_counts()` takes
-#' arguments in the same format as [atlas_occurrences()], and
-#' provides either a total count of records matching the criteria, or a
-#' `data.frame` of counts matching the criteria supplied to the `group_by`
-#' argument.
-#'
-#' @param request optional `data_request` object: generated by a call to
-#' [galah_call()].
-#' @param identify `data.frame`: generated by a call to
-#' [galah_identify()].
-#' @param filter `data.frame`: generated by a call to
-#' [galah_filter()]
-#' @param geolocate `string`: generated by a call to
-#' [galah_geolocate()]
-#' @param data_profile `string`: generated by a call to
-#' [galah_apply_profile()]
-#' @param group_by `data.frame`: An object of class `galah_group_by`,
-#' as returned by [galah_group_by()]. Alternatively a vector of field
-#' names (see `search_all(fields)` and `show_all(fields)`.
+#' @rdname atlas_
+#' @order 2
+#' @param group_by `tibble`: generated by a call to [galah_group_by()].
 #' @param limit `numeric`: maximum number of categories to return, defaulting to 100.
 #' If limit is NULL, all results are returned. For some categories this will
 #' take a while.
-#' @param type `string`: one of `c("occurrences-count", "species-count")`. 
-#' Defaults to `"occurrences-count"`, which returns the number of records
+#' @param type `string`: one of `"occurrences"` or `"species"`. 
+#' Defaults to `"occurrences"`, which returns the number of records
 #' that match the selected criteria; alternatively returns the number of 
 #' species. Formerly accepted arguments (`"records"` or `"species"`) are
 #' deprecated but remain functional.
-#' @return
-#' An object of class `tbl_df` and `data.frame` (aka a tibble) returning: 
-#'  * A single number, if `group_by` is not specified or,
-#'  * A summary of counts grouped by field(s), if `group_by` is specified
-#'
-#' @examples \dontrun{
-#' # classic syntax:
-#' galah_call() |>
-#'   galah_filter(year == 2015) |>
-#'   atlas_counts()
-#' 
-#' # synonymous with:
-#' request_data() |>
-#'   filter(year == 2015) |>
-#'   count() |>
-#'   collect()
-#' }
 #' @export
 atlas_counts <- function(request = NULL, 
                          identify = NULL, 
@@ -70,25 +30,4 @@ atlas_counts <- function(request = NULL,
     count() |>
     slice_head(n = limit) |>
     collect()
-}
-
-#' @rdname atlas_counts
-#' @param x An object of class `data_request`, created using [galah_call()]
-#' @param wt currently ignored
-#' @param ... currently ignored
-#' @param sort currently ignored
-#' @param name currently ignored
-#' @importFrom dplyr count
-#' @export
-count.data_request <- function(x, 
-                               ..., 
-                               wt, 
-                               sort, 
-                               name){
-  x$type <- switch(x$type, 
-         "occurrences" = "occurrences-count",
-         "species" = "species-count",
-         "media" = abort("type = 'media' is not supported by `count()`"),
-         abort("`count()` only supports `type = 'occurrences' or` `'species'`"))
-  x
-}
+}
diff --git a/R/atlas_distributions.R b/R/atlas_distributions.R
@@ -10,7 +10,7 @@
 #' @return An object of classes `sf`, `tbl`, `tbl_df` and `data.frame` 
 #' (aka a Simple feature collection) of distribution maps, with one column per
 #' map, and spatial data stored in the `geometry` column.
-#' @examples
+#' @examples \dontrun{
 #' 
 #' x <- show_all(distributions) |>
 #'   slice_head(n = 4)
@@ -21,6 +21,7 @@
 #' ggplot() +
 #'   geom_sf(data = ozmap_country) +
 #'   geom_sf(data = st_as_sf(x))
+#' }
 #' @noRd
 #' @keywords Internal
 atlas_distributions <- function(request = NULL,