Skip to content

Commit

Permalink
Add hash_xxhash etc. functions
Browse files Browse the repository at this point in the history
  • Loading branch information
gaborcsardi committed Oct 19, 2024
1 parent 20f41c8 commit bddf9e1
Show file tree
Hide file tree
Showing 17 changed files with 7,281 additions and 8 deletions.
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -173,19 +173,27 @@ export(hash_emoji)
export(hash_file_md5)
export(hash_file_sha1)
export(hash_file_sha256)
export(hash_file_xxhash)
export(hash_file_xxhash64)
export(hash_md5)
export(hash_obj_animal)
export(hash_obj_emoji)
export(hash_obj_md5)
export(hash_obj_sha1)
export(hash_obj_sha256)
export(hash_obj_xxhash)
export(hash_obj_xxhash64)
export(hash_raw_animal)
export(hash_raw_emoji)
export(hash_raw_md5)
export(hash_raw_sha1)
export(hash_raw_sha256)
export(hash_raw_xxhash)
export(hash_raw_xxhash64)
export(hash_sha1)
export(hash_sha256)
export(hash_xxhash)
export(hash_xxhash64)
export(is_ansi_tty)
export(is_dynamic_tty)
export(is_utf8_output)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
* `ansi_collapse()` is now correct for length-1 vectors with style "head"
if width is specified (@rundel, #590).

* New `hash_xxhash()` etc. functions to calculate the xxHash of strings,
raw vectors, objects, files.

# cli 3.6.3

* cli now builds on ARM Windows.
Expand Down
111 changes: 111 additions & 0 deletions R/hash.R
Original file line number Diff line number Diff line change
Expand Up @@ -498,3 +498,114 @@ hash_obj_animal <- function(x, n_adj = 2, serialize_version = 2) {
sr <- serialize(x, NULL, version = serialize_version)[-(1:14)]
hash_raw_animal(sr, n_adj = n_adj)
}

#' xxHash
#'
#' Extremely fast hash algorithm.
#'
#' @param x Character vector. If not a character vector, then
#' [as.character()] is used to try to coerce it into one. `NA` entries
#' will have an `NA` hash.
#' @return `hash_xxhash()` returns a character vector of hexadecimal
#' xxHash hashes.
#'
#' @family hash functions
#'
#' @export
#' @examples
#' hash_xxhash(c("foo", NA, "bar", ""))

hash_xxhash <- function(x) {
  # Coerce non-character input to character. Codecov reported this line
  # (R/hash.R#L519) as not covered by tests.
  if (!is.character(x)) x <- as.character(x)

  # Missing entries keep an NA hash; only the non-missing strings are
  # handed to the native routine.
  na <- is.na(x)
  x[na] <- NA_character_
  x[!na] <- .Call(clic_xxhash, x[!na])
  x
}

#' @export
#' @rdname hash_xxhash
#' @details `hash_raw_xxhash()` calculates the xxHash hash of the bytes
#' of a raw vector.
#' @return `hash_raw_xxhash()` returns a character scalar.

hash_raw_xxhash <- function(x) {
  # Only raw vectors are accepted; anything else fails the assertion
  # before the native routine is reached.
  stopifnot(is.raw(x))

  digest <- .Call(clic_xxhash_raw, x)
  digest
}

#' @export
#' @rdname hash_xxhash
#' @param serialize_version Workspace format version to use, see
#' [base::serialize()].
#' @details `hash_obj_xxhash()` calculates the xxHash hash of an R
#' object. The object is serialized into a binary vector first.
#' @return `hash_obj_xxhash()` returns a character scalar.

hash_obj_xxhash <- function(x, serialize_version = 2) {
  # Serialize the object to an in-memory raw vector.
  bytes <- serialize(x, connection = NULL, version = serialize_version)

  # The first 14 bytes are dropped before hashing.
  # NOTE(review): these look like the serialization header (format and
  # version information) -- confirm.
  payload <- bytes[-seq_len(14)]
  hash_raw_xxhash(payload)
}

#' @export
#' @rdname hash_xxhash
#' @param paths Character vector of file names.
#' @details `hash_file_xxhash()` calculates the xxHash hash of one or
#' more files.
#'
#' @return `hash_file_xxhash()` returns a character vector of xxHash
#' hashes.

hash_file_xxhash <- function(paths) {
  # Coerce non-character input to character. Codecov reported this line
  # (R/hash.R#L560) as not covered by tests.
  if (!is.character(paths)) paths <- as.character(paths)

  # Paths are normalized without requiring that the files exist.
  paths <- normalizePath(paths, mustWork = FALSE)

  # The native routine receives UTF-8 paths on Windows and natively
  # encoded paths elsewhere.
  if (is_windows()) {
    # Codecov reported this line (R/hash.R#L563) as not covered by tests.
    paths <- enc2utf8(paths)
  } else {
    paths <- enc2native(paths)
  }
  .Call(clic_xxhash_file, paths)
}

#' @export
#' @rdname hash_xxhash
#' @details The `64` functions calculate the 64 bit variant
#' of xxHash. Otherwise they work the same.

hash_xxhash64 <- function(x) {
  # Coerce non-character input to character. Codecov reported this line
  # (R/hash.R#L576) as not covered by tests.
  if (!is.character(x)) x <- as.character(x)

  # Missing entries keep an NA hash; only the non-missing strings are
  # handed to the native routine.
  na <- is.na(x)
  x[na] <- NA_character_
  x[!na] <- .Call(clic_xxhash64, x[!na])
  x
}

#' @export
#' @rdname hash_xxhash

hash_raw_xxhash64 <- function(x) {
  # Only raw vectors are accepted; anything else fails the assertion
  # before the native routine is reached.
  stopifnot(is.raw(x))

  digest <- .Call(clic_xxhash64_raw, x)
  digest
}

#' @export
#' @rdname hash_xxhash

hash_obj_xxhash64 <- function(x, serialize_version = 2) {
  # Serialize the object to an in-memory raw vector.
  bytes <- serialize(x, connection = NULL, version = serialize_version)

  # The first 14 bytes are dropped before hashing.
  # NOTE(review): these look like the serialization header (format and
  # version information) -- confirm.
  payload <- bytes[-seq_len(14)]
  hash_raw_xxhash64(payload)
}

#' @export
#' @rdname hash_xxhash

hash_file_xxhash64 <- function(paths) {
  # Coerce non-character input to character. Codecov reported this line
  # (R/hash.R#L603) as not covered by tests.
  if (!is.character(paths)) paths <- as.character(paths)

  # Paths are normalized without requiring that the files exist.
  paths <- normalizePath(paths, mustWork = FALSE)

  # The native routine receives UTF-8 paths on Windows and natively
  # encoded paths elsewhere.
  if (is_windows()) {
    # Codecov reported this line (R/hash.R#L606) as not covered by tests.
    paths <- enc2utf8(paths)
  } else {
    paths <- enc2native(paths)
  }
  .Call(clic_xxhash64_file, paths)
}
7 changes: 4 additions & 3 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,9 @@ reference:

- title: Raising conditions with formatted cli messages
desc: |
This section documents cli functions for signalling
errors, warnings or messages using
abort(), warn() and inform() from
This section documents cli functions for signalling
errors, warnings or messages using
abort(), warn() and inform() from
[rlang](https://rlang.r-lib.org/reference/topic-condition-formatting.html)
contents:
- cli_abort
Expand Down Expand Up @@ -221,6 +221,7 @@ reference:
- hash_md5
- hash_sha1
- hash_sha256
- hash_xxhash

- title: Utilities and Configuration
contents:
Expand Down
3 changes: 2 additions & 1 deletion man/hash_animal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/hash_emoji.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/hash_md5.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/hash_sha1.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/hash_sha256.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 78 additions & 0 deletions man/hash_xxhash.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions src/cli.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ SEXP clic_sha256_file(SEXP paths);
SEXP clic_sha1(SEXP strs);
SEXP clic_sha1_raw(SEXP r);
SEXP clic_sha1_file(SEXP paths);
SEXP clic_xxhash(SEXP strs);
SEXP clic_xxhash_raw(SEXP r);
SEXP clic_xxhash_file(SEXP paths);
SEXP clic_xxhash64(SEXP strs);
SEXP clic_xxhash64_raw(SEXP r);
SEXP clic_xxhash64_file(SEXP paths);
SEXP clic_tty_size(void);
SEXP clic_ansi_simplify(SEXP x, SEXP keep_csi);
SEXP clic_ansi_substr(SEXP x, SEXP start, SEXP stop);
Expand Down
6 changes: 6 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ static const R_CallMethodDef callMethods[] = {
{ "clic_sha1", (DL_FUNC) clic_sha1, 1 },
{ "clic_sha1_raw", (DL_FUNC) clic_sha1_raw, 1 },
{ "clic_sha1_file", (DL_FUNC) clic_sha1_file, 1 },
{ "clic_xxhash", (DL_FUNC) clic_xxhash, 1 },
{ "clic_xxhash_raw", (DL_FUNC) clic_xxhash_raw, 1 },
{ "clic_xxhash_file", (DL_FUNC) clic_xxhash_file, 1 },
{ "clic_xxhash64", (DL_FUNC) clic_xxhash64, 1 },
{ "clic_xxhash64_raw", (DL_FUNC) clic_xxhash64_raw, 1 },
{ "clic_xxhash64_file", (DL_FUNC) clic_xxhash64_file, 1 },
{ "clic_tty_size", (DL_FUNC) clic_tty_size, 0 },
{ "clic_ansi_simplify", (DL_FUNC) clic_ansi_simplify, 2 },
{ "clic_ansi_substr", (DL_FUNC) clic_ansi_substr, 3 },
Expand Down
44 changes: 44 additions & 0 deletions src/xxhash.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2021 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/


/*
* xxhash.c instantiates functions defined in xxhash.h
*/

#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#define XXH_INLINE_ALL

#include "xxhash.h"
Loading

0 comments on commit bddf9e1

Please sign in to comment.