Skip to content

Commit

Permalink
Merge pull request #734 from r-lib/feature/xxhash
Browse files Browse the repository at this point in the history
Add hash_xxhash etc. functions
  • Loading branch information
gaborcsardi authored Oct 19, 2024
2 parents 20f41c8 + bddf9e1 commit 38cef38
Show file tree
Hide file tree
Showing 17 changed files with 7,281 additions and 8 deletions.
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -173,19 +173,27 @@ export(hash_emoji)
export(hash_file_md5)
export(hash_file_sha1)
export(hash_file_sha256)
export(hash_file_xxhash)
export(hash_file_xxhash64)
export(hash_md5)
export(hash_obj_animal)
export(hash_obj_emoji)
export(hash_obj_md5)
export(hash_obj_sha1)
export(hash_obj_sha256)
export(hash_obj_xxhash)
export(hash_obj_xxhash64)
export(hash_raw_animal)
export(hash_raw_emoji)
export(hash_raw_md5)
export(hash_raw_sha1)
export(hash_raw_sha256)
export(hash_raw_xxhash)
export(hash_raw_xxhash64)
export(hash_sha1)
export(hash_sha256)
export(hash_xxhash)
export(hash_xxhash64)
export(is_ansi_tty)
export(is_dynamic_tty)
export(is_utf8_output)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
* `ansi_collapse()` is now correct for length-1 vectors with style "head"
if width is specified (@rundel, #590).

* New `hash_xxhash()` etc. functions to calculate the xxHash of strings,
raw vectors, objects, and files.

# cli 3.6.3

* cli now builds on ARM Windows.
Expand Down
111 changes: 111 additions & 0 deletions R/hash.R
Original file line number Diff line number Diff line change
Expand Up @@ -498,3 +498,114 @@ hash_obj_animal <- function(x, n_adj = 2, serialize_version = 2) {
sr <- serialize(x, NULL, version = serialize_version)[-(1:14)]
hash_raw_animal(sr, n_adj = n_adj)
}

#' xxHash
#'
#' Extremely fast hash algorithm.
#'
#' @param x Character vector. If not a character vector, then
#' [as.character()] is used to try to coerce it into one. `NA` entries
#' will have an `NA` hash.
#' @return `hash_xxhash()` returns a character vector of hexadecimal
#' xxHash hashes.
#'
#' @family hash functions
#'
#' @export
#' @examples
#' hash_xxhash(c("foo", NA, "bar", ""))

hash_xxhash <- function(x) {
  # Anything that is not already a character vector is coerced first.
  if (!is.character(x)) {
    x <- as.character(x)
  }

  # Missing entries stay missing; only the non-missing strings are passed
  # to the native routine and replaced by its result.
  missing <- is.na(x)
  present <- !missing
  x[missing] <- NA_character_
  x[present] <- .Call(clic_xxhash, x[present])
  x
}

#' @export
#' @rdname hash_xxhash
#' @details `hash_raw_xxhash()` calculates the xxHash hash of the bytes
#' of a raw vector.
#' @return `hash_raw_xxhash()` returns a character scalar.

hash_raw_xxhash <- function(x) {
  # Only raw vectors are accepted; any other input is an error.
  stopifnot(is.raw(x))
  digest <- .Call(clic_xxhash_raw, x)
  digest
}

#' @export
#' @rdname hash_xxhash
#' @param serialize_version Workspace format version to use, see
#' [base::serialize()].
#' @details `hash_obj_xxhash()` calculates the xxHash hash of an R
#' object. The object is serialized into a binary vector first.
#' @return `hash_obj_xxhash()` returns a character scalar.

hash_obj_xxhash <- function(x, serialize_version = 2) {
  # Serialize the object in memory (no connection) to get a raw vector.
  bytes <- serialize(x, connection = NULL, version = serialize_version)
  # The first 14 bytes are dropped before hashing.
  # NOTE(review): presumably this is the serialization header, removed so
  # the hash does not depend on the writing R version -- confirm.
  hash_raw_xxhash(bytes[-seq_len(14L)])
}

#' @export
#' @rdname hash_xxhash
#' @param paths Character vector of file names.
#' @details `hash_file_xxhash()` calculates the xxHash hash of one or
#' more files.
#'
#' @return `hash_file_xxhash()` returns a character vector of xxHash
#' hashes.

hash_file_xxhash <- function(paths) {
  if (!is.character(paths)) {
    paths <- as.character(paths)
  }

  # Paths are normalized, but files that do not exist are not an error here.
  full_paths <- normalizePath(paths, mustWork = FALSE)

  # Windows gets UTF-8 encoded paths, every other platform gets the
  # native encoding.
  encode <- if (is_windows()) enc2utf8 else enc2native
  .Call(clic_xxhash_file, encode(full_paths))
}

#' @export
#' @rdname hash_xxhash
#' @details The `64` functions calculate the 64-bit variant
#' of xxHash. Otherwise they work the same.

hash_xxhash64 <- function(x) {
  # Anything that is not already a character vector is coerced first.
  if (!is.character(x)) {
    x <- as.character(x)
  }

  # Missing entries stay missing; only the non-missing strings are passed
  # to the native routine and replaced by its result.
  is_missing <- is.na(x)
  to_hash <- !is_missing
  x[is_missing] <- NA_character_
  x[to_hash] <- .Call(clic_xxhash64, x[to_hash])
  x
}

#' @export
#' @rdname hash_xxhash

hash_raw_xxhash64 <- function(x) {
  # Only raw vectors are accepted; any other input is an error.
  stopifnot(is.raw(x))
  digest <- .Call(clic_xxhash64_raw, x)
  digest
}

#' @export
#' @rdname hash_xxhash

hash_obj_xxhash64 <- function(x, serialize_version = 2) {
  # Serialize the object in memory (no connection) to get a raw vector.
  bytes <- serialize(x, connection = NULL, version = serialize_version)
  # The first 14 bytes are dropped before hashing.
  # NOTE(review): presumably this is the serialization header, removed so
  # the hash does not depend on the writing R version -- confirm.
  hash_raw_xxhash64(bytes[-seq_len(14L)])
}

#' @export
#' @rdname hash_xxhash

hash_file_xxhash64 <- function(paths) {
  if (!is.character(paths)) {
    paths <- as.character(paths)
  }

  # Paths are normalized, but files that do not exist are not an error here.
  full_paths <- normalizePath(paths, mustWork = FALSE)

  # Windows gets UTF-8 encoded paths, every other platform gets the
  # native encoding.
  encode <- if (is_windows()) enc2utf8 else enc2native
  .Call(clic_xxhash64_file, encode(full_paths))
}
7 changes: 4 additions & 3 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,9 @@ reference:

- title: Raising conditions with formatted cli messages
desc: |
This section documents cli functions for signalling
errors, warnings or messages using
abort(), warn() and inform() from
This section documents cli functions for signalling
errors, warnings or messages using
abort(), warn() and inform() from
[rlang](https://rlang.r-lib.org/reference/topic-condition-formatting.html)
contents:
- cli_abort
Expand Down Expand Up @@ -221,6 +221,7 @@ reference:
- hash_md5
- hash_sha1
- hash_sha256
- hash_xxhash

- title: Utilities and Configuration
contents:
Expand Down
3 changes: 2 additions & 1 deletion man/hash_animal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/hash_emoji.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/hash_md5.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/hash_sha1.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/hash_sha256.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 78 additions & 0 deletions man/hash_xxhash.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions src/cli.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ SEXP clic_sha256_file(SEXP paths);
SEXP clic_sha1(SEXP strs);
SEXP clic_sha1_raw(SEXP r);
SEXP clic_sha1_file(SEXP paths);
SEXP clic_xxhash(SEXP strs);
SEXP clic_xxhash_raw(SEXP r);
SEXP clic_xxhash_file(SEXP paths);
SEXP clic_xxhash64(SEXP strs);
SEXP clic_xxhash64_raw(SEXP r);
SEXP clic_xxhash64_file(SEXP paths);
SEXP clic_tty_size(void);
SEXP clic_ansi_simplify(SEXP x, SEXP keep_csi);
SEXP clic_ansi_substr(SEXP x, SEXP start, SEXP stop);
Expand Down
6 changes: 6 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ static const R_CallMethodDef callMethods[] = {
{ "clic_sha1", (DL_FUNC) clic_sha1, 1 },
{ "clic_sha1_raw", (DL_FUNC) clic_sha1_raw, 1 },
{ "clic_sha1_file", (DL_FUNC) clic_sha1_file, 1 },
{ "clic_xxhash", (DL_FUNC) clic_xxhash, 1 },
{ "clic_xxhash_raw", (DL_FUNC) clic_xxhash_raw, 1 },
{ "clic_xxhash_file", (DL_FUNC) clic_xxhash_file, 1 },
{ "clic_xxhash64", (DL_FUNC) clic_xxhash64, 1 },
{ "clic_xxhash64_raw", (DL_FUNC) clic_xxhash64_raw, 1 },
{ "clic_xxhash64_file", (DL_FUNC) clic_xxhash64_file, 1 },
{ "clic_tty_size", (DL_FUNC) clic_tty_size, 0 },
{ "clic_ansi_simplify", (DL_FUNC) clic_ansi_simplify, 2 },
{ "clic_ansi_substr", (DL_FUNC) clic_ansi_substr, 3 },
Expand Down
44 changes: 44 additions & 0 deletions src/xxhash.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2021 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/


/*
* xxhash.c instantiates functions defined in xxhash.h
*/

#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#define XXH_INLINE_ALL

#include "xxhash.h"
Loading

0 comments on commit 38cef38

Please sign in to comment.