From a03a1267e708d1eb50aadfee6454a7ccd31c8c65 Mon Sep 17 00:00:00 2001 From: Mauricio 'Pacha' Vargas Sepulveda Date: Wed, 1 Jan 2025 16:58:09 -0500 Subject: [PATCH 1/3] draft map -> list/sexp --- cpp11test/DESCRIPTION | 2 +- inst/include/cpp11/map_to_sexp.hpp | 44 ++++++++++++++++++++++++++++ vignettes/motivations.Rmd | 47 +++++++++++++++++++++++++++++- 3 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 inst/include/cpp11/map_to_sexp.hpp diff --git a/cpp11test/DESCRIPTION b/cpp11test/DESCRIPTION index d1d05665..70c5649f 100644 --- a/cpp11test/DESCRIPTION +++ b/cpp11test/DESCRIPTION @@ -20,4 +20,4 @@ Suggests: xml2 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 +RoxygenNote: 7.3.2 diff --git a/inst/include/cpp11/map_to_sexp.hpp b/inst/include/cpp11/map_to_sexp.hpp new file mode 100644 index 00000000..62ef2e11 --- /dev/null +++ b/inst/include/cpp11/map_to_sexp.hpp @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include "cpp11/R.hpp" +#include "cpp11/protect.hpp" +#include "cpp11/list.hpp" +#include "cpp11/strings.hpp" + +namespace cpp11 { + +template +SEXP map_to_sexp(const std::map& map) { + cpp11::writable::list result(map.size()); + cpp11::writable::strings names(map.size()); + + size_t i = 0; + for (const auto& pair : map) { + result[i] = cpp11::as_sexp(pair.second); + names[i] = cpp11::as_sexp(pair.first); + ++i; + } + + result.names() = names; + return result; +} + +template +SEXP unordered_map_to_sexp(const std::unordered_map& map) { + cpp11::writable::list result(map.size()); + cpp11::writable::strings names(map.size()); + + size_t i = 0; + for (const auto& pair : map) { + result[i] = cpp11::as_sexp(pair.second); + names[i] = cpp11::as_sexp(pair.first); + ++i; + } + + result.names() = names; + return result; +} + +} // namespace cpp11 diff --git a/vignettes/motivations.Rmd b/vignettes/motivations.Rmd index 9f5ffb0f..4e957dcd 100644 --- a/vignettes/motivations.Rmd +++ b/vignettes/motivations.Rmd @@ -332,7 +332,52 @@ Doing this universally avoids many locale specific issues when dealing with Unic Concretely cpp11 always uses `Rf_translateCharUTF8()` when obtaining `const char*` from `CHRSXP` objects and uses `Rf_mkCharCE(, CE_UTF8)` when creating new `CHRSXP` objects from `const char*` inputs. - +Converting R Unicode Strings to C++ Strings: + +```cpp +#include +#include + +[[cpp11::register]] +std::string convert_to_utf8(cpp11::strings input) { + std::string result; + for (auto str : input) { + result += cpp11::r_string(Rf_translateCharUTF8(str)); + } + return result; +} +``` + +```r +# hello + how are you? in Japanese and Spanish +input <- c("\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf", + "\xc2\xbfC\xc3\xb3mo est\xc3\xa1s\x3f") + +convert_to_utf8(input) + +[1] "こんにちは¿Cómo estás?" +``` + +Returning Unicode Strings from C++ to R: + +```cpp +#include +#include + +[[cpp11::register]] +cpp11::writable::strings return_utf8_string() { + cpp11::writable::strings result; + std::string utf8_str = "こんにちは"; // Hello in Japanese + result.push_back(Rf_mkCharCE(utf8_str.c_str(), CE_UTF8)); + return result; +} +``` + +```r +return_utf8_string() + +[1] "こんにちは" +``` ## C++11 features {#c11-features} From aaed10470838c6836168e3d7ac904cb6fa7a21a6 Mon Sep 17 00:00:00 2001 From: Mauricio 'Pacha' Vargas Sepulveda Date: Thu, 2 Jan 2025 01:19:25 -0500 Subject: [PATCH 2/3] ordered and unordered C++ maps are converted to R lists --- cpp11test/R/cpp11.R | 8 ++ cpp11test/src/cpp11.cpp | 16 ++++ cpp11test/src/map.cpp | 20 +++++ cpp11test/tests/testthat/test-map-to-list.R | 18 +++++ inst/include/cpp11/as.hpp | 84 +++++++++++++++++++-- inst/include/cpp11/map_to_sexp.hpp | 44 ----------- vignettes/cpp11.Rmd | 4 - 7 files changed, 140 insertions(+), 54 deletions(-) create mode 100644 cpp11test/src/map.cpp create mode 100644 cpp11test/tests/testthat/test-map-to-list.R delete mode 100644 inst/include/cpp11/map_to_sexp.hpp diff --git a/cpp11test/R/cpp11.R b/cpp11test/R/cpp11.R index 038e7b76..332faa88 100644 --- a/cpp11test/R/cpp11.R +++ b/cpp11test/R/cpp11.R @@ -88,6 +88,14 @@ cpp11_insert_ <- function(num_sxp) { .Call(`_cpp11test_cpp11_insert_`, num_sxp) } +ordered_map_to_list_ <- function(x) { + .Call(`_cpp11test_ordered_map_to_list_`, x) +} + +unordered_map_to_list_ <- function(x) { + .Call(`_cpp11test_unordered_map_to_list_`, x) +} + gibbs_cpp <- function(N, thin) { .Call(`_cpp11test_gibbs_cpp`, N, thin) } diff --git a/cpp11test/src/cpp11.cpp b/cpp11test/src/cpp11.cpp index 421de637..56057fc0 100644 --- a/cpp11test/src/cpp11.cpp +++ b/cpp11test/src/cpp11.cpp @@ -173,6 +173,20 @@ extern "C" SEXP _cpp11test_cpp11_insert_(SEXP num_sxp) { return cpp11::as_sexp(cpp11_insert_(cpp11::as_cpp>(num_sxp))); END_CPP11 } +// map.cpp +SEXP ordered_map_to_list_(cpp11::doubles x); +extern "C" SEXP _cpp11test_ordered_map_to_list_(SEXP x) { + BEGIN_CPP11 + return cpp11::as_sexp(ordered_map_to_list_(cpp11::as_cpp>(x))); + END_CPP11 +} +// map.cpp +SEXP unordered_map_to_list_(cpp11::doubles x); +extern "C" SEXP _cpp11test_unordered_map_to_list_(SEXP x) { + BEGIN_CPP11 + return cpp11::as_sexp(unordered_map_to_list_(cpp11::as_cpp>(x))); + END_CPP11 +} // matrix.cpp SEXP gibbs_cpp(int N, int thin); extern "C" SEXP _cpp11test_gibbs_cpp(SEXP N, SEXP thin) { @@ -500,6 +514,7 @@ static const R_CallMethodDef CallEntries[] = { {"_cpp11test_my_warning_n1", (DL_FUNC) &_cpp11test_my_warning_n1, 1}, {"_cpp11test_my_warning_n1fmt", (DL_FUNC) &_cpp11test_my_warning_n1fmt, 1}, {"_cpp11test_my_warning_n2fmt", (DL_FUNC) &_cpp11test_my_warning_n2fmt, 2}, + {"_cpp11test_ordered_map_to_list_", (DL_FUNC) &_cpp11test_ordered_map_to_list_, 1}, {"_cpp11test_protect_many_", (DL_FUNC) &_cpp11test_protect_many_, 1}, {"_cpp11test_protect_many_cpp11_", (DL_FUNC) &_cpp11test_protect_many_cpp11_, 1}, {"_cpp11test_protect_many_preserve_", (DL_FUNC) &_cpp11test_protect_many_preserve_, 1}, @@ -533,6 +548,7 @@ static const R_CallMethodDef CallEntries[] = { {"_cpp11test_sum_int_foreach_", (DL_FUNC) &_cpp11test_sum_int_foreach_, 1}, {"_cpp11test_test_destruction_inner", (DL_FUNC) &_cpp11test_test_destruction_inner, 0}, {"_cpp11test_test_destruction_outer", (DL_FUNC) &_cpp11test_test_destruction_outer, 0}, + {"_cpp11test_unordered_map_to_list_", (DL_FUNC) &_cpp11test_unordered_map_to_list_, 1}, {"_cpp11test_upper_bound", (DL_FUNC) &_cpp11test_upper_bound, 2}, {"run_testthat_tests", (DL_FUNC) &run_testthat_tests, 1}, {NULL, NULL, 0} diff --git a/cpp11test/src/map.cpp b/cpp11test/src/map.cpp new file mode 100644 index 00000000..e4a32db4 --- /dev/null +++ b/cpp11test/src/map.cpp @@ -0,0 +1,20 @@ +#include "cpp11/as.hpp" +#include "cpp11/doubles.hpp" + +[[cpp11::register]] SEXP ordered_map_to_list_(cpp11::doubles x) { + std::map counts; + int n = x.size(); + for (int i = 0; i < n; i++) { + counts[x[i]]++; + } + return cpp11::as_sexp(counts); +} + +[[cpp11::register]] SEXP unordered_map_to_list_(cpp11::doubles x) { + std::unordered_map counts; + int n = x.size(); + for (int i = 0; i < n; i++) { + counts[x[i]]++; + } + return cpp11::as_sexp(counts); +} diff --git a/cpp11test/tests/testthat/test-map-to-list.R b/cpp11test/tests/testthat/test-map-to-list.R new file mode 100644 index 00000000..dc637afa --- /dev/null +++ b/cpp11test/tests/testthat/test-map-to-list.R @@ -0,0 +1,18 @@ +test_that("ordered and unordered C++ maps are converted to R lists", { + set.seed(42L) + x <- rnorm(10L) + xprime <- c(x, x[1]) + + om <- ordered_map_to_list_(x) + expect_type(om, "list") + + om_doubles <- as.double(names(om)) + expect_equal(om_doubles, sort(om_doubles)) + + omprime <- ordered_map_to_list_(xprime) + expect_equal(unlist(unique(omprime)), 1:2) + + um <- unordered_map_to_list_(xprime) + expect_type(um, "list") + expect_equal(unlist(unique(um)), 1:2) +}) diff --git a/inst/include/cpp11/as.hpp b/inst/include/cpp11/as.hpp index 682f12b5..374c0518 100644 --- a/inst/include/cpp11/as.hpp +++ b/inst/include/cpp11/as.hpp @@ -2,10 +2,13 @@ #include // for modf #include // for initializer_list +#include // for std::map #include // for std::shared_ptr, std::weak_ptr, std::unique_ptr #include -#include // for string, basic_string -#include // for decay, enable_if, is_same, is_convertible +#include // for string, basic_string +#include // for decay, enable_if, is_same, is_convertible +#include // for std::unordered_map +#include // for std::vector #include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_xlength, R_xlen_t #include "cpp11/protect.hpp" // for stop, protect, safe, protect::function @@ -243,7 +246,7 @@ enable_if_integral as_sexp(const Container& from) { } inline SEXP as_sexp(std::initializer_list from) { - return as_sexp>(from); + return as_sexp(std::vector(from)); } template as_sexp(const Container& from) { } inline SEXP as_sexp(std::initializer_list from) { - return as_sexp>(from); + return as_sexp(std::vector(from)); } template as_sexp(const Container& from) { } inline SEXP as_sexp(std::initializer_list from) { - return as_sexp>(from); + return as_sexp(std::vector(from)); } namespace detail { @@ -325,7 +328,7 @@ enable_if_c_string as_sexp(const Container& from) { } inline SEXP as_sexp(std::initializer_list from) { - return as_sexp>(from); + return as_sexp(std::vector(from)); } template > @@ -333,4 +336,73 @@ enable_if_convertible_to_sexp as_sexp(const T& from) { return from; } +// Pacha: Specialization for std::map +// NOTE: I did not use templates to avoid clashes with doubles/function/etc. +inline SEXP as_sexp(const std::map& map) { + R_xlen_t size = map.size(); + SEXP result = PROTECT(Rf_allocVector(VECSXP, size)); + SEXP names = PROTECT(Rf_allocVector(STRSXP, size)); + + auto it = map.begin(); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + SET_VECTOR_ELT(result, i, it->second); + SET_STRING_ELT(names, i, Rf_mkCharCE(it->first.c_str(), CE_UTF8)); + } + + Rf_setAttrib(result, R_NamesSymbol, names); + UNPROTECT(2); + return result; +} + +// Specialization for std::map +inline SEXP as_sexp(const std::map& map) { + R_xlen_t size = map.size(); + SEXP result = PROTECT(Rf_allocVector(VECSXP, size)); + SEXP names = PROTECT(Rf_allocVector(REALSXP, size)); + + auto it = map.begin(); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + SET_VECTOR_ELT(result, i, Rf_ScalarInteger(it->second)); + REAL(names)[i] = it->first; + } + + Rf_setAttrib(result, R_NamesSymbol, names); + UNPROTECT(2); + return result; +} + +// Pacha: Specialization for std::unordered_map +inline SEXP as_sexp(const std::unordered_map& map) { + R_xlen_t size = map.size(); + SEXP result = PROTECT(Rf_allocVector(VECSXP, size)); + SEXP names = PROTECT(Rf_allocVector(STRSXP, size)); + + auto it = map.begin(); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + SET_VECTOR_ELT(result, i, it->second); + SET_STRING_ELT(names, i, Rf_mkCharCE(it->first.c_str(), CE_UTF8)); + } + + Rf_setAttrib(result, R_NamesSymbol, names); + UNPROTECT(2); + return result; +} + +// Specialization for std::unordered_map +inline SEXP as_sexp(const std::unordered_map& map) { + R_xlen_t size = map.size(); + SEXP result = PROTECT(Rf_allocVector(VECSXP, size)); + SEXP names = PROTECT(Rf_allocVector(REALSXP, size)); + + auto it = map.begin(); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + SET_VECTOR_ELT(result, i, Rf_ScalarInteger(it->second)); + REAL(names)[i] = it->first; + } + + Rf_setAttrib(result, R_NamesSymbol, names); + UNPROTECT(2); + return result; +} + } // namespace cpp11 diff --git a/inst/include/cpp11/map_to_sexp.hpp b/inst/include/cpp11/map_to_sexp.hpp deleted file mode 100644 index 62ef2e11..00000000 --- a/inst/include/cpp11/map_to_sexp.hpp +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include -#include -#include "cpp11/R.hpp" -#include "cpp11/protect.hpp" -#include "cpp11/list.hpp" -#include "cpp11/strings.hpp" - -namespace cpp11 { - -template -SEXP map_to_sexp(const std::map& map) { - cpp11::writable::list result(map.size()); - cpp11::writable::strings names(map.size()); - - size_t i = 0; - for (const auto& pair : map) { - result[i] = cpp11::as_sexp(pair.second); - names[i] = cpp11::as_sexp(pair.first); - ++i; - } - - result.names() = names; - return result; -} - -template -SEXP unordered_map_to_sexp(const std::unordered_map& map) { - cpp11::writable::list result(map.size()); - cpp11::writable::strings names(map.size()); - - size_t i = 0; - for (const auto& pair : map) { - result[i] = cpp11::as_sexp(pair.second); - names[i] = cpp11::as_sexp(pair.first); - ++i; - } - - result.names() = names; - return result; -} - -} // namespace cpp11 diff --git a/vignettes/cpp11.Rmd b/vignettes/cpp11.Rmd index 5f10fcc6..dcae8e05 100644 --- a/vignettes/cpp11.Rmd +++ b/vignettes/cpp11.Rmd @@ -878,8 +878,6 @@ logicals duplicated_cpp(integers x) { } ``` -````{=html} - -```` ### Exercises From 70c68dfd22382dfb377e75beb2e79c43db1f2cdd Mon Sep 17 00:00:00 2001 From: Mauricio 'Pacha' Vargas Sepulveda Date: Thu, 2 Jan 2025 01:26:45 -0500 Subject: [PATCH 3/3] template map to list conversion --- cpp11test/R/cpp11.R | 4 ++ cpp11test/src/cpp11.cpp | 8 ++++ cpp11test/src/map.cpp | 9 ++++ cpp11test/tests/testthat/test-map-to-list.R | 3 ++ inst/include/cpp11/as.hpp | 53 +++++---------------- 5 files changed, 36 insertions(+), 41 deletions(-) diff --git a/cpp11test/R/cpp11.R b/cpp11test/R/cpp11.R index 332faa88..4c323020 100644 --- a/cpp11test/R/cpp11.R +++ b/cpp11test/R/cpp11.R @@ -92,6 +92,10 @@ ordered_map_to_list_ <- function(x) { .Call(`_cpp11test_ordered_map_to_list_`, x) } +ordered_map_to_list_2_ <- function(x) { + .Call(`_cpp11test_ordered_map_to_list_2_`, x) +} + unordered_map_to_list_ <- function(x) { .Call(`_cpp11test_unordered_map_to_list_`, x) } diff --git a/cpp11test/src/cpp11.cpp b/cpp11test/src/cpp11.cpp index 56057fc0..4ce4e154 100644 --- a/cpp11test/src/cpp11.cpp +++ b/cpp11test/src/cpp11.cpp @@ -181,6 +181,13 @@ extern "C" SEXP _cpp11test_ordered_map_to_list_(SEXP x) { END_CPP11 } // map.cpp +SEXP ordered_map_to_list_2_(cpp11::doubles x); +extern "C" SEXP _cpp11test_ordered_map_to_list_2_(SEXP x) { + BEGIN_CPP11 + return cpp11::as_sexp(ordered_map_to_list_2_(cpp11::as_cpp>(x))); + END_CPP11 +} +// map.cpp SEXP unordered_map_to_list_(cpp11::doubles x); extern "C" SEXP _cpp11test_unordered_map_to_list_(SEXP x) { BEGIN_CPP11 @@ -515,6 +522,7 @@ static const R_CallMethodDef CallEntries[] = { {"_cpp11test_my_warning_n1fmt", (DL_FUNC) &_cpp11test_my_warning_n1fmt, 1}, {"_cpp11test_my_warning_n2fmt", (DL_FUNC) &_cpp11test_my_warning_n2fmt, 2}, {"_cpp11test_ordered_map_to_list_", (DL_FUNC) &_cpp11test_ordered_map_to_list_, 1}, + {"_cpp11test_ordered_map_to_list_2_", (DL_FUNC) &_cpp11test_ordered_map_to_list_2_, 1}, {"_cpp11test_protect_many_", (DL_FUNC) &_cpp11test_protect_many_, 1}, {"_cpp11test_protect_many_cpp11_", (DL_FUNC) &_cpp11test_protect_many_cpp11_, 1}, {"_cpp11test_protect_many_preserve_", (DL_FUNC) &_cpp11test_protect_many_preserve_, 1}, diff --git a/cpp11test/src/map.cpp b/cpp11test/src/map.cpp index e4a32db4..71b3402d 100644 --- a/cpp11test/src/map.cpp +++ b/cpp11test/src/map.cpp @@ -10,6 +10,15 @@ return cpp11::as_sexp(counts); } +[[cpp11::register]] SEXP ordered_map_to_list_2_(cpp11::doubles x) { + std::map counts; + double n = x.size(); + for (int i = 0; i < n; i++) { + counts[x[i]] += 1.0; + } + return cpp11::as_sexp(counts); +} + [[cpp11::register]] SEXP unordered_map_to_list_(cpp11::doubles x) { std::unordered_map counts; int n = x.size(); diff --git a/cpp11test/tests/testthat/test-map-to-list.R b/cpp11test/tests/testthat/test-map-to-list.R index dc637afa..dd7fd5ca 100644 --- a/cpp11test/tests/testthat/test-map-to-list.R +++ b/cpp11test/tests/testthat/test-map-to-list.R @@ -6,6 +6,9 @@ test_that("ordered and unordered C++ maps are converted to R lists", { om <- ordered_map_to_list_(x) expect_type(om, "list") + om2 <- ordered_map_to_list_2_(x) + expect_equal(om, om2) + om_doubles <- as.double(names(om)) expect_equal(om_doubles, sort(om_doubles)) diff --git a/inst/include/cpp11/as.hpp b/inst/include/cpp11/as.hpp index 374c0518..e7bb9b71 100644 --- a/inst/include/cpp11/as.hpp +++ b/inst/include/cpp11/as.hpp @@ -336,33 +336,18 @@ enable_if_convertible_to_sexp as_sexp(const T& from) { return from; } -// Pacha: Specialization for std::map -// NOTE: I did not use templates to avoid clashes with doubles/function/etc. -inline SEXP as_sexp(const std::map& map) { - R_xlen_t size = map.size(); - SEXP result = PROTECT(Rf_allocVector(VECSXP, size)); - SEXP names = PROTECT(Rf_allocVector(STRSXP, size)); - - auto it = map.begin(); - for (R_xlen_t i = 0; i < size; ++i, ++it) { - SET_VECTOR_ELT(result, i, it->second); - SET_STRING_ELT(names, i, Rf_mkCharCE(it->first.c_str(), CE_UTF8)); - } - - Rf_setAttrib(result, R_NamesSymbol, names); - UNPROTECT(2); - return result; -} - -// Specialization for std::map -inline SEXP as_sexp(const std::map& map) { +// Templated specialization for std::map +template ::value && + std::is_arithmetic::value>> +inline SEXP as_sexp(const std::map& map) { R_xlen_t size = map.size(); SEXP result = PROTECT(Rf_allocVector(VECSXP, size)); SEXP names = PROTECT(Rf_allocVector(REALSXP, size)); auto it = map.begin(); for (R_xlen_t i = 0; i < size; ++i, ++it) { - SET_VECTOR_ELT(result, i, Rf_ScalarInteger(it->second)); + SET_VECTOR_ELT(result, i, as_sexp(it->second)); REAL(names)[i] = it->first; } @@ -371,32 +356,18 @@ inline SEXP as_sexp(const std::map& map) { return result; } -// Pacha: Specialization for std::unordered_map -inline SEXP as_sexp(const std::unordered_map& map) { - R_xlen_t size = map.size(); - SEXP result = PROTECT(Rf_allocVector(VECSXP, size)); - SEXP names = PROTECT(Rf_allocVector(STRSXP, size)); - - auto it = map.begin(); - for (R_xlen_t i = 0; i < size; ++i, ++it) { - SET_VECTOR_ELT(result, i, it->second); - SET_STRING_ELT(names, i, Rf_mkCharCE(it->first.c_str(), CE_UTF8)); - } - - Rf_setAttrib(result, R_NamesSymbol, names); - UNPROTECT(2); - return result; -} - -// Specialization for std::unordered_map -inline SEXP as_sexp(const std::unordered_map& map) { +// Templated specialization for std::unordered_map +template ::value && + std::is_arithmetic::value>> +inline SEXP as_sexp(const std::unordered_map& map) { R_xlen_t size = map.size(); SEXP result = PROTECT(Rf_allocVector(VECSXP, size)); SEXP names = PROTECT(Rf_allocVector(REALSXP, size)); auto it = map.begin(); for (R_xlen_t i = 0; i < size; ++i, ++it) { - SET_VECTOR_ELT(result, i, Rf_ScalarInteger(it->second)); + SET_VECTOR_ELT(result, i, as_sexp(it->second)); REAL(names)[i] = it->first; }