Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rebuild translation table + support ordered factor in scan_data #580

Merged
merged 8 commits into from
Nov 28, 2024
Merged
3 changes: 1 addition & 2 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@ vignettes
man/figures/
^tests/manual_tests$
^inst/translations$
tests/testthat/_snaps
tests/testthat/postgres.R
tests/testthat/postgres-2.R
tests/testthat/test-col_schema.R
tests/testthat/test-column_roles.R
tests/testthat/test-create_validation_steps.R
tests/testthat/test-draft_validation.R
tests/testthat/test-expectation_fns.R
tests/testthat/test-file_naming.R
tests/testthat/test-get_agent_report.R
Expand All @@ -34,6 +32,7 @@ tests/testthat/test-get_informant_report.R
tests/testthat/test-get_multiagent_report.R
tests/testthat/test-interrogate_simple.R
tests/testthat/test-interrogate_with_agent.R
tests/testthat/_snaps/interrogate_with_agent.md
tests/testthat/test-interrogate_with_agent_db.R
tests/testthat/test-interrogate_with_agent_segments.R
tests/testthat/test-read_disk_multiagent.R
Expand Down
1 change: 1 addition & 0 deletions R/scan_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ probe_columns <- function(
lang = lang,
locale = locale
),
ordered = ,
factor = probe_columns_factor(
data = data,
column = col_name,
Expand Down
22 changes: 2 additions & 20 deletions R/tbl_from_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -575,33 +575,15 @@ download_remote_file <- function(url, ...) {

if (grepl("^https?://", url)) {

is_r32 <- getRversion() >= "3.2"

if (.Platform$OS.type == "windows") {

if (is_r32) {

method <- "wininet"

} else {

seti2 <- utils::"setInternet2"
internet2_start <- seti2(NA)

if (!internet2_start) {

on.exit(suppressWarnings(seti2(internet2_start)))
suppressWarnings(seti2(TRUE))
}

method <- "internal"
}
method <- "wininet"

suppressWarnings(utils::download.file(url, method = method, ...))

} else {

if (is_r32 && capabilities("libcurl")) {
if (capabilities("libcurl")) {

method <- "libcurl"

Expand Down
60 changes: 2 additions & 58 deletions data-raw/00-reencode_text.R
Original file line number Diff line number Diff line change
@@ -1,61 +1,5 @@
library(tidyverse)
library(here)
library(yaml)
library(stringi)

# Re-encode a string so that its characters are written out as `\u`/`\U`
# escape sequences derived from their UTF-32BE byte values. A character whose
# only non-zero byte is <= 127 is kept as a literal character instead.
#
# x: a character string. The `if ()` test on `Encoding(x)` below presumes a
#   length-one input -- TODO confirm against callers.
#
# Returns a single string: the per-character pieces pasted together with
# `collapse = ""`.
reencode_utf8 <- function(x) {
# NOTE(review): `y` is assigned here but never used anywhere in this function;
# this read looks like it belongs at the top level of the script -- confirm.
y <- yaml::read_yaml(here::here("data-raw/translations_source.yml"), fileEncoding = "UTF-8")

# Strings whose declared encoding is not already "UTF-8" are passed
# through `enc2utf8()` before the byte-level conversion below
if (Encoding(x) != 'UTF-8') {
x <- enc2utf8(x)
}

# Use `iconv()` to convert to UTF-32 (big endian) as
# raw bytes and convert again to integer (crucial here
# to set the base to 16 for this conversion)
raw_bytes <-
iconv(x, "UTF-8", "UTF-32BE", toRaw = TRUE) %>%
unlist() %>%
strtoi(base = 16L)

# Split into a list of four bytes per element (one list element per
# UTF-32 code unit)
chars <- split(raw_bytes, ceiling(seq_along(raw_bytes) / 4))

# Build one output piece per four-byte group, then collapse the pieces into
# a single string
x <-
vapply(
chars,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {

# Drop the leading zero bytes: keep everything from the first non-zero
# byte through the end of the group
bytes_nz <- x[min(which(x > 0)):length(x)]

# The escape form is chosen by how many bytes remain:
#   more than 2 -> `\U` followed by the hex of all four original bytes
#   exactly 2   -> `\u` followed by the hex of the two remaining bytes
#   1, > 127    -> `\u` followed by the hex formatted via "%04s"
#   otherwise   -> the byte converted directly to a character
# NOTE(review): the hex digits come from converting `as.hexmode()` values
# to character; whether each byte is rendered with two digits depends on
# how that conversion pads its output -- confirm on the R version in use.
if (length(bytes_nz) > 2) {
out <- paste("\\U", paste(as.hexmode(x), collapse = ""), sep = "")
} else if (length(bytes_nz) > 1) {
out <- paste("\\u", paste(as.hexmode(bytes_nz), collapse = ""), sep = "")
} else if (length(bytes_nz) == 1 && bytes_nz > 127) {
# NOTE(review): `collapse = ""` is passed to `sprintf()` here rather than
# to `paste()`, and "%04s" padding of strings may be platform-dependent
# -- confirm the intended behavior.
out <- paste("\\u", sprintf("%04s", paste(as.hexmode(bytes_nz)), collapse = ""), sep = "")
} else {
out <- rawToChar(as.raw(bytes_nz))
}
out
}
) %>%
paste(collapse = "")

x
}

y <- yaml::read_yaml(here::here("data-raw/translations_source.yml"))

# Obtain a list that reencodes all translation text to Unicode
# code points
translations_list <-
lapply(y, lapply, lapply, function(x) {
reencode_utf8(x) %>% stringi::stri_unescape_unicode()
})

saveRDS(translations_list, file = here("inst/text/translations_built"))
saveRDS(y, file = here::here("inst/text/translations_built"))
32 changes: 16 additions & 16 deletions data-raw/translations_source.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1531,7 +1531,7 @@ table_scan:
nl: "**pointblank** Versie"
tbl_lab_r_version:
en: "**R** Version"
fr: "version **R**"
fr: "Version **R**"
de: "**R** Version"
it: "versione **R**"
es: "Versión **R**"
Expand All @@ -1545,7 +1545,7 @@ table_scan:
nl: "**R**-versie"
tbl_lab_system_os:
en: "Operating System"
fr: "Système opérateur"
fr: "Système d'exploitation"
de: "Betriebssystem"
it: "Sistema operativo"
es: "Sistema operativo"
Expand All @@ -1559,7 +1559,7 @@ table_scan:
nl: "Besturingssysteem"
tbl_lab_distinct:
en: "Distinct"
fr: "valeurs uniques"
fr: "Valeurs uniques"
de: "Einzigartige Werte"
it: "Valori unici"
es: "Valores únicos"
Expand Down Expand Up @@ -1782,19 +1782,19 @@ table_scan:
sv: "Andra värden"
nl: "Andere waarden"
footer_text_fragment:
en: "Table scan generated with <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
fr: "Scan de tableau généré avec <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
de: "Mit <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a> generierter Scan der Tabelle."
it: "Scansione della tabella generata con <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
es: "Escaneo de Tabla generado con <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
pt: "Examen da tabela gerada com <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
tr: "<a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a> ile oluşturulan tablo taraması."
zh: "通过 <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>建立表扫描。"
ru: "Сканирование таблиц, сгенерированное с помощью <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
pl: "Skan tabeli wygenerowany za pomoca <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
da: "Scan af tabel genereret med <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
sv: "Genomsökning av tabellen genereras med <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
nl: "Scan van tabel gegenereerd met <a href=\"https://www.github.com/rstudio/pointblank\">pointblank</a>."
en: "Table scan generated with <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
fr: "Scan de tableau généré avec <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
de: "Mit <a href=\"https://github.com/rstudio/pointblank\">pointblank</a> generierter Scan der Tabelle."
it: "Scansione della tabella generata con <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
es: "Escaneo de Tabla generado con <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
pt: "Examen da tabela gerada com <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
tr: "<a href=\"https://github.com/rstudio/pointblank\">pointblank</a> ile oluşturulan tablo taraması."
zh: "通过 <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>建立表扫描。"
ru: "Сканирование таблиц, сгенерированное с помощью <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
pl: "Skan tabeli wygenerowany za pomoca <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
da: "Scan af tabel genereret med <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
sv: "Genomsökning av tabellen genereras med <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
nl: "Scan van tabel gegenereerd met <a href=\"https://github.com/rstudio/pointblank\">pointblank</a>."
multiagent_report:
pointblank_multiagent_title_text:
en: "Pointblank Validation Series"
Expand Down
Binary file modified inst/text/translations_built
Binary file not shown.
28 changes: 14 additions & 14 deletions inst/translations/ES/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ lang: es
home:
strip_header: true

url: https://rich-iannone.github.io/pointblank/es
url: https://rstudio.github.io/pointblank/es

template:
bootstrap: 5
Expand Down Expand Up @@ -87,7 +87,7 @@ reference:
- conjointly
- serially
- specially

- title: Funciones de información
desc: >
Podemos añadir progresivamente información a un objeto *informant*
Expand All @@ -112,7 +112,7 @@ reference:
- snip_stats
- snip_lowest
- snip_highest

- title: Creación de correo electrónico
desc: >
A veces queremos enviar por correo electrónico un informe de una
Expand All @@ -125,7 +125,7 @@ reference:
- email_create
- stock_msg_body
- stock_msg_footer

- title: Registro
desc: >
El registro de las condiciones de fallo de validación es una buena
Expand All @@ -149,7 +149,7 @@ reference:
contents:
- interrogate
- get_agent_report

- title: "El informante: incorporar e informar"
desc: >
Si tenemos un objeto *informante* que ha sido cargado con información
Expand All @@ -161,7 +161,7 @@ reference:
contents:
- incorporate
- get_informant_report

- title: Después del interrogatorio
desc: >
El objeto `agente` siempre tiene una lista especial llamada 'x-list'.
Expand All @@ -180,7 +180,7 @@ reference:
- get_sundered_data
- all_passed
- write_testthat_file

- title: Operaciones con objetos
desc: >
Tenemos opciones para escribir un agente o informante en el disco con
Expand All @@ -201,7 +201,7 @@ reference:
- activate_steps
- deactivate_steps
- remove_steps

- title: El Multiagente
desc: >
El objeto `multiagente` es un grupo de agentes, cada uno de los
Expand All @@ -218,7 +218,7 @@ reference:
- create_multiagent
- read_disk_multiagent
- get_multiagent_report

- title: pointblank YAML
desc: >
Los archivos YAML pueden ser utilizados en **pointblank** para dos
Expand All @@ -238,7 +238,7 @@ reference:
- yaml_agent_show_exprs
- yaml_informant_incorporate
- yaml_exec

- title: Table Transformers
desc: >
Las funciones del **Table Transformers** pueden transformar
Expand All @@ -259,7 +259,7 @@ reference:
- tt_time_shift
- tt_time_slice
- get_tt_param

- title: Funciones de utilidad y ayuda
contents:
- col_schema
Expand All @@ -268,15 +268,15 @@ reference:
- affix_datetime
- stop_if_not
- from_github

- title: Conjuntos de datos
contents:
- small_table
- small_table_sqlite
- specifications
- game_revenue
- game_revenue_info

navbar:
type: default
left:
Expand Down Expand Up @@ -307,7 +307,7 @@ navbar:
- text: News
href: news/index.html
- text: In English
href: https://rich-iannone.github.io/pointblank
href: https://rstudio.github.io/pointblank

development:
version_tooltip: "Development version"
2 changes: 1 addition & 1 deletion inst/translations/ES/vignettes/INFO-1.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ informant <-
section_name = "further information",
`examples and documentation` = "Examples for how to use the `info_*()` functions
(and many more) are available at the
[**pointblank** site](https://rich-iannone.github.io/pointblank/)."
[**pointblank** site](https://rstudio.github.io/pointblank/)."
)

informant
Expand Down
2 changes: 1 addition & 1 deletion inst/translations/ES/vignettes/VALID-V.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,6 @@ Just as with all the other workflows, the `tbl` supplied could be a data frame,

# Languages and Locales

The reporting generated by `scan_data()` can be presented in one of eight spoken languages: English (`"en"`, the default), French (`"fr"`), German (`"de"`), Italian (`"it"`), Spanish (`"es"`), Portuguese, (`"pt"`), Chinese (`"zh"`), and Russian (`"ru"`). These two-letter language codes can be used as an argument to the `lang` argument. When applied, all label text and other non-data elements will be set to the language of choice. We have checked the translations with native speakers of the respective languages but if you find an error that should be corrected, please [file an issue](https://github.com/rich-iannone/pointblank/issues).
The reporting generated by `scan_data()` can be presented in one of eight spoken languages: English (`"en"`, the default), French (`"fr"`), German (`"de"`), Italian (`"it"`), Spanish (`"es"`), Portuguese (`"pt"`), Chinese (`"zh"`), and Russian (`"ru"`). These two-letter language codes can be passed to the `lang` argument. When applied, all label text and other non-data elements will be set to the language of choice. We have checked the translations with native speakers of the respective languages, but if you find an error that should be corrected, please [file an issue](https://github.com/rstudio/pointblank/issues).

Along with translations, numerical values that are generated as part of the reporting (e.g., table dimensions, summary statistics, etc.) are automatically formatted in the locale of the language (given in `lang`). This can be overridden with the `locale` argument which accepts a locale ID. Examples include `"en_US"` for English (United States) and `"fr_FR"` for French (France). More simply, this can be a language identifier without a country designation, like `"es"` for Spanish (Spain, same as `"es_ES"`). More than 700 locales are currently accepted.
2 changes: 1 addition & 1 deletion tests/testthat/_snaps/draft_validation.md
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@
Code
readLines(con = path) %>% paste0(collapse = "\n")
Output
[1] "library(pointblank)\n\nagent <-\n create_agent(\n tbl = ~ tbl,\n actions = action_levels(\n warn_at = 0.05,\n stop_at = 0.10\n ),\n tbl_name = \"tbl\",\n label = \"Validation plan generated by `draft_validation()`.\",\n lang = \"fr\",\n locale = \"fr\"\n ) %>%\n # Attendez-vous à ce que la colonne `a` soit de type: integer\n col_is_integer(\n columns = c(\"a\")\n ) %>%\n # Attendez-vous à ce que les valeurs de `a` soient comprises entre `1` et `8`\n col_vals_between(\n columns = c(\"a\"),\n left = 1,\n right = 8\n ) %>%\n # Attendez-vous à ce que la colonne `b` soit de type: character\n col_is_character(\n columns = c(\"b\")\n ) %>%\n # Attendez-vous à ce que la colonne `c` soit de type: numeric\n col_is_numeric(\n columns = c(\"c\")\n ) %>%\n # Attendez-vous à ce que les valeurs de `c` soient comprises entre `2` et `9`\n col_vals_between(\n columns = c(\"c\"),\n left = 2,\n right = 9,\n na_pass = TRUE\n ) %>%\n # Attendez-vous à ce que la colonne `d` soit de type: numeric\n col_is_numeric(\n columns = c(\"d\")\n ) %>%\n # Attendez-vous à ce que les valeurs de `d` soient comprises entre `108.34` et `9999.99`\n col_vals_between(\n columns = c(\"d\"),\n left = 108.34,\n right = 9999.99\n ) %>%\n # Attendez-vous à ce que la colonne `e` soit de type: logical\n col_is_logical(\n columns = c(\"e\")\n ) %>%\n # Attendez-vous à ce que la colonne `f` soit de type: character\n col_is_character(\n columns = c(\"f\")\n ) %>%\n # Attendez-vous à ce que les schémas de colonnes correspondent\n col_schema_match(\n schema = col_schema(\n date_time = c(\"POSIXct\", \"POSIXt\"),\n date = \"Date\",\n a = \"integer\",\n b = \"character\",\n c = \"numeric\",\n d = \"numeric\",\n e = \"logical\",\n f = \"character\"\n )\n ) %>%\n interrogate()\n\nagent"
[1] "library(pointblank)\n\nagent <-\n create_agent(\n tbl = ~ tbl,\n actions = action_levels(\n warn_at = 0.05,\n stop_at = 0.10\n ),\n tbl_name = \"tbl\",\n label = \"Validation plan generated by `draft_validation()`.\",\n lang = \"fr\",\n locale = \"fr\"\n ) %>%\n # On s'attend à ce que la colonne `a` soit de type : integer\n col_is_integer(\n columns = c(\"a\")\n ) %>%\n # On s'attend à ce que les valeurs de `a` soient comprises entre `1` et `8`\n col_vals_between(\n columns = c(\"a\"),\n left = 1,\n right = 8\n ) %>%\n # On s'attend à ce que la colonne `b` soit de type : character\n col_is_character(\n columns = c(\"b\")\n ) %>%\n # On s'attend à ce que la colonne `c` soit de type : numeric\n col_is_numeric(\n columns = c(\"c\")\n ) %>%\n # On s'attend à ce que les valeurs de `c` soient comprises entre `2` et `9`\n col_vals_between(\n columns = c(\"c\"),\n left = 2,\n right = 9,\n na_pass = TRUE\n ) %>%\n # On s'attend à ce que la colonne `d` soit de type : numeric\n col_is_numeric(\n columns = c(\"d\")\n ) %>%\n # On s'attend à ce que les valeurs de `d` soient comprises entre `108.34` et `9999.99`\n col_vals_between(\n columns = c(\"d\"),\n left = 108.34,\n right = 9999.99\n ) %>%\n # On s'attend à ce que la colonne `e` soit de type : logical\n col_is_logical(\n columns = c(\"e\")\n ) %>%\n # On s'attend à ce que la colonne `f` soit de type : character\n col_is_character(\n columns = c(\"f\")\n ) %>%\n # On s'attend à ce que les schémas de colonnes correspondent\n col_schema_match(\n schema = col_schema(\n date_time = c(\"POSIXct\", \"POSIXt\"),\n date = \"Date\",\n a = \"integer\",\n b = \"character\",\n c = \"numeric\",\n d = \"numeric\",\n e = \"logical\",\n f = \"character\"\n )\n ) %>%\n interrogate()\n\nagent"

---

Expand Down
5 changes: 5 additions & 0 deletions tests/testthat/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,8 @@ expect_equal_unlist <- function(object, expected, ...) {
expected
)
}

# Test helper: skip the current test unless the session's locale is UTF-8.
# Takes no arguments; it passes the `UTF-8` element of `l10n_info()` to
# `skip_if_not()` (presumably testthat's -- confirm), so the test is skipped
# when that element is not TRUE.
skip_if_not_utf8 <- function() {
# a non-UTF-8 locale is likely on Windows for R < 4.2
skip_if_not(l10n_info()$`UTF-8`)
}
5 changes: 2 additions & 3 deletions tests/testthat/test-draft_validation.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
skip_on_os(os = "windows")

skip_on_cran()
work_path <- "./generated_r_files"

if (fs::dir_exists(path = work_path)) {
Expand Down Expand Up @@ -92,7 +91,7 @@ test_that("draft validations for data tables can be generated", {
})

test_that("draft validations for data tables can be generated in different languages", {

skip_if_not_utf8()
write_draft_snapshot_test(dataset = pointblank::small_table, filename = "st_en", lang = "en")
write_draft_snapshot_test(dataset = pointblank::small_table, filename = "st_fr", lang = "fr")
write_draft_snapshot_test(dataset = pointblank::small_table, filename = "st_de", lang = "de")
Expand Down
Loading