Skip to content

Commit

Permalink
Migrate xml2_url.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
mgirlich committed Sep 4, 2023
1 parent 1fd06ac commit 48ab979
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 106 deletions.
20 changes: 10 additions & 10 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,38 +405,38 @@ extern "C" SEXP _xml2_doc_validate(SEXP doc_sxp, SEXP schema_sxp) {
END_CPP11
}
// xml2_url.cpp
extern "C" SEXP url_absolute_(SEXP x_sxp, SEXP base_sxp);
cpp11::strings url_absolute_(cpp11::strings x_sxp, cpp11::strings base_sxp);
extern "C" SEXP _xml2_url_absolute_(SEXP x_sxp, SEXP base_sxp) {
BEGIN_CPP11
return cpp11::as_sexp(url_absolute_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x_sxp), cpp11::as_cpp<cpp11::decay_t<SEXP>>(base_sxp)));
return cpp11::as_sexp(url_absolute_(cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(x_sxp), cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(base_sxp)));
END_CPP11
}
// xml2_url.cpp
extern "C" SEXP url_relative_(SEXP x_sxp, SEXP base_sxp);
cpp11::strings url_relative_(cpp11::strings x_sxp, cpp11::strings base_sxp);
extern "C" SEXP _xml2_url_relative_(SEXP x_sxp, SEXP base_sxp) {
BEGIN_CPP11
return cpp11::as_sexp(url_relative_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x_sxp), cpp11::as_cpp<cpp11::decay_t<SEXP>>(base_sxp)));
return cpp11::as_sexp(url_relative_(cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(x_sxp), cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(base_sxp)));
END_CPP11
}
// xml2_url.cpp
extern "C" SEXP url_parse_(SEXP x_sxp);
cpp11::data_frame url_parse_(cpp11::strings x_sxp);
extern "C" SEXP _xml2_url_parse_(SEXP x_sxp) {
BEGIN_CPP11
return cpp11::as_sexp(url_parse_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x_sxp)));
return cpp11::as_sexp(url_parse_(cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(x_sxp)));
END_CPP11
}
// xml2_url.cpp
extern "C" SEXP url_escape_(SEXP x_sxp, SEXP reserved_sxp);
cpp11::strings url_escape_(cpp11::strings x_sxp, cpp11::strings reserved_sxp);
extern "C" SEXP _xml2_url_escape_(SEXP x_sxp, SEXP reserved_sxp) {
BEGIN_CPP11
return cpp11::as_sexp(url_escape_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x_sxp), cpp11::as_cpp<cpp11::decay_t<SEXP>>(reserved_sxp)));
return cpp11::as_sexp(url_escape_(cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(x_sxp), cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(reserved_sxp)));
END_CPP11
}
// xml2_url.cpp
extern "C" SEXP url_unescape_(SEXP x_sxp);
cpp11::strings url_unescape_(cpp11::strings x_sxp);
extern "C" SEXP _xml2_url_unescape_(SEXP x_sxp) {
BEGIN_CPP11
return cpp11::as_sexp(url_unescape_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(x_sxp)));
return cpp11::as_sexp(url_unescape_(cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(x_sxp)));
END_CPP11
}
// xml2_xpath.cpp
Expand Down
143 changes: 59 additions & 84 deletions src/xml2_url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,152 +7,127 @@
#include <libxml/uri.h>
#include "xml2_utils.h"

[[cpp11::register]]
extern "C" SEXP url_absolute_(SEXP x_sxp, SEXP base_sxp) {
R_xlen_t n = Rf_xlength(x_sxp);
SEXP out = PROTECT(Rf_allocVector(STRSXP, n));

if (Rf_xlength(base_sxp) > 1) {
Rf_error("Base URL must be length 1");
const xmlChar* to_xml_chr(cpp11::strings x, const char* arg) {
if (x.size() > 1) {
cpp11::stop("%s must be a character vector of length 1", arg);
}

const xmlChar* base_uri = (xmlChar*) Rf_translateCharUTF8(STRING_ELT(base_sxp, 0));
return (xmlChar*) cpp11::as_cpp<const char*>(x);
}

[[cpp11::register]]
cpp11::strings url_absolute_(cpp11::strings x_sxp, cpp11::strings base_sxp) {
int n = x_sxp.size();
cpp11::writable::strings out(n);

const xmlChar* base_uri = to_xml_chr(base_sxp, "Base URL");

for (int i = 0; i < n; ++i) {
const xmlChar* uri = (xmlChar*) Rf_translateCharUTF8(STRING_ELT(x_sxp, i));
SET_STRING_ELT(out, i, Xml2String(xmlBuildURI(uri, base_uri)).asRString());
const xmlChar* uri = (xmlChar*) Rf_translateCharUTF8(x_sxp[i]);
out[i] = Xml2String(xmlBuildURI(uri, base_uri)).asRString();
}

UNPROTECT(1);
return out;
}

[[cpp11::register]]
extern "C" SEXP url_relative_(SEXP x_sxp, SEXP base_sxp) {
R_xlen_t n = Rf_xlength(x_sxp);
SEXP out = PROTECT(Rf_allocVector(STRSXP, n));

if (Rf_xlength(base_sxp) > 1) {
Rf_error("Base URL must be length 1");
}
cpp11::strings url_relative_(cpp11::strings x_sxp, cpp11::strings base_sxp) {
int n = x_sxp.size();
cpp11::writable::strings out(n);

const xmlChar* base_uri = (xmlChar*) Rf_translateCharUTF8(STRING_ELT(base_sxp, 0));
const xmlChar* base_uri = to_xml_chr(base_sxp, "Base URL");

for (int i = 0; i < n; ++i) {
const xmlChar* uri = (xmlChar*) Rf_translateCharUTF8(STRING_ELT(x_sxp, i));
SET_STRING_ELT(out, i, Xml2String(xmlBuildRelativeURI(uri, base_uri)).asRString());
const xmlChar* uri = (xmlChar*) Rf_translateCharUTF8(x_sxp[i]);
out[i] = Xml2String(xmlBuildRelativeURI(uri, base_uri)).asRString();
}

UNPROTECT(1);
return out;
}

[[cpp11::register]]
extern "C" SEXP url_parse_(SEXP x_sxp) {
R_xlen_t n = Rf_xlength(x_sxp);
cpp11::data_frame url_parse_(cpp11::strings x_sxp) {
int n = x_sxp.size();

SEXP scheme = PROTECT(Rf_allocVector(STRSXP, n));
SEXP server = PROTECT(Rf_allocVector(STRSXP, n));
SEXP user = PROTECT(Rf_allocVector(STRSXP, n));
SEXP path = PROTECT(Rf_allocVector(STRSXP, n));
SEXP query = PROTECT(Rf_allocVector(STRSXP, n));
SEXP fragment = PROTECT(Rf_allocVector(STRSXP, n));
cpp11::writable::strings scheme(n);
cpp11::writable::strings server(n);
cpp11::writable::strings user(n);
cpp11::writable::strings path(n);
cpp11::writable::strings query(n);
cpp11::writable::strings fragment(n);

SEXP port = PROTECT(Rf_allocVector(INTSXP, n));
cpp11::writable::integers port(n);

for (int i = 0; i < n; ++i) {
const char* raw = Rf_translateCharUTF8(STRING_ELT(x_sxp, i));
const char* raw = Rf_translateCharUTF8(x_sxp[i]);
xmlURI* uri = xmlParseURI(raw);
if (uri == NULL) {
continue;
}

SET_STRING_ELT(scheme, i, Rf_mkChar(uri->scheme == NULL ? "" : uri->scheme));
SET_STRING_ELT(server, i, Rf_mkChar(uri->server == NULL ? "" : uri->server));
INTEGER(port)[i] = uri->port == 0 ? NA_INTEGER : uri->port;
SET_STRING_ELT(user, i, Rf_mkChar(uri->user == NULL ? "" : uri->user));
SET_STRING_ELT(path, i, Rf_mkChar(uri->path == NULL ? "" : uri->path));
SET_STRING_ELT(fragment, i, Rf_mkChar(uri->fragment == NULL ? "" : uri->fragment));
scheme[i] = uri->scheme == NULL ? "" : uri->scheme;
server[i] = uri->server == NULL ? "" : uri->server;
port[i] = uri->port == 0 ? NA_INTEGER : uri->port;
user[i] = uri->user == NULL ? "" : uri->user;
path[i] = uri->path == NULL ? "" : uri->path;
fragment[i] = uri->fragment == NULL ? "" : uri->fragment;

/* * *
* Thu Apr 26 10:36:26 CEST 2007 Daniel Veillard
* svn path=/trunk/; revision=3607
* https://github.com/GNOME/libxml2/commit/a1413b84f7163d57c6251d5f4251186368efd859
*/
#if defined(LIBXML_VERSION) && (LIBXML_VERSION >= 20629)
SET_STRING_ELT(query, i, Rf_mkChar(uri->query_raw == NULL ? "" : uri->query_raw));
query[i] = uri->query_raw == NULL ? "" : uri->query_raw;
#else
SET_STRING_ELT(query, i, Rf_mkChar(uri->query == NULL ? "" : uri->query));
query[i] = uri->query == NULL ? "" : uri->query;
#endif

xmlFreeURI(uri);
}

SEXP out = PROTECT(Rf_allocVector(VECSXP, 7));
SET_VECTOR_ELT(out, 0, scheme);
SET_VECTOR_ELT(out, 1, server);
SET_VECTOR_ELT(out, 2, port);
SET_VECTOR_ELT(out, 3, user);
SET_VECTOR_ELT(out, 4, path);
SET_VECTOR_ELT(out, 5, query);
SET_VECTOR_ELT(out, 6, fragment);

SEXP names = PROTECT(Rf_allocVector(STRSXP, 7));

SET_STRING_ELT(names, 0, Rf_mkChar("scheme"));
SET_STRING_ELT(names, 1, Rf_mkChar("server"));
SET_STRING_ELT(names, 2, Rf_mkChar("port"));
SET_STRING_ELT(names, 3, Rf_mkChar("user"));
SET_STRING_ELT(names, 4, Rf_mkChar("path"));
SET_STRING_ELT(names, 5, Rf_mkChar("query"));
SET_STRING_ELT(names, 6, Rf_mkChar("fragment"));

Rf_setAttrib(out, R_ClassSymbol, Rf_mkString("data.frame"));
Rf_setAttrib(out, R_NamesSymbol, names);
using namespace cpp11::literals;

SEXP row_names = PROTECT(Rf_allocVector(INTSXP, 2));
INTEGER(row_names)[0] = NA_INTEGER;
INTEGER(row_names)[1] = -n;
Rf_setAttrib(out, R_RowNamesSymbol, row_names);

UNPROTECT(10);
cpp11::writable::data_frame out({
"scheme"_nm = scheme,
"server"_nm = server,
"port"_nm = port,
"user"_nm = user,
"path"_nm = path,
"query"_nm = query,
"fragment"_nm = fragment,
});

return out;
}

[[cpp11::register]]
extern "C" SEXP url_escape_(SEXP x_sxp, SEXP reserved_sxp) {
R_xlen_t n = Rf_xlength(x_sxp);
SEXP out = PROTECT(Rf_allocVector(STRSXP, n));

if (Rf_xlength(reserved_sxp) != 1) {
Rf_error("`reserved` must be character vector of length 1");
}
cpp11::strings url_escape_(cpp11::strings x_sxp, cpp11::strings reserved_sxp) {
int n = x_sxp.size();
cpp11::writable::strings out(n);

xmlChar* xReserved = (xmlChar*) Rf_translateCharUTF8(STRING_ELT(reserved_sxp, 0));
const xmlChar* xReserved = to_xml_chr(reserved_sxp, "`reserved`");

for (int i = 0; i < n; ++i) {
const xmlChar* xx = (xmlChar*) Rf_translateCharUTF8(STRING_ELT(x_sxp, i));
SET_STRING_ELT(out, i, Xml2String(xmlURIEscapeStr(xx, xReserved)).asRString());
const xmlChar* xx = (xmlChar*) Rf_translateCharUTF8(x_sxp[i]);
out[i] = Xml2String(xmlURIEscapeStr(xx, xReserved)).asRString();
}

UNPROTECT(1);
return out;
}

[[cpp11::register]]
extern "C" SEXP url_unescape_(SEXP x_sxp) {
R_xlen_t n = Rf_xlength(x_sxp);
SEXP out = PROTECT(Rf_allocVector(STRSXP, n));
cpp11::strings url_unescape_(cpp11::strings x_sxp) {
int n = x_sxp.size();
cpp11::writable::strings out(n);

for (int i = 0; i < n; ++i) {
const char* xx = Rf_translateCharUTF8(STRING_ELT(x_sxp, i));
const char* xx = Rf_translateCharUTF8(x_sxp[i]);

char* unescaped = xmlURIUnescapeString(xx, 0, NULL);
SET_STRING_ELT(out, i, (unescaped == NULL) ? NA_STRING : Rf_mkCharCE(unescaped, CE_UTF8));
out[i] = (unescaped == NULL) ? cpp11::na<cpp11::r_string>() : cpp11::r_string(unescaped);
xmlFree(unescaped);
}

UNPROTECT(1);
return out;
}
17 changes: 17 additions & 0 deletions tests/testthat/_snaps/xml_url.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# url_absolute

Code
url_absolute(c(".", "..", "/", "/x"), c("http://hadley.nz/a/b/c/d",
"http://foo.bar"))
Condition
Error:
! Base URL must be a character vector of length 1

# url_escape

Code
url_escape("a b c", reserved = c("a", "b"))
Condition
Error:
! `reserved` must be a character vector of length 1

21 changes: 9 additions & 12 deletions tests/testthat/test-xml_url.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@ test_that("url_absolute", {
c("http://hadley.nz/a/b/c/", "http://hadley.nz/a/b/", "http://hadley.nz/", "http://hadley.nz/x")
)

expect_error(
url_absolute(c(".", "..", "/", "/x"), c("http://hadley.nz/a/b/c/d", "http://foo.bar")),
"Base URL must be length 1"
)
expect_snapshot(error = TRUE, {
url_absolute(c(".", "..", "/", "/x"), c("http://hadley.nz/a/b/c/d", "http://foo.bar"))
})
})

test_that("url_relative", {
Expand All @@ -34,10 +33,9 @@ test_that("url_relative", {
"../c"
)

expect_error(
url_relative("http://hadley.nz/a/c", c("http://hadley.nz/a/b/c/d", "http://foo.bar")),
"Base URL must be length 1"
)
expect_snapshot(error = TRUE, {
url_relative("http://hadley.nz/a/c", c("http://hadley.nz/a/b/c/d", "http://foo.bar"))
})
})

test_that("url_parse", {
Expand Down Expand Up @@ -75,10 +73,9 @@ test_that("url_parse", {
})

test_that("url_escape", {
expect_error(
url_escape("a b c", reserved = c("a", "b")),
"`reserved` must be character vector of length 1"
)
expect_snapshot(error = TRUE, {
url_escape("a b c", reserved = c("a", "b"))
})

expect_equal(
url_escape("a b c"),
Expand Down

0 comments on commit 48ab979

Please sign in to comment.