diff --git a/Dockerfile b/Dockerfile
index 1af642f..357b17d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -132,6 +132,10 @@ RUN wget -q https://downloads.python.org/pypy/pypy3.6-v7.3.3-linux64.tar.bz2 -O
     ln -s /opt/pypy3/bin/pypy3 /usr/local/bin/pypy3 && \
     rm pypy3.6-v7.3.3-linux64.tar.bz2
 
+## R
+RUN apt-get install -yq --no-install-recommends \
+    r-base r-cran-readr
+
 ## Ruby
 RUN apt-get install -yq --no-install-recommends \
     ruby-full
diff --git a/r/README.md b/r/README.md
new file mode 100644
index 0000000..77520cd
--- /dev/null
+++ b/r/README.md
@@ -0,0 +1,8 @@
+# R Regex Benchmark
+
+## How to run
+
+```sh
+# R
+Rscript --vanilla benchmark.R <filename>
+```
diff --git a/r/benchmark.R b/r/benchmark.R
new file mode 100644
index 0000000..49591fd
--- /dev/null
+++ b/r/benchmark.R
@@ -0,0 +1,40 @@
+#!/usr/bin/env Rscript
+library(readr)
+
+# Time one regex over the input and print "<elapsed ms> - <match count>".
+#
+# data:    single string containing the text to search.
+# pattern: regular expression, matched with perl = TRUE and useBytes = TRUE.
+measure <- function(data, pattern) {
+  start <- Sys.time()
+  hits <- gregexpr(pattern, data, perl = TRUE, useBytes = TRUE)
+  count <- length(regmatches(data, hits)[[1]])
+  end <- Sys.time()
+
+  # Pin the unit to seconds: a bare `end - start` lets difftime pick
+  # secs/mins/hours on its own, which would break the conversion to ms.
+  elapsed_ms <- as.numeric(difftime(end, start, units = "secs")) * 1e3
+
+  # writeLines() emits the bare line; print() would wrap it as `[1] "..."`.
+  writeLines(paste0(format(elapsed_ms, digits = 3), " - ", count))
+}
+
+args <- commandArgs(trailingOnly = TRUE)
+
+if (length(args) == 0) {
+  stop("At least one argument must be supplied (input file).", call. = FALSE)
+}
+
+file_str <- read_file(args[1])
+
+# Patterns are raw strings, so every backslash reaches the regex engine as
+# written and must not be doubled.
+
+# Email
+measure(file_str, r"{[\w\.+-]+@[\w\.-]+\.[\w\.-]+}")
+
+# URI
+measure(file_str, r"{[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?}")
+
+# IPv4
+measure(file_str, r"{(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])}")
diff --git a/run-benchmarks.php b/run-benchmarks.php
index 86756b0..78e35b0 100644
--- a/run-benchmarks.php
+++ b/run-benchmarks.php
@@ -46,6 +46,7 @@
     'Python 3' => 'python3.6 python/benchmark.py',
     'Python PyPy2' => 'pypy2 python/benchmark.py',
     'Python PyPy3' => 'pypy3 python/benchmark.py',
+    'R' => 'Rscript --vanilla r/benchmark.R',
     'Ruby' => 'ruby ruby/benchmark.rb',
     'Rust' => 'rust/target/release/benchmark',
 ];