Skip to content

Commit

Permalink
Avoid string copies by implementing replace with match (chapel-lang#2…
Browse files Browse the repository at this point in the history
…4757)

This PR optimizes the 'replace' methods in Regex. It avoids string
copies that were coming from moving the data to and from a `std::string`
when calling RE2's Replace/GlobalReplace. It does so by using our own
implementation of Replace by calling RE2's Match function in the
re2-interface.cc code so that we can construct a result buffer by
repeatedly doubling & then provide that when constructing a Chapel
string.

While there, this removes the need for doReplaceAndCountSlow, since now
that we have our own implementation of the replace function, we can
handle a maximum number of matches right there. While here, I noticed a
bug in the old implementation, which this change fixes.

The bug:

``` chapel
use Regex;

writeln("bb".replace(new regex(""), "a"); // ababa
writeln("bb".replace(new regex(""), "a", count=5)); // was aaaaabb but now ababa
```

Work on this PR also revealed another bug described in chapel-lang#24788. Fixing
that is left as future work.

How does it affect performance? These measurements were collected on my
PC with `start_test -performance
test/studies/shootout/submitted/regexredux*.chpl --numtrials 4`.

| Benchmark    | main   | this PR | speedup    |
| ------------ | ------ | ------- | ---------- |
| regexredux2  | 1.89 s | 1.55 s  | 22% faster |
| regexredux3  | 1.64 s | 1.29 s  | 25% faster |

Reviewed by @DanilaFe - thanks!

- [x] full comm=none testing
- [x] valgrind testing for test/regex
  • Loading branch information
mppf authored Apr 8, 2024
2 parents f0bf31a + 71a5e0f commit 6da6595
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 112 deletions.
90 changes: 6 additions & 84 deletions modules/standard/Regex.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ extern record qio_regex_string_piece_t {
private extern proc qio_regex_string_piece_isnull(ref sp:qio_regex_string_piece_t):bool;

private extern proc qio_regex_match(const ref re:qio_regex_t, text:c_ptrConst(c_char), textlen:int(64), startpos:int(64), endpos:int(64), anchor:c_int, ref submatch:qio_regex_string_piece_t, nsubmatch:int(64)):bool;
private extern proc qio_regex_replace(const ref re:qio_regex_t, repl:c_ptrConst(c_char), repllen:int(64), text:c_ptrConst(c_char), textlen:int(64), startpos:int(64), endpos:int(64), global:bool, ref replaced:c_ptrConst(c_char), ref replaced_len:int(64)):int(64);
private extern proc qio_regex_replace(const ref re:qio_regex_t, repl:c_ptrConst(c_char), repllen:int(64), text:c_ptrConst(c_char), textlen:int(64), maxreplace:int(64), ref replaced:c_ptrConst(c_char), ref replaced_len:int(64)):int(64);

// These two could be folded together if we had a way
// to check if a default argument was supplied
Expand Down Expand Up @@ -1113,85 +1113,6 @@ proc bytes.replaceAndCount(pattern: regex(bytes), replacement:bytes,

private inline proc doReplaceAndCount(x: ?t, pattern: regex(t), replacement: t,
count=-1) where (t==string || t==bytes) {
if count<0 || count==1 then
return doReplaceAndCountFast(x, pattern, replacement, global=(count!=1));
else
return doReplaceAndCountSlow(x, pattern, replacement, count);

}

private proc doReplaceAndCountSlow(x: ?t, pattern: regex(t), replacement: t,
count=-1) where (t==string || t==bytes) {
use ByteBufferHelpers;

var regexCopy:regex(t);
if pattern.home != here then regexCopy = pattern;
const localRegex = if pattern.home != here then regexCopy._regex
else pattern._regex;

const localX = x.localize();

var matchesDom = {0..#initBufferSizeForSlowReplaceAndCount};
var matches: [matchesDom] qio_regex_string_piece_t;

var curIdx = 0;
var totalBytesToRemove = 0;
var totalChunksToRemove = 0;
for i in 0..<count {
if i == matchesDom.size then matchesDom = {0..#matchesDom.size*2};

var got = qio_regex_match(localRegex, localX.c_str(), x.numBytes,
startpos=curIdx, endpos=x.numBytes,
QIO_REGEX_ANCHOR_UNANCHORED, matches[i], 1);
if !got then break;

curIdx = matches[i].offset + matches[i].len;
totalBytesToRemove += matches[i].len;
totalChunksToRemove += 1;
}
if totalChunksToRemove == 0 then return (x,0);

const numBytesInResult = x.numBytes-totalBytesToRemove+
(totalChunksToRemove*replacement.numBytes);

var (newBuff, buffSize) = bufferAlloc(numBytesInResult+1);
newBuff[numBytesInResult] = 0;

const localRepl = replacement.localize();
var readIdx = 0;
var writeIdx = 0;
for i in 0..#totalChunksToRemove {
var readOffset = matches[i].offset;

// copy from the original string
const copyLen = readOffset-readIdx;
bufferMemcpyLocal(dst=newBuff, src=localX.buff, len=copyLen,
dst_off=writeIdx, src_off=readIdx);
writeIdx += copyLen;

// copy the replacement
bufferMemcpyLocal(dst=newBuff, src=localRepl.buff, len=localRepl.numBytes,
dst_off=writeIdx);
writeIdx += localRepl.numBytes;

readIdx = (readOffset+matches[i].len);
}

// handle the last part
if readIdx < localX.numBytes {
const copyLen = localX.numBytes-readIdx;
bufferMemcpyLocal(dst=newBuff, src=localX.buff, len=copyLen,
dst_off=writeIdx, src_off=readIdx);
}

var ret = try! t.createAdoptingBuffer(newBuff, length=numBytesInResult,
size=buffSize);

return (ret, totalChunksToRemove);
}

private proc doReplaceAndCountFast(x: ?t, pattern: regex(t), replacement: t,
global:bool) where (t==string || t==bytes) {
var regexCopy:regex(t);
if pattern.home != here then regexCopy = pattern;
const localRegex = if pattern.home != here then regexCopy._regex
Expand All @@ -1204,10 +1125,11 @@ private proc doReplaceAndCountFast(x: ?t, pattern: regex(t), replacement: t,

var replaced:c_ptrConst(c_char);
var replaced_len:int(64);
var nreplaced: int = qio_regex_replace(localRegex, replacement.localize().c_str(),
replacement.numBytes, x.localize().c_str(),
x.numBytes, pos:int, endpos:int, global,
replaced, replaced_len);
var nreplaced: int =
qio_regex_replace(localRegex, replacement.localize().c_str(),
replacement.numBytes, x.localize().c_str(),
x.numBytes, count,
replaced, replaced_len);

var ret = try! t.createAdoptingBuffer(replaced, replaced_len);

Expand Down
2 changes: 1 addition & 1 deletion runtime/include/qio/qio_regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ qio_regex_string_piece_isnull(qio_regex_string_piece_t* sp)
// Returns true if we matched.
qio_bool qio_regex_match(qio_regex_t* regex, const char* str, int64_t str_len, int64_t startpos, int64_t endpos, int anchor, qio_regex_string_piece_t* submatch, int64_t nsubmatch);

int64_t qio_regex_replace(qio_regex_t* regex, const char* repl, int64_t repl_len, const char* str, int64_t str_len, int64_t startpos, int64_t endpos, qio_bool global, const char** str_out, int64_t* len_out);
int64_t qio_regex_replace(qio_regex_t* regex, const char* repl, int64_t repl_len, const char* str, int64_t str_len, int64_t maxreplace, const char** str_out, int64_t* len_out);


// Returns ENOERR if we matched, EFORMAT if we did not, or an IO error.
Expand Down
213 changes: 187 additions & 26 deletions runtime/src/qio/regex/bundled/re2-interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@

#include <limits>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <cstdlib>
#include <cstdio>

// We have run into issues when using our chpl-atomics.h file from C++ code
// under CHPL_ATOMICS=cstdlib. As a workaround, just use std::atomic and avoid
Expand All @@ -37,6 +37,7 @@
#include "qio.h" // for channel operations
#include "chpltypes.h" // must be before "error.h" to prevent include errors
#include "error.h"
#include "encoding/encoding-support.h"
#undef printf

#include "re2/re2.h"
Expand Down Expand Up @@ -227,14 +228,12 @@ void qio_regex_create_compile_flags(const char* str, int64_t str_len, const char
void qio_regex_retain(const qio_regex_t* compiled)
{
re_t* re = (re_t*) compiled->regex;
//fprintf(stdout, "Retain %p\n", re);
DO_RETAIN(re);
}

void qio_regex_release(qio_regex_t* compiled)
{
re_t* re = (re_t*) compiled->regex;
//fprintf(stdout, "Release %p\n", re);
DO_RELEASE(re, re_free);
compiled->regex = NULL;
}
Expand Down Expand Up @@ -320,32 +319,194 @@ qio_bool qio_regex_match(qio_regex_t* regex, const char* text, int64_t text_len,
return ret;
}

int64_t qio_regex_replace(qio_regex_t* regex, const char* repl, int64_t repl_len, const char* str, int64_t str_len, int64_t startpos, int64_t endpos, qio_bool global, const char** str_out, int64_t* len_out)
{
// This could make fewer copies of everything...
// ... but it will work for the moment and this is the
// expedient way.
StringPiece rewrite(repl, repl_len);
std::string s(str, str_len);
int64_t ret = 0;
if (RE2* re = (RE2*) regex->regex) {
char* output = NULL;
if( global ) {
ret = RE2::GlobalReplace(&s, *re, rewrite);

// returns true for OK
static bool append(char*& buf, // buffer
int64_t& buf_sz, // allocated size
int64_t& buf_len, // amount used
const char* data, // to append
int64_t data_len) {
if (data_len == 0)
return true; // nothing else to do

if (data_len < 0)
return false;

if (buf == NULL || buf_len + data_len > buf_sz) {
// reallocate buf
int64_t new_sz = buf_sz * 2;
if (new_sz < buf_len + data_len) {
new_sz = buf_len + data_len + 32;
}
if (new_sz < 64) {
new_sz = 64;
}
char* new_buf = (char*) qio_realloc((void*)buf, new_sz);
if (new_buf == NULL) return false; // failure to allocate
// record the new buffer and size for the caller
buf = new_buf;
buf_sz = new_sz;
}

// append the data
assert(buf_len >= 0 && data_len >= 0 && buf_sz >= 0);
assert(buf_len + data_len <= buf_sz);
memcpy(buf + buf_len, data, data_len);
buf_len += data_len;

return true; // everything is OK
}

static bool append_rewrite(char*& buf, // buffer
int64_t& buf_sz, // allocated size
int64_t& buf_len, // amount used
const StringPiece& rewrite,
const StringPiece* vec,
int veclen) {
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end;
s++) {
if (*s != '\\') {
// append 1 byte
append(buf, buf_sz, buf_len, s, 1);
continue;
}
s++;
int c = (s < end) ? *s : EOF;
if (isdigit(c)) {
int n = (c - '0');
if (n >= veclen) {
return false;
}
StringPiece snip = vec[n];
if (!snip.empty())
append(buf, buf_sz, buf_len, snip.data(), snip.size());
} else if (c == '\\') {
const char* backslash = "\\";
append(buf, buf_sz, buf_len, backslash, 1);
} else {
if( RE2::Replace(&s, *re, rewrite) ) {
ret = 1;
} else {
ret = 0;
return false;
}
}
return true;
}

static int free_and_return_error(char* buf) {
qio_free((void*)buf);
return -1;
}

// these should match re2.cc
static const int kMaxArgs = 16;
static const int kVecSize = 1+kMaxArgs;

// this function is loosly based upon RE2::GlobalReplace
static int replace(const RE2& re, const StringPiece& rewrite,
const StringPiece& str,
int maxreplace,
const char** str_out, int64_t* len_out) {
StringPiece vec[kVecSize];
int nvec = 1 + RE2::MaxSubmatch(rewrite);
if (nvec > 1 + re.NumberOfCapturingGroups())
return -1;
if (nvec > static_cast<int>(sizeof(vec)/sizeof(StringPiece)))
return -1;

const char* p = str.data();
const char* ep = p + str.size();
const char* lastend = NULL;

char* buf = NULL;
int64_t buf_sz = 0;
int64_t buf_len = 0;

int count = 0;
while (p <= ep) {
// don't try to replace anything if we are replacing 0
if (maxreplace == 0)
break;

if (!re.Match(str, static_cast<size_t>(p - str.data()),
str.size(), RE2::UNANCHORED, vec, nvec))
break;

// append the data before the match
if (p < vec[0].data())
if (!append(buf, buf_sz, buf_len, p, vec[0].data() - p))
return free_and_return_error(buf);

if (vec[0].data() == lastend && vec[0].empty() && count > 0) {
// quit if we are at the end (trailing \0 added later)
if (p == ep) {
break;
}

// Disallow empty match at end of last match: skip ahead.
//
if (re.options().encoding() == RE2::Options::EncodingUTF8) {
// append the character
int nbytes = 0;
int32_t chr = 0;
int failed =
chpl_enc_decode_char_buf_utf8(&chr, &nbytes, p, ep - p, false);
if (failed == 0) {
if (!append(buf, buf_sz, buf_len, p, nbytes))
return free_and_return_error(buf);
p += nbytes;
continue;
}
}
// append a character; either not in UTF-8 mode or there
// is some UTF-8 encoding issue
if (p < ep)
if (!append(buf, buf_sz, buf_len, p, 1))
return free_and_return_error(buf);

p++;
continue;
}
output = (char*) qio_malloc(s.length()+1);
memcpy(output, s.data(), s.length());
output[s.length()] = '\0';
*str_out = output;
*len_out = s.length();

// append the rewrite
if (!append_rewrite(buf, buf_sz, buf_len, rewrite, vec, nvec))
return free_and_return_error(buf);

// continue with just after the match
p = vec[0].data() + vec[0].size();
lastend = p;
count++;

// stop if we have replaced too many
if (count == maxreplace)
break;
}
return ret;

// append anything after the last match
if (p < ep)
if (!append(buf, buf_sz, buf_len, p, ep - p))
return free_and_return_error(buf);

// append a null byte
const char* empty = "";
if (!append(buf, buf_sz, buf_len, empty, 1))
return free_and_return_error(buf);

// return buffer to caller
*str_out = buf;
*len_out = buf_len-1; // do not count trailing null byte

return count;
}

int64_t qio_regex_replace(qio_regex_t* regex, const char* repl, int64_t repl_len, const char* str, int64_t str_len, int64_t maxreplace, const char** str_out, int64_t* len_out)
{
if (RE2* re = (RE2*) regex->regex) {
StringPiece rewrite(repl, repl_len);
StringPiece s(str, str_len);
return replace(*re, rewrite, s,
maxreplace,
str_out, len_out);
}
return 0;
}

int qio_regex_channel_read_byte(qio_channel_s* ch);
Expand Down
2 changes: 1 addition & 1 deletion runtime/src/qio/regex/none/fail-regex.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ bool qio_regex_match(qio_regex_t* regex, const char* text, int64_t str_len, int6
return false;
}

int64_t qio_regex_replace(qio_regex_t* regex, const char* repl, int64_t repl_len, const char* str, int64_t str_len, int64_t startpos, int64_t endpos, qio_bool global, const char** str_out, int64_t* len_out)
int64_t qio_regex_replace(qio_regex_t* regex, const char* repl, int64_t repl_len, const char* str, int64_t str_len, int64_t maxreplace, const char** str_out, int64_t* len_out)
{
chpl_internal_error("No Regex Support");
return 0;
Expand Down
Loading

0 comments on commit 6da6595

Please sign in to comment.