Skip to content

Commit

Permalink
Add contains() method
Browse files Browse the repository at this point in the history
- contains() works like search() but returns a bool to avoid creating a
  Match object. See #12.
- add wrapper for re.Pattern so that contains() and count() methods are
  also available when falling back to re.
  • Loading branch information
andreasvc committed Apr 30, 2020
1 parent e05bad3 commit cfc6f2a
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 7 deletions.
4 changes: 2 additions & 2 deletions src/compile.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608):
elif current_notification == FALLBACK_WARNING:
warnings.warn("WARNING: Using re module. Reason: %s" % error_msg)
try:
result = re.compile(pattern, flags)
result = PythonRePattern(pattern, flags)
except re.error as err:
raise RegexError(*err.args)
return result
Expand Down Expand Up @@ -93,7 +93,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608):
raise RegexError(error_msg)
elif current_notification == FALLBACK_WARNING:
warnings.warn("WARNING: Using re module. Reason: %s" % error_msg)
return re.compile(original_pattern, flags)
return PythonRePattern(original_pattern, flags)

cdef Pattern pypattern = Pattern()
cdef map[cpp_string, int] named_groups = re_pattern.NamedCapturingGroups()
Expand Down
89 changes: 89 additions & 0 deletions src/pattern.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,45 @@ cdef class Pattern:
release_cstring(&buf)
return m

def contains(self, object string, int pos=0, int endpos=-1):
    """contains(string[, pos[, endpos]]) --> bool.

    Scan through string looking for a match, and return True or False.

    Unlike the Match-returning methods, no Match object is built; only
    the truth value of the underlying ``Match`` call is reported.

    :param string: the subject to scan; it is passed through
        ``unicode_to_bytes`` and ``pystring_to_cstring`` before matching.
    :param pos: offset at which scanning starts (default 0).
    :param endpos: offset at which scanning stops; a negative value
        (the default, -1) leaves the end of the subject as the limit.
    :returns: True if the pattern matches anywhere in the selected
        range, False otherwise.
    :raises TypeError: if ``pystring_to_cstring`` cannot obtain a buffer
        from the subject.
    """
    cdef char * cstring
    cdef Py_ssize_t size
    cdef Py_buffer buf
    cdef int retval
    cdef int encoded = 0
    cdef StringPiece * sp

    # A non-negative endpos at or before pos is reported as "no match"
    # without touching the subject at all.
    if 0 <= endpos <= pos:
        return False

    bytestr = unicode_to_bytes(string, &encoded, self.encoded)
    if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
        raise TypeError('expected string or buffer')
    try:
        # NOTE(review): encoded == 2 presumably marks a unicode subject
        # whose character offsets differ from byte offsets, so pos/endpos
        # are rewritten in place by utf8indices -- confirm against
        # unicode_to_bytes. Skipped when both offsets are at their defaults.
        if encoded == 2 and (pos or endpos != -1):
            utf8indices(cstring, size, &pos, &endpos)
        if pos > size:
            return False
        # Only a non-negative endpos inside the buffer shortens the scan.
        if 0 <= endpos < size:
            size = endpos

        sp = new StringPiece(cstring, size)
        # The match itself runs with the GIL released.
        with nogil:
            # No submatch storage is supplied (NULL, 0); only the
            # integer result of the call is kept.
            retval = self.re_pattern.Match(
                    sp[0],
                    pos,
                    size,
                    UNANCHORED,
                    NULL,
                    0)
        del sp
    finally:
        # Runs on every exit path from the try block, including the
        # early ``return False`` above.
        release_cstring(&buf)
    return retval != 0

def count(self, object string, int pos=0, int endpos=-1):
"""Return number of non-overlapping matches of pattern in string."""
cdef char * cstring
Expand Down Expand Up @@ -547,3 +586,53 @@ cdef class Pattern:

def __dealloc__(self):
del self.re_pattern


class PythonRePattern:
    """A wrapper for re.Pattern to support the extra methods defined by re2
    (contains, count).

    The wrapped pattern is compiled with the standard ``re`` module; every
    regular pattern method is forwarded to it unchanged, while
    ``contains()`` and ``count()`` are implemented on top of ``search()``
    and ``findall()``.

    Attributes:
        pattern: the pattern exactly as passed to the constructor.
        flags: the flags passed to the constructor (``None`` becomes 0).
        groupindex: ``groupindex`` of the wrapped compiled pattern.
        groups: ``groups`` of the wrapped compiled pattern.
    """

    def __init__(self, pattern, flags=0):
        """Compile ``pattern`` with ``re.compile`` and wrap the result.

        ``flags=None`` is accepted and treated as "no flags"; passing
        ``None`` straight to ``re.compile`` raises ``TypeError``.
        Compilation errors from ``re.compile`` propagate to the caller.
        """
        if flags is None:
            flags = 0
        self._pattern = re.compile(pattern, flags)
        self.pattern = pattern
        self.flags = flags
        self.groupindex = self._pattern.groupindex
        self.groups = self._pattern.groups

    def contains(self, string, pos=0, endpos=9223372036854775807):
        """Return True if the pattern matches anywhere in ``string``.

        ``pos`` and ``endpos`` are forwarded to ``search()`` so the scan
        can be restricted just like the other methods of this wrapper.
        """
        return self._pattern.search(string, pos, endpos) is not None

    def count(self, string, pos=0, endpos=9223372036854775807):
        """Return number of non-overlapping matches of pattern in string."""
        return len(self._pattern.findall(string, pos, endpos))

    def findall(self, string, pos=0, endpos=9223372036854775807):
        """Forward to ``findall()`` of the wrapped pattern."""
        return self._pattern.findall(string, pos, endpos)

    def finditer(self, string, pos=0, endpos=9223372036854775807):
        """Forward to ``finditer()`` of the wrapped pattern."""
        return self._pattern.finditer(string, pos, endpos)

    def fullmatch(self, string, pos=0, endpos=9223372036854775807):
        """Forward to ``fullmatch()`` of the wrapped pattern."""
        return self._pattern.fullmatch(string, pos, endpos)

    def match(self, string, pos=0, endpos=9223372036854775807):
        """Forward to ``match()`` of the wrapped pattern."""
        return self._pattern.match(string, pos, endpos)

    def scanner(self, string, pos=0, endpos=9223372036854775807):
        """Forward to ``scanner()`` of the wrapped pattern."""
        return self._pattern.scanner(string, pos, endpos)

    def search(self, string, pos=0, endpos=9223372036854775807):
        """Forward to ``search()`` of the wrapped pattern."""
        return self._pattern.search(string, pos, endpos)

    def split(self, string, maxsplit=0):
        """Forward to ``split()`` of the wrapped pattern."""
        return self._pattern.split(string, maxsplit)

    def sub(self, repl, string, count=0):
        """Forward to ``sub()`` of the wrapped pattern."""
        return self._pattern.sub(repl, string, count)

    def subn(self, repl, string, count=0):
        """Forward to ``subn()`` of the wrapped pattern."""
        return self._pattern.subn(repl, string, count)

    def __repr__(self):
        """Return the repr of the wrapped compiled pattern."""
        return repr(self._pattern)

    def __reduce__(self):
        """Support copy/pickle by re-running the constructor.

        The first item must be a callable that rebuilds the object, so the
        class is returned here (an instance is not callable).
        """
        return (type(self), (self.pattern, self.flags))
9 changes: 8 additions & 1 deletion src/re2.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ This module exports the following functions::
count Count all occurrences of a pattern in a string.
match Match a regular expression pattern to the beginning of a string.
fullmatch Match a regular expression pattern to all of a string.
search Search a string for the presence of a pattern.
search Search a string for a pattern and return a Match object.
contains Same as search, but only return a bool.
sub Substitute occurrences of a pattern found in a string.
subn Same as sub, but also return the number of substitutions made.
split Split a string by the occurrences of a pattern.
Expand Down Expand Up @@ -170,6 +171,12 @@ def fullmatch(pattern, string, int flags=0):
return compile(pattern, flags).fullmatch(string)


def contains(pattern, string, int flags=0):
    """Report whether ``pattern`` matches anywhere in ``string``.

    The pattern is compiled with ``flags`` and the check is delegated to
    the compiled object's ``contains()`` method; the result is True or
    False."""
    compiled = compile(pattern, flags)
    return compiled.contains(string)


def finditer(pattern, string, int flags=0):
"""Yield all non-overlapping matches in the string.
Expand Down
13 changes: 9 additions & 4 deletions tests/count.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,10 @@ This one is from http://docs.python.org/library/re.html?#finding-all-adverbs:
>>> re2.count(r"\w+ly", "He was carefully disguised but captured quickly by police.")
2

This one makes sure all groups are found:
Groups should not affect count():

>>> re2.count(r"(\w+)=(\d+)", "foo=1,foo=2")
2

When there's only one matched group, it should not be returned in a tuple:

>>> re2.count(r"(\w)\w", "fx")
1

Expand All @@ -31,3 +28,11 @@ A pattern matching an empty string:

>>> re2.count("", "foo")
4

contains tests
==============

>>> re2.contains('a', 'bbabb')
True
>>> re2.contains('a', 'bbbbb')
False

0 comments on commit cfc6f2a

Please sign in to comment.