From cfc6f2abec098d38e6758347cd1b60bfcdbe72fc Mon Sep 17 00:00:00 2001
From: Andreas van Cranenburgh <andreas@unstable.nl>
Date: Thu, 30 Apr 2020 19:41:41 +0200
Subject: [PATCH] Add contains() method

- contains() works like search() but returns a bool to avoid creating a
  Match object. See #12.
- add wrapper for re.Pattern so that contains() and count() methods are
  also available when falling back to re.
---
 src/compile.pxi |  4 +--
 src/pattern.pxi | 89 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/re2.pyx     |  9 ++++-
 tests/count.txt | 13 +++++---
 4 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/src/compile.pxi b/src/compile.pxi
index f56af557..1e53f602 100644
--- a/src/compile.pxi
+++ b/src/compile.pxi
@@ -20,7 +20,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608):
         elif current_notification == FALLBACK_WARNING:
             warnings.warn("WARNING: Using re module. Reason: %s" % error_msg)
         try:
-            result = re.compile(pattern, flags)
+            result = PythonRePattern(pattern, flags)
         except re.error as err:
             raise RegexError(*err.args)
         return result
@@ -93,7 +93,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608):
             raise RegexError(error_msg)
         elif current_notification == FALLBACK_WARNING:
             warnings.warn("WARNING: Using re module. Reason: %s" % error_msg)
-        return re.compile(original_pattern, flags)
+        return PythonRePattern(original_pattern, flags)
 
     cdef Pattern pypattern = Pattern()
     cdef map[cpp_string, int] named_groups = re_pattern.NamedCapturingGroups()
diff --git a/src/pattern.pxi b/src/pattern.pxi
index 5c75de7b..0950db2b 100644
--- a/src/pattern.pxi
+++ b/src/pattern.pxi
@@ -78,6 +78,45 @@ cdef class Pattern:
             release_cstring(&buf)
         return m
 
+    def contains(self, object string, int pos=0, int endpos=-1):
+        """contains(string[, pos[, endpos]]) --> bool.
+
+        Scan through string looking for a match, and return True or False."""
+        cdef char * cstring
+        cdef Py_ssize_t size
+        cdef Py_buffer buf
+        cdef int retval
+        cdef int encoded = 0
+        cdef StringPiece * sp
+
+        if 0 <= endpos <= pos:  # explicit endpos not past pos: nothing to scan
+            return False
+
+        bytestr = unicode_to_bytes(string, &encoded, self.encoded)
+        if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
+            raise TypeError('expected string or buffer')
+        try:
+            if encoded == 2 and (pos or endpos != -1):
+                utf8indices(cstring, size, &pos, &endpos)  # presumably char -> byte offsets; confirm
+            if pos > size:
+                return False
+            if 0 <= endpos < size:
+                size = endpos  # clamp the end of the scanned range to endpos
+
+            sp = new StringPiece(cstring, size)
+            with nogil:
+                retval = self.re_pattern.Match(
+                        sp[0],
+                        pos,
+                        size,
+                        UNANCHORED,
+                        NULL,
+                        0)
+            del sp
+        finally:
+            release_cstring(&buf)  # also runs on the early returns above
+        return retval != 0
+
     def count(self, object string, int pos=0, int endpos=-1):
         """Return number of non-overlapping matches of pattern in string."""
         cdef char * cstring
@@ -547,3 +586,83 @@ cdef class Pattern:
 
     def __dealloc__(self):
         del self.re_pattern
+
+
+class PythonRePattern:
+    """Wrapper around a compiled re pattern, used when falling back to re.
+
+    Delegates the standard pattern methods to the wrapped re.Pattern and
+    adds the extra methods defined by re2 (contains, count).
+    """
+
+    def __init__(self, pattern, flags=None):
+        """Compile pattern with re.compile().
+
+        flags may be omitted or None; both are treated as 0 (no flags).
+        """
+        if flags is None:
+            # re.compile() requires integer flags; None raises TypeError.
+            flags = 0
+        self._pattern = re.compile(pattern, flags)
+        self.pattern = pattern
+        self.flags = flags
+        self.groupindex = self._pattern.groupindex
+        self.groups = self._pattern.groups
+
+    def contains(self, string, pos=0, endpos=9223372036854775807):
+        """Return True if the pattern matches between pos and endpos.
+
+        pos and endpos are accepted for parity with Pattern.contains()."""
+        return bool(self._pattern.search(string, pos, endpos))
+
+    def count(self, string, pos=0, endpos=9223372036854775807):
+        """Return number of non-overlapping matches of pattern in string."""
+        return len(self._pattern.findall(string, pos, endpos))
+
+    def findall(self, string, pos=0, endpos=9223372036854775807):
+        """Delegate to the wrapped pattern's findall()."""
+        return self._pattern.findall(string, pos, endpos)
+
+    def finditer(self, string, pos=0, endpos=9223372036854775807):
+        """Delegate to the wrapped pattern's finditer()."""
+        return self._pattern.finditer(string, pos, endpos)
+
+    def fullmatch(self, string, pos=0, endpos=9223372036854775807):
+        """Delegate to the wrapped pattern's fullmatch()."""
+        return self._pattern.fullmatch(string, pos, endpos)
+
+    def match(self, string, pos=0, endpos=9223372036854775807):
+        """Delegate to the wrapped pattern's match()."""
+        return self._pattern.match(string, pos, endpos)
+
+    def scanner(self, string, pos=0, endpos=9223372036854775807):
+        """Delegate to the wrapped pattern's scanner()."""
+        return self._pattern.scanner(string, pos, endpos)
+
+    def search(self, string, pos=0, endpos=9223372036854775807):
+        """Delegate to the wrapped pattern's search()."""
+        return self._pattern.search(string, pos, endpos)
+
+    def split(self, string, maxsplit=0):
+        """Delegate to the wrapped pattern's split()."""
+        return self._pattern.split(string, maxsplit)
+
+    def sub(self, repl, string, count=0):
+        """Delegate to the wrapped pattern's sub()."""
+        return self._pattern.sub(repl, string, count)
+
+    def subn(self, repl, string, count=0):
+        """Delegate to the wrapped pattern's subn()."""
+        return self._pattern.subn(repl, string, count)
+
+    def __repr__(self):
+        """Return the repr of the wrapped re pattern."""
+        return repr(self._pattern)
+
+    def __reduce__(self):
+        """Return (callable, args) so pickle can rebuild the wrapper.
+
+        The first item must be the class: pickle requires a callable there,
+        and the instance itself is not callable.
+        """
+        return (self.__class__, (self.pattern, self.flags))
diff --git a/src/re2.pyx b/src/re2.pyx
index 36fe86b0..6638f5fb 100644
--- a/src/re2.pyx
+++ b/src/re2.pyx
@@ -72,7 +72,8 @@ This module exports the following functions::
     count     Count all occurrences of a pattern in a string.
     match     Match a regular expression pattern to the beginning of a string.
     fullmatch Match a regular expression pattern to all of a string.
-    search    Search a string for the presence of a pattern.
+    search    Search a string for a pattern and return Match object.
+    contains  Same as search, but only return bool.
     sub       Substitute occurrences of a pattern found in a string.
     subn      Same as sub, but also return the number of substitutions made.
     split     Split a string by the occurrences of a pattern.
@@ -170,6 +171,12 @@ def fullmatch(pattern, string, int flags=0):
     return compile(pattern, flags).fullmatch(string)
 
 
+def contains(pattern, string, int flags=0):
+    """Scan through string looking for a match to the pattern, returning
+    True if one is found anywhere in the string and False otherwise."""
+    return compile(pattern, flags).contains(string)
+
+
 def finditer(pattern, string, int flags=0):
     """Yield all non-overlapping matches in the string.
 
diff --git a/tests/count.txt b/tests/count.txt
index f5ab6ced..3c848fb7 100644
--- a/tests/count.txt
+++ b/tests/count.txt
@@ -9,13 +9,10 @@ This one is from http://docs.python.org/library/re.html?#finding-all-adverbs:
     >>> re2.count(r"\w+ly", "He was carefully disguised but captured quickly by police.")
     2
 
-This one makes sure all groups are found:
+Groups should not affect count():
 
     >>> re2.count(r"(\w+)=(\d+)", "foo=1,foo=2")
     2
-
-When there's only one matched group, it should not be returned in a tuple:
-
     >>> re2.count(r"(\w)\w", "fx")
     1
 
@@ -31,3 +28,11 @@ A pattern matching an empty string:
 
     >>> re2.count("", "foo")
     4
+
+contains tests
+==============
+
+	>>> re2.contains('a', 'bbabb')
+	True
+	>>> re2.contains('a', 'bbbbb')
+	False