From e7a4804f1a4b8358bdd28d92045eab8dea067573 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh Date: Fri, 31 Jul 2015 23:51:37 +0200 Subject: [PATCH 001/114] implement finditer as generator --- src/re2.pyx | 93 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 23 deletions(-) diff --git a/src/re2.pyx b/src/re2.pyx index 95b17c8e..71752944 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -32,8 +32,8 @@ def set_fallback_notification(level): """ Set the fallback notification to a level; one of: FALLBACK_QUIETLY - FALLBACK_WARNING - FALLBACK_EXCEPTION + FALLBACK_WARNING + FALLBACK_EXCEPTION """ global current_notification level = int(level) @@ -462,8 +462,11 @@ cdef class Pattern: print cpp_to_pystring(s[0]) + "\n" sys.stdout.flush() - - cdef _finditer(self, object string, int pos=0, int endpos=-1, int as_match=0): + def finditer(self, object string, int pos=0, int endpos=-1): + """ + Yield all non-overlapping matches of pattern in string as Match + objects. 
+ """ cdef Py_ssize_t size cdef int result cdef char * cstring @@ -485,11 +488,18 @@ cdef class Pattern: while True: m = Match(self, self.ngroups + 1) with nogil: - result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) + result = self.re_pattern.Match( + sp[0], + pos, + size, + _re2.UNANCHORED, + m.matches, + self.ngroups + 1) if result == 0: break m.encoded = encoded - m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) + m.named_groups = _re2.addressof( + self.re_pattern.NamedCapturingGroups()) m.nmatches = self.ngroups + 1 m.match_string = string m._pos = pos @@ -497,13 +507,7 @@ cdef class Pattern: m._endpos = len(string) else: m._endpos = endpos - if as_match: - if self.ngroups > 1: - resultlist.append(m.groups("")) - else: - resultlist.append(m.group(self.ngroups)) - else: - resultlist.append(m) + yield m if pos == size: break # offset the pos to move to the next point @@ -512,22 +516,65 @@ cdef class Pattern: else: pos = m.matches[0].data() - cstring + m.matches[0].length() del sp - return resultlist - - def finditer(self, object string, int pos=0, int endpos=-1): - """ - Return all non-overlapping matches of pattern in string as a list - of match objects. - """ - # TODO This builds a list and returns its iterator. Probably could be more memory efficient - return self._finditer(string, pos, endpos, 0).__iter__() def findall(self, object string, int pos=0, int endpos=-1): """ Return all non-overlapping matches of pattern in string as a list of strings. 
""" - return self._finditer(string, pos, endpos, 1) + cdef Py_ssize_t size + cdef int result + cdef char * cstring + cdef _re2.StringPiece * sp + cdef Match m + cdef list resultlist = [] + cdef int encoded = 0 + + string = unicode_to_bytestring(string, &encoded) + if pystring_to_bytestring(string, &cstring, &size) == -1: + raise TypeError("expected string or buffer") + encoded = encoded + + if endpos != -1 and endpos < size: + size = endpos + + sp = new _re2.StringPiece(cstring, size) + + while True: + m = Match(self, self.ngroups + 1) + with nogil: + result = self.re_pattern.Match( + sp[0], + pos, + size, + _re2.UNANCHORED, + m.matches, + self.ngroups + 1) + if result == 0: + break + m.encoded = encoded + m.named_groups = _re2.addressof( + self.re_pattern.NamedCapturingGroups()) + m.nmatches = self.ngroups + 1 + m.match_string = string + m._pos = pos + if endpos == -1: + m._endpos = len(string) + else: + m._endpos = endpos + if self.ngroups > 1: + resultlist.append(m.groups("")) + else: + resultlist.append(m.group(self.ngroups)) + if pos == size: + break + # offset the pos to move to the next point + if m.matches[0].length() == 0: + pos += 1 + else: + pos = m.matches[0].data() - cstring + m.matches[0].length() + del sp + return resultlist def split(self, string, int maxsplit=0): """ From 2d86f8e9f863bd4b957ab31e7fcae92f97b0a726 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh Date: Sat, 1 Aug 2015 22:58:29 +0200 Subject: [PATCH 002/114] Python 2/3 compatibility, support buffer objects, &c. - Python 2/3 compatibility - support searching in buffer objects (e.g., mmap) - add module docstring - some refactoring - remove outdated Cython-generated file - modify setup.py to cythonize as needed. 
--- setup.py | 49 +- src/re2.cpp | 15724 --------------------------------------- src/re2.pyx | 817 +- tests/findall.txt | 1 + tests/finditer.txt | 10 +- tests/issue4.txt | 21 +- tests/match_expand.txt | 9 +- tests/mmap.txt | 17 + tests/namedgroups.txt | 17 +- tests/pattern.txt | 1 + tests/performance.py | 9 +- tests/re2_test.py | 2 +- tests/search.txt | 15 +- tests/split.txt | 16 +- tests/sub.txt | 7 +- tests/test_re.py | 29 +- tests/unicode.txt | 93 +- 17 files changed, 621 insertions(+), 16216 deletions(-) delete mode 100644 src/re2.cpp mode change 100755 => 100644 tests/finditer.txt create mode 100644 tests/mmap.txt diff --git a/setup.py b/setup.py index 580824e0..c90f4438 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import re from distutils.core import setup, Extension, Command -MINIMUM_CYTHON_VERSION = '0.13' +MINIMUM_CYTHON_VERSION = '0.15' def cmp(a, b): @@ -33,19 +33,24 @@ def normalize(v): cmdclass = {'test': TestCommand} ext_files = [] -if '--cython' in sys.argv[1:]: +if '--cython' in sys.argv[1:] or not os.path.exists('src/re2.cpp'): # Using Cython - sys.argv.remove('--cython') + try: + sys.argv.remove('--cython') + except ValueError: + pass from Cython.Compiler.Main import Version if version_compare(MINIMUM_CYTHON_VERSION, Version.version) > 0: raise ValueError("Cython is version %s, but needs to be at least %s." 
% (Version.version, MINIMUM_CYTHON_VERSION)) from Cython.Distutils import build_ext + from Cython.Build import cythonize cmdclass['build_ext'] = build_ext - ext_files.append("src/re2.pyx") + use_cython = True else: # Building from C ext_files.append("src/re2.cpp") + use_cython = False # Locate the re2 module @@ -77,6 +82,30 @@ def get_authors(): return ', '.join(authors) def main(): + include_dirs = [os.path.join(re2_prefix, "include")] if re2_prefix else [] + libraries = ["re2"] + library_dirs = [os.path.join(re2_prefix, "lib")] if re2_prefix else [] + runtime_library_dirs = [os.path.join(re2_prefix, "lib") + ] if re2_prefix else [] + ext_modules = [ + Extension( + "re2", + sources=["src/re2.pyx" if use_cython else "src/re2.cpp"], + language="c++", + include_dirs=include_dirs, + libraries=libraries, + library_dirs=library_dirs, + runtime_library_dirs=runtime_library_dirs, + )] + if use_cython: + ext_modules = cythonize(ext_modules, + language_level=3, + annotate=True, + compiler_directives={ + 'embedsignature': True, + 'warn.unused': True, + 'warn.unreachable': True, + }) setup( name="re2", version="0.2.23", @@ -86,17 +115,7 @@ def main(): license="New BSD License", author_email = "mike@axiak.net", url = "http://github.com/axiak/pyre2/", - ext_modules = [ - Extension( - "re2", - ext_files, - language="c++", - include_dirs=[os.path.join(re2_prefix, "include")] if re2_prefix else [], - libraries=["re2"], - library_dirs=[os.path.join(re2_prefix, "lib")] if re2_prefix else [], - runtime_library_dirs=[os.path.join(re2_prefix, "lib")] if re2_prefix else [], - ) - ], + ext_modules = ext_modules, cmdclass=cmdclass, classifiers = [ 'License :: OSI Approved :: BSD License', diff --git a/src/re2.cpp b/src/re2.cpp deleted file mode 100644 index 7bf4893e..00000000 --- a/src/re2.cpp +++ /dev/null @@ -1,15724 +0,0 @@ -/* Generated by Cython 0.13 on Fri May 15 15:29:13 2015 */ - -#define PY_SSIZE_T_CLEAN -#include "Python.h" -#ifndef Py_PYTHON_H - #error Python headers needed to 
compile C extensions, please install development version of Python. -#else - -#include /* For offsetof */ -#ifndef offsetof -#define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) -#endif - -#if !defined(WIN32) && !defined(MS_WINDOWS) - #ifndef __stdcall - #define __stdcall - #endif - #ifndef __cdecl - #define __cdecl - #endif - #ifndef __fastcall - #define __fastcall - #endif -#endif - -#ifndef DL_IMPORT - #define DL_IMPORT(t) t -#endif -#ifndef DL_EXPORT - #define DL_EXPORT(t) t -#endif - -#ifndef PY_LONG_LONG - #define PY_LONG_LONG LONG_LONG -#endif - -#if PY_VERSION_HEX < 0x02040000 - #define METH_COEXIST 0 - #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type) - #define PyDict_Contains(d,o) PySequence_Contains(d,o) -#endif - -#if PY_VERSION_HEX < 0x02050000 - typedef int Py_ssize_t; - #define PY_SSIZE_T_MAX INT_MAX - #define PY_SSIZE_T_MIN INT_MIN - #define PY_FORMAT_SIZE_T "" - #define PyInt_FromSsize_t(z) PyInt_FromLong(z) - #define PyInt_AsSsize_t(o) PyInt_AsLong(o) - #define PyNumber_Index(o) PyNumber_Int(o) - #define PyIndex_Check(o) PyNumber_Check(o) - #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message) -#endif - -#if PY_VERSION_HEX < 0x02060000 - #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) - #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) - #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) - #define PyVarObject_HEAD_INIT(type, size) \ - PyObject_HEAD_INIT(type) size, - #define PyType_Modified(t) - - typedef struct { - void *buf; - PyObject *obj; - Py_ssize_t len; - Py_ssize_t itemsize; - int readonly; - int ndim; - char *format; - Py_ssize_t *shape; - Py_ssize_t *strides; - Py_ssize_t *suboffsets; - void *internal; - } Py_buffer; - - #define PyBUF_SIMPLE 0 - #define PyBUF_WRITABLE 0x0001 - #define PyBUF_FORMAT 0x0004 - #define PyBUF_ND 0x0008 - #define PyBUF_STRIDES (0x0010 | PyBUF_ND) - #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) - #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) - 
#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) - #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) - -#endif - -#if PY_MAJOR_VERSION < 3 - #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" -#else - #define __Pyx_BUILTIN_MODULE_NAME "builtins" -#endif - -#if PY_MAJOR_VERSION >= 3 - #define Py_TPFLAGS_CHECKTYPES 0 - #define Py_TPFLAGS_HAVE_INDEX 0 -#endif - -#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3) - #define Py_TPFLAGS_HAVE_NEWBUFFER 0 -#endif - -#if PY_MAJOR_VERSION >= 3 - #define PyBaseString_Type PyUnicode_Type - #define PyStringObject PyUnicodeObject - #define PyString_Type PyUnicode_Type - #define PyString_Check PyUnicode_Check - #define PyString_CheckExact PyUnicode_CheckExact -#endif - -#if PY_VERSION_HEX < 0x02060000 - #define PyBytesObject PyStringObject - #define PyBytes_Type PyString_Type - #define PyBytes_Check PyString_Check - #define PyBytes_CheckExact PyString_CheckExact - #define PyBytes_FromString PyString_FromString - #define PyBytes_FromStringAndSize PyString_FromStringAndSize - #define PyBytes_FromFormat PyString_FromFormat - #define PyBytes_DecodeEscape PyString_DecodeEscape - #define PyBytes_AsString PyString_AsString - #define PyBytes_AsStringAndSize PyString_AsStringAndSize - #define PyBytes_Size PyString_Size - #define PyBytes_AS_STRING PyString_AS_STRING - #define PyBytes_GET_SIZE PyString_GET_SIZE - #define PyBytes_Repr PyString_Repr - #define PyBytes_Concat PyString_Concat - #define PyBytes_ConcatAndDel PyString_ConcatAndDel - #define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type) - #define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type) -#endif - -#ifndef PySet_CheckExact -# define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) -#endif - -#if PY_MAJOR_VERSION >= 3 - #define PyInt_Type PyLong_Type - #define PyInt_Check(op) PyLong_Check(op) - #define PyInt_CheckExact(op) PyLong_CheckExact(op) - #define PyInt_FromString PyLong_FromString - #define PyInt_FromUnicode PyLong_FromUnicode - 
#define PyInt_FromLong PyLong_FromLong - #define PyInt_FromSize_t PyLong_FromSize_t - #define PyInt_FromSsize_t PyLong_FromSsize_t - #define PyInt_AsLong PyLong_AsLong - #define PyInt_AS_LONG PyLong_AS_LONG - #define PyInt_AsSsize_t PyLong_AsSsize_t - #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask - #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask -#endif - -#if PY_MAJOR_VERSION >= 3 - #define PyBoolObject PyLongObject -#endif - - -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) -#else - #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) -#endif - -#if PY_MAJOR_VERSION >= 3 - #define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) -#endif - -#if PY_VERSION_HEX < 0x02050000 - #define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),((char *)(n))) - #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),((char *)(n)),(a)) - #define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),((char *)(n))) -#else - #define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),(n)) - #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),(n),(a)) - #define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),(n)) -#endif - -#if PY_VERSION_HEX < 0x02050000 - #define __Pyx_NAMESTR(n) ((char *)(n)) - #define __Pyx_DOCSTR(n) ((char *)(n)) -#else - #define __Pyx_NAMESTR(n) (n) - #define __Pyx_DOCSTR(n) (n) -#endif - -#ifdef __cplusplus -#define __PYX_EXTERN_C extern "C" -#else -#define __PYX_EXTERN_C extern -#endif - -#if defined(WIN32) || defined(MS_WINDOWS) -#define _USE_MATH_DEFINES -#endif -#include -#define __PYX_HAVE_API__re2 -#include -#include -#include "re2/stringpiece.h" -#include "re2/re2.h" -#include "_re2macros.h" - -/* inline attribute */ -#ifndef CYTHON_INLINE - #if defined(__GNUC__) - 
#define CYTHON_INLINE __inline__ - #elif defined(_MSC_VER) - #define CYTHON_INLINE __inline - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define CYTHON_INLINE inline - #else - #define CYTHON_INLINE - #endif -#endif - -/* unused attribute */ -#ifndef CYTHON_UNUSED -# if defined(__GNUC__) -# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) -# define CYTHON_UNUSED __attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -# elif defined(__ICC) || defined(__INTEL_COMPILER) -# define CYTHON_UNUSED __attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -#endif - -typedef struct {PyObject **p; char *s; const long n; const char* encoding; const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/ - - -/* Type Conversion Predeclarations */ - -#define __Pyx_PyBytes_FromUString(s) PyBytes_FromString((char*)s) -#define __Pyx_PyBytes_AsUString(s) ((unsigned char*) PyBytes_AsString(s)) - -#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False)) -static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); -static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x); - -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); -static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*); - -#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) - - -#ifdef __GNUC__ -/* Test for GCC > 2.95 */ -#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) -#else /* __GNUC__ > 2 ... */ -#define likely(x) (x) -#define unlikely(x) (x) -#endif /* __GNUC__ > 2 ... 
*/ -#else /* __GNUC__ */ -#define likely(x) (x) -#define unlikely(x) (x) -#endif /* __GNUC__ */ - -static PyObject *__pyx_m; -static PyObject *__pyx_b; -static PyObject *__pyx_empty_tuple; -static PyObject *__pyx_empty_bytes; -static int __pyx_lineno; -static int __pyx_clineno = 0; -static const char * __pyx_cfilenm= __FILE__; -static const char *__pyx_filename; - - -static const char *__pyx_f[] = { - "re2.pyx", -}; - -/* Type declarations */ - -/* "/Users/maxiak/pyre2/src/re2.pyx":462 - * - * - * cdef _finditer(self, object string, int pos=0, int endpos=-1, int as_match=0): # <<<<<<<<<<<<<< - * cdef Py_ssize_t size - * cdef int result - */ - -struct __pyx_opt_args_3re2_7Pattern__finditer { - int __pyx_n; - int pos; - int endpos; - int as_match; -}; - -/* "/Users/maxiak/pyre2/src/re2.pyx":374 - * - * - * cdef class Pattern: # <<<<<<<<<<<<<< - * cdef _re2.RE2 * re_pattern - * cdef int ngroups - */ - -struct __pyx_obj_3re2_Pattern { - PyObject_HEAD - struct __pyx_vtabstruct_3re2_Pattern *__pyx_vtab; - re2::RE2 *re_pattern; - int ngroups; - int encoded; - int _flags; - PyObject *pattern; - PyObject *__weakref__; -}; - -/* "/Users/maxiak/pyre2/src/re2.pyx":97 - * cdef void emit_endif "#endif //" () - * - * cdef class Match: # <<<<<<<<<<<<<< - * cdef _re2.StringPiece * matches - * cdef _re2.const_stringintmap * named_groups - */ - -struct __pyx_obj_3re2_Match { - PyObject_HEAD - struct __pyx_vtabstruct_3re2_Match *__pyx_vtab; - re2::StringPiece *matches; - const std::map *named_groups; - int encoded; - int _lastindex; - int nmatches; - int _pos; - int _endpos; - PyObject *match_string; - PyObject *_pattern_object; - PyObject *_groups; - PyObject *_spans; - PyObject *_named_groups; - PyObject *_named_indexes; -}; - - -struct __pyx_vtabstruct_3re2_Match { - PyObject *(*init_groups)(struct __pyx_obj_3re2_Match *); - PyObject *(*_convert_positions)(struct __pyx_obj_3re2_Match *, PyObject *); - PyObject *(*_make_spans)(struct __pyx_obj_3re2_Match *); -}; -static struct 
__pyx_vtabstruct_3re2_Match *__pyx_vtabptr_3re2_Match; - - -/* "/Users/maxiak/pyre2/src/re2.pyx":374 - * - * - * cdef class Pattern: # <<<<<<<<<<<<<< - * cdef _re2.RE2 * re_pattern - * cdef int ngroups - */ - -struct __pyx_vtabstruct_3re2_Pattern { - PyObject *(*_search)(struct __pyx_obj_3re2_Pattern *, PyObject *, int, int, RE2::Anchor); - PyObject *(*_print_pattern)(struct __pyx_obj_3re2_Pattern *); - PyObject *(*_finditer)(struct __pyx_obj_3re2_Pattern *, PyObject *, struct __pyx_opt_args_3re2_7Pattern__finditer *__pyx_optional_args); -}; -static struct __pyx_vtabstruct_3re2_Pattern *__pyx_vtabptr_3re2_Pattern; - -#ifndef CYTHON_REFNANNY - #define CYTHON_REFNANNY 0 -#endif - -#if CYTHON_REFNANNY - typedef struct { - void (*INCREF)(void*, PyObject*, int); - void (*DECREF)(void*, PyObject*, int); - void (*GOTREF)(void*, PyObject*, int); - void (*GIVEREF)(void*, PyObject*, int); - void* (*SetupContext)(const char*, int, const char*); - void (*FinishContext)(void**); - } __Pyx_RefNannyAPIStruct; - static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; - static __Pyx_RefNannyAPIStruct * __Pyx_RefNannyImportAPI(const char *modname) { - PyObject *m = NULL, *p = NULL; - void *r = NULL; - m = PyImport_ImportModule((char *)modname); - if (!m) goto end; - p = PyObject_GetAttrString(m, (char *)"RefNannyAPI"); - if (!p) goto end; - r = PyLong_AsVoidPtr(p); - end: - Py_XDECREF(p); - Py_XDECREF(m); - return (__Pyx_RefNannyAPIStruct *)r; - } - #define __Pyx_RefNannySetupContext(name) void *__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) - #define __Pyx_RefNannyFinishContext() __Pyx_RefNanny->FinishContext(&__pyx_refnanny) - #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) - #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) - #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) - #define __Pyx_GIVEREF(r) 
__Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) - #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r);} } while(0) -#else - #define __Pyx_RefNannySetupContext(name) - #define __Pyx_RefNannyFinishContext() - #define __Pyx_INCREF(r) Py_INCREF(r) - #define __Pyx_DECREF(r) Py_DECREF(r) - #define __Pyx_GOTREF(r) - #define __Pyx_GIVEREF(r) - #define __Pyx_XDECREF(r) Py_XDECREF(r) -#endif /* CYTHON_REFNANNY */ -#define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);} } while(0) -#define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r);} } while(0) - -static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/ - -static void __Pyx_RaiseDoubleKeywordsError( - const char* func_name, PyObject* kw_name); /*proto*/ - -static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, - Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); /*proto*/ - -static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[], PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, const char* function_name); /*proto*/ - -static CYTHON_INLINE int __Pyx_CheckKeywordStrings(PyObject *kwdict, - const char* function_name, int kw_allowed); /*proto*/ - - -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { - PyObject *r; - if (!j) return NULL; - r = PyObject_GetItem(o, j); - Py_DECREF(j); - return r; -} - - -#define __Pyx_GetItemInt_List(o, i, size, to_py_func) (((size) <= sizeof(Py_ssize_t)) ? 
\ - __Pyx_GetItemInt_List_Fast(o, i) : \ - __Pyx_GetItemInt_Generic(o, to_py_func(i))) - -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i) { - if (likely(o != Py_None)) { - if (likely((0 <= i) & (i < PyList_GET_SIZE(o)))) { - PyObject *r = PyList_GET_ITEM(o, i); - Py_INCREF(r); - return r; - } - else if ((-PyList_GET_SIZE(o) <= i) & (i < 0)) { - PyObject *r = PyList_GET_ITEM(o, PyList_GET_SIZE(o) + i); - Py_INCREF(r); - return r; - } - } - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -} - -#define __Pyx_GetItemInt_Tuple(o, i, size, to_py_func) (((size) <= sizeof(Py_ssize_t)) ? \ - __Pyx_GetItemInt_Tuple_Fast(o, i) : \ - __Pyx_GetItemInt_Generic(o, to_py_func(i))) - -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i) { - if (likely(o != Py_None)) { - if (likely((0 <= i) & (i < PyTuple_GET_SIZE(o)))) { - PyObject *r = PyTuple_GET_ITEM(o, i); - Py_INCREF(r); - return r; - } - else if ((-PyTuple_GET_SIZE(o) <= i) & (i < 0)) { - PyObject *r = PyTuple_GET_ITEM(o, PyTuple_GET_SIZE(o) + i); - Py_INCREF(r); - return r; - } - } - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -} - - -#define __Pyx_GetItemInt(o, i, size, to_py_func) (((size) <= sizeof(Py_ssize_t)) ? 
\ - __Pyx_GetItemInt_Fast(o, i) : \ - __Pyx_GetItemInt_Generic(o, to_py_func(i))) - -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i) { - PyObject *r; - if (PyList_CheckExact(o) && ((0 <= i) & (i < PyList_GET_SIZE(o)))) { - r = PyList_GET_ITEM(o, i); - Py_INCREF(r); - } - else if (PyTuple_CheckExact(o) && ((0 <= i) & (i < PyTuple_GET_SIZE(o)))) { - r = PyTuple_GET_ITEM(o, i); - Py_INCREF(r); - } - else if (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_item && (likely(i >= 0))) { - r = PySequence_GetItem(o, i); - } - else { - r = __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); - } - return r; -} - -static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index); - -static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected); - -static PyObject *__Pyx_UnpackItem(PyObject *, Py_ssize_t index); /*proto*/ -static int __Pyx_EndUnpack(PyObject *, Py_ssize_t expected); /*proto*/ - -static CYTHON_INLINE void __Pyx_RaiseNoneIndexingError(void); - - -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) { - PyObject *value; - if (unlikely(d == Py_None)) { - __Pyx_RaiseNoneIndexingError(); - return NULL; - } - value = PyDict_GetItemWithError(d, key); - if (unlikely(!value)) { - if (!PyErr_Occurred()) - PyErr_SetObject(PyExc_KeyError, key); - return NULL; - } - Py_INCREF(value); - return value; -} -#else - #define __Pyx_PyDict_GetItem(d, key) PyObject_GetItem(d, key) -#endif - -static CYTHON_INLINE PyObject* __Pyx_PyObject_Append(PyObject* L, PyObject* x) { - if (likely(PyList_CheckExact(L))) { - if (PyList_Append(L, x) < 0) return NULL; - Py_INCREF(Py_None); - return Py_None; /* this is just to have an accurate signature */ - } - else { - PyObject *r, *m; - m = __Pyx_GetAttrString(L, "append"); - if (!m) return NULL; - r = PyObject_CallFunctionObjArgs(m, x, NULL); - Py_DECREF(m); - return r; - } -} - -static CYTHON_INLINE long __Pyx_NegateNonNeg(long b) { return 
unlikely(b < 0) ? b : !b; } -static CYTHON_INLINE PyObject* __Pyx_PyBoolOrNull_FromLong(long b) { - return unlikely(b < 0) ? NULL : __Pyx_PyBool_FromLong(b); -} - -static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void); - -static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb); /*proto*/ -static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb); /*proto*/ - -static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); /*proto*/ - -#define __Pyx_SetItemInt(o, i, v, size, to_py_func) (((size) <= sizeof(Py_ssize_t)) ? \ - __Pyx_SetItemInt_Fast(o, i, v) : \ - __Pyx_SetItemInt_Generic(o, to_py_func(i), v)) - -static CYTHON_INLINE int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { - int r; - if (!j) return -1; - r = PyObject_SetItem(o, j, v); - Py_DECREF(j); - return r; -} - -static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v) { - if (PyList_CheckExact(o) && ((0 <= i) & (i < PyList_GET_SIZE(o)))) { - Py_INCREF(v); - Py_DECREF(PyList_GET_ITEM(o, i)); - PyList_SET_ITEM(o, i, v); - return 1; - } - else if (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_ass_item && (likely(i >= 0))) - return PySequence_SetItem(o, i, v); - else { - PyObject *j = PyInt_FromSsize_t(i); - return __Pyx_SetItemInt_Generic(o, j, v); - } -} - -static CYTHON_INLINE void __Pyx_ExceptionSave(PyObject **type, PyObject **value, PyObject **tb); /*proto*/ -static void __Pyx_ExceptionReset(PyObject *type, PyObject *value, PyObject *tb); /*proto*/ - -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/ - -static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name, const char *modname); /*proto*/ - -#if PY_VERSION_HEX < 0x02050000 -#ifndef PyAnySet_CheckExact - -#define PyAnySet_CheckExact(ob) \ - ((ob)->ob_type == &PySet_Type || \ - (ob)->ob_type == &PyFrozenSet_Type) - -#define 
PySet_New(iterable) \ - PyObject_CallFunctionObjArgs((PyObject *)&PySet_Type, (iterable), NULL) - -#define Pyx_PyFrozenSet_New(iterable) \ - PyObject_CallFunctionObjArgs((PyObject *)&PyFrozenSet_Type, (iterable), NULL) - -#define PySet_Size(anyset) \ - PyObject_Size((anyset)) - -#define PySet_Contains(anyset, key) \ - PySequence_Contains((anyset), (key)) - -#define PySet_Pop(set) \ - PyObject_CallMethod(set, (char *)"pop", NULL) - -static CYTHON_INLINE int PySet_Clear(PyObject *set) { - PyObject *ret = PyObject_CallMethod(set, (char *)"clear", NULL); - if (!ret) return -1; - Py_DECREF(ret); return 0; -} - -static CYTHON_INLINE int PySet_Discard(PyObject *set, PyObject *key) { - PyObject *ret = PyObject_CallMethod(set, (char *)"discard", (char *)"O", key); - if (!ret) return -1; - Py_DECREF(ret); return 0; -} - -static CYTHON_INLINE int PySet_Add(PyObject *set, PyObject *key) { - PyObject *ret = PyObject_CallMethod(set, (char *)"add", (char *)"O", key); - if (!ret) return -1; - Py_DECREF(ret); return 0; -} - -#endif /* PyAnySet_CheckExact (<= Py2.4) */ - -#if PY_VERSION_HEX < 0x02040000 -#ifndef Py_SETOBJECT_H -#define Py_SETOBJECT_H - -static PyTypeObject *__Pyx_PySet_Type = NULL; -static PyTypeObject *__Pyx_PyFrozenSet_Type = NULL; - -#define PySet_Type (*__Pyx_PySet_Type) -#define PyFrozenSet_Type (*__Pyx_PyFrozenSet_Type) - -#define PyAnySet_Check(ob) \ - (PyAnySet_CheckExact(ob) || \ - PyType_IsSubtype((ob)->ob_type, &PySet_Type) || \ - PyType_IsSubtype((ob)->ob_type, &PyFrozenSet_Type)) - -#define PyFrozenSet_CheckExact(ob) ((ob)->ob_type == &PyFrozenSet_Type) - -static int __Pyx_Py23SetsImport(void) { - PyObject *sets=0, *Set=0, *ImmutableSet=0; - - sets = PyImport_ImportModule((char *)"sets"); - if (!sets) goto bad; - Set = PyObject_GetAttrString(sets, (char *)"Set"); - if (!Set) goto bad; - ImmutableSet = PyObject_GetAttrString(sets, (char *)"ImmutableSet"); - if (!ImmutableSet) goto bad; - Py_DECREF(sets); - - __Pyx_PySet_Type = (PyTypeObject*) Set; - 
__Pyx_PyFrozenSet_Type = (PyTypeObject*) ImmutableSet; - - return 0; - - bad: - Py_XDECREF(sets); - Py_XDECREF(Set); - Py_XDECREF(ImmutableSet); - return -1; -} - -#else -static int __Pyx_Py23SetsImport(void) { return 0; } -#endif /* !Py_SETOBJECT_H */ -#endif /* < Py2.4 */ -#endif /* < Py2.5 */ - -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/ - -#ifndef __PYX_FORCE_INIT_THREADS - #if PY_VERSION_HEX < 0x02040200 - #define __PYX_FORCE_INIT_THREADS 1 - #else - #define __PYX_FORCE_INIT_THREADS 0 - #endif -#endif - -static int __Pyx_Print(PyObject*, PyObject *, int); /*proto*/ -#if PY_MAJOR_VERSION >= 3 -static PyObject* __pyx_print = 0; -static PyObject* __pyx_print_kwargs = 0; -#endif - -static int __Pyx_PrintOne(PyObject* stream, PyObject *o); /*proto*/ - -static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject *); - -static CYTHON_INLINE unsigned short __Pyx_PyInt_AsUnsignedShort(PyObject *); - -static CYTHON_INLINE unsigned int __Pyx_PyInt_AsUnsignedInt(PyObject *); - -static CYTHON_INLINE char __Pyx_PyInt_AsChar(PyObject *); - -static CYTHON_INLINE short __Pyx_PyInt_AsShort(PyObject *); - -static CYTHON_INLINE int __Pyx_PyInt_AsInt(PyObject *); - -static CYTHON_INLINE signed char __Pyx_PyInt_AsSignedChar(PyObject *); - -static CYTHON_INLINE signed short __Pyx_PyInt_AsSignedShort(PyObject *); - -static CYTHON_INLINE signed int __Pyx_PyInt_AsSignedInt(PyObject *); - -static CYTHON_INLINE int __Pyx_PyInt_AsLongDouble(PyObject *); - -static CYTHON_INLINE unsigned long __Pyx_PyInt_AsUnsignedLong(PyObject *); - -static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_AsUnsignedLongLong(PyObject *); - -static CYTHON_INLINE long __Pyx_PyInt_AsLong(PyObject *); - -static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_AsLongLong(PyObject *); - -static CYTHON_INLINE signed long __Pyx_PyInt_AsSignedLong(PyObject *); - -static CYTHON_INLINE signed PY_LONG_LONG __Pyx_PyInt_AsSignedLongLong(PyObject *); - -static int 
__Pyx_SetVtable(PyObject *dict, void *vtable); /*proto*/ - -static void __Pyx_AddTraceback(const char *funcname); /*proto*/ - -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ -/* Module declarations from _re2 */ - -/* Module declarations from cpython.unicode */ - -/* Module declarations from cpython */ - -/* Module declarations from re2 */ - -static PyTypeObject *__pyx_ptype_3re2_Match = 0; -static PyTypeObject *__pyx_ptype_3re2_Pattern = 0; -static int __pyx_v_3re2_current_notification; -static int __pyx_v_3re2__I; -static int __pyx_v_3re2__M; -static int __pyx_v_3re2__S; -static int __pyx_v_3re2__U; -static int __pyx_v_3re2__X; -static int __pyx_v_3re2__L; -static PyObject *__pyx_f_3re2_cpp_to_pystring(std::string); /*proto*/ -static CYTHON_INLINE PyObject *__pyx_f_3re2_cpp_to_utf8(std::string); /*proto*/ -static CYTHON_INLINE PyObject *__pyx_f_3re2_char_to_utf8(const char*, int); /*proto*/ -static CYTHON_INLINE PyObject *__pyx_f_3re2_unicode_to_bytestring(PyObject *, int *); /*proto*/ -static CYTHON_INLINE int __pyx_f_3re2_pystring_to_bytestring(PyObject *, char **, Py_ssize_t *); /*proto*/ -#define __Pyx_MODULE_NAME "re2" -int __pyx_module_is_main_re2 = 0; - -/* Implementation of re2 */ -static PyObject *__pyx_builtin_Exception; -static PyObject *__pyx_builtin_ValueError; -static PyObject *__pyx_builtin_range; -static PyObject *__pyx_builtin_basestring; -static PyObject *__pyx_builtin_IndexError; -static PyObject *__pyx_builtin_sorted; -static PyObject *__pyx_builtin_zip; -static PyObject *__pyx_builtin_enumerate; -static PyObject *__pyx_builtin_TypeError; -static PyObject *__pyx_builtin_callable; -static PyObject *__pyx_builtin_NotImplementedError; -static PyObject *__pyx_builtin_ord; -static char __pyx_k_1[] = "This function expects a valid notification level."; -static char __pyx_k_2[] = "no such group"; -static char __pyx_k_3[] = "\\"; -static char __pyx_k_4[] = "\000"; -static char __pyx_k_5[] = "g<"; -static char __pyx_k_6[] = ">"; 
-static char __pyx_k_7[] = ""; -static char __pyx_k_8[] = "expected string or buffer"; -static char __pyx_k_9[] = "NamedCapturingGroups"; -static char __pyx_k_10[] = "\n"; -static char __pyx_k_11[] = "Invalid rewrite pattern"; -static char __pyx_k_12[] = "So far pyre2 does not support custom replacement counts"; -static char __pyx_k_13[] = "bogus escape (end of line)"; -static char __pyx_k_14[] = "(?"; -static char __pyx_k_15[] = ")"; -static char __pyx_k_16[] = "#"; -static char __pyx_k_17[] = "[\\"; -static char __pyx_k_18[] = "["; -static char __pyx_k_19[] = "unexpected end of regular expression"; -static char __pyx_k_20[] = "]"; -static char __pyx_k_21[] = "\\p{Nd}"; -static char __pyx_k_22[] = "_\\p{L}\\p{Nd}"; -static char __pyx_k_23[] = "\\s\\p{Z}"; -static char __pyx_k_24[] = "\\P{Nd}"; -static char __pyx_k_25[] = "CharClassProblemException"; -static char __pyx_k_26[] = "BackreferencesException"; -static char __pyx_k_27[] = "[_\\p{L}\\p{Nd}]"; -static char __pyx_k_28[] = "[\\s\\p{Z}]"; -static char __pyx_k_29[] = "[^\\p{Nd}]"; -static char __pyx_k_30[] = "[^_\\p{L}\\p{Nd}]"; -static char __pyx_k_31[] = "[^\\s\\p{Z}]"; -static char __pyx_k_32[] = "Cannot process flags argument with a compiled pattern"; -static char __pyx_k_33[] = "Backreferences not supported"; -static char __pyx_k_34[] = "WARNING: Using re module. 
Reason: %s"; -static char __pyx_k_35[] = "\\W and \\S not supported inside character classes"; -static char __pyx_k_36[] = "first argument must be a string or compiled pattern"; -static char __pyx_k_37[] = "NumberOfCapturingGroups"; -static char __pyx_k_38[] = "\\000"; -static char __pyx_k_39[] = "\n Some error has occured in compilation of the regex.\n "; -static char __pyx_k_40[] = " \t\n\r\013\014"; -static char __pyx_k_41[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890"; -static char __pyx_k_42[] = "set_fallback_notification (line 31)"; -static char __pyx_k_43[] = "set_fallback_notification"; -static char __pyx_k_44[] = "Pattern.search (line 441)"; -static char __pyx_k_45[] = "Pattern.match (line 449)"; -static char __pyx_k_46[] = "Pattern.finditer (line 513)"; -static char __pyx_k_47[] = "Pattern.findall (line 521)"; -static char __pyx_k_48[] = "Pattern.split (line 528)"; -static char __pyx_k_49[] = "Pattern.sub (line 605)"; -static char __pyx_k_50[] = "Pattern.subn (line 613)"; -static char __pyx_k_51[] = "Pattern._subn_callback (line 697)"; -static char __pyx_k_52[] = "_compile (line 904)"; -static char __pyx_k_53[] = "search (line 987)"; -static char __pyx_k_54[] = "match (line 994)"; -static char __pyx_k_55[] = "finditer (line 1001)"; -static char __pyx_k_56[] = "findall (line 1010)"; -static char __pyx_k_57[] = "split (line 1019)"; -static char __pyx_k_58[] = "sub (line 1026)"; -static char __pyx_k_59[] = "subn (line 1037)"; -static char __pyx_k_60[] = "escape (line 1055)"; -static char __pyx_k__0[] = "0"; -static char __pyx_k__D[] = "D"; -static char __pyx_k__I[] = "I"; -static char __pyx_k__L[] = "L"; -static char __pyx_k__M[] = "M"; -static char __pyx_k__S[] = "S"; -static char __pyx_k__U[] = "U"; -static char __pyx_k__W[] = "W"; -static char __pyx_k__X[] = "X"; -static char __pyx_k__c[] = "c"; -static char __pyx_k__d[] = "d"; -static char __pyx_k__m[] = "m"; -static char __pyx_k__s[] = "s"; -static char __pyx_k__w[] = "w"; -static 
char __pyx_k__89[] = "89"; -static char __pyx_k__ok[] = "ok"; -static char __pyx_k__re[] = "re"; -static char __pyx_k__end[] = "end"; -static char __pyx_k__get[] = "get"; -static char __pyx_k__ord[] = "ord"; -static char __pyx_k__pos[] = "pos"; -static char __pyx_k__sub[] = "sub"; -static char __pyx_k__sys[] = "sys"; -static char __pyx_k__zip[] = "zip"; -static char __pyx_k___pos[] = "_pos"; -static char __pyx_k__data[] = "data"; -static char __pyx_k__join[] = "join"; -static char __pyx_k__next[] = "next"; -static char __pyx_k__repl[] = "repl"; -static char __pyx_k__self[] = "self"; -static char __pyx_k__span[] = "span"; -static char __pyx_k__subn[] = "subn"; -static char __pyx_k__warn[] = "warn"; -static char __pyx_k__Match[] = "Match"; -static char __pyx_k__begin[] = "begin"; -static char __pyx_k__c_str[] = "c_str"; -static char __pyx_k__clear[] = "clear"; -static char __pyx_k__count[] = "count"; -static char __pyx_k__error[] = "error"; -static char __pyx_k__first[] = "first"; -static char __pyx_k__flags[] = "flags"; -static char __pyx_k__flush[] = "flush"; -static char __pyx_k__group[] = "group"; -static char __pyx_k__index[] = "index"; -static char __pyx_k__match[] = "match"; -static char __pyx_k__range[] = "range"; -static char __pyx_k__split[] = "split"; -static char __pyx_k__DOTALL[] = "DOTALL"; -static char __pyx_k__LOCALE[] = "LOCALE"; -static char __pyx_k____next[] = "__next"; -static char __pyx_k___cache[] = "_cache"; -static char __pyx_k___flags[] = "_flags"; -static char __pyx_k___spans[] = "_spans"; -static char __pyx_k__endpos[] = "endpos"; -static char __pyx_k__escape[] = "escape"; -static char __pyx_k__groups[] = "groups"; -static char __pyx_k__length[] = "length"; -static char __pyx_k__search[] = "search"; -static char __pyx_k__second[] = "second"; -static char __pyx_k__sorted[] = "sorted"; -static char __pyx_k__stdout[] = "stdout"; -static char __pyx_k__strict[] = "strict"; -static char __pyx_k__string[] = "string"; -static char 
__pyx_k__1234567[] = "1234567"; -static char __pyx_k__Pattern[] = "Pattern"; -static char __pyx_k__UNICODE[] = "UNICODE"; -static char __pyx_k__VERBOSE[] = "VERBOSE"; -static char __pyx_k__VERSION[] = "VERSION"; -static char __pyx_k___endpos[] = "_endpos"; -static char __pyx_k___groups[] = "_groups"; -static char __pyx_k___search[] = "_search"; -static char __pyx_k__compile[] = "compile"; -static char __pyx_k__default[] = "default"; -static char __pyx_k__encoded[] = "encoded"; -static char __pyx_k__findall[] = "findall"; -static char __pyx_k__isdigit[] = "isdigit"; -static char __pyx_k__matches[] = "matches"; -static char __pyx_k__max_mem[] = "max_mem"; -static char __pyx_k__ngroups[] = "ngroups"; -static char __pyx_k__pattern[] = "pattern"; -static char __pyx_k____init__[] = "__init__"; -static char __pyx_k____iter__[] = "__iter__"; -static char __pyx_k____main__[] = "__main__"; -static char __pyx_k____test__[] = "__test__"; -static char __pyx_k___compile[] = "_compile"; -static char __pyx_k__callable[] = "callable"; -static char __pyx_k__callback[] = "callback"; -static char __pyx_k__finditer[] = "finditer"; -static char __pyx_k__maxsplit[] = "maxsplit"; -static char __pyx_k__nmatches[] = "nmatches"; -static char __pyx_k__tostring[] = "tostring"; -static char __pyx_k__warnings[] = "warnings"; -static char __pyx_k__Exception[] = "Exception"; -static char __pyx_k__MULTILINE[] = "MULTILINE"; -static char __pyx_k__Tokenizer[] = "Tokenizer"; -static char __pyx_k__TypeError[] = "TypeError"; -static char __pyx_k___MAXCACHE[] = "_MAXCACHE"; -static char __pyx_k___alphanum[] = "_alphanum"; -static char __pyx_k___finditer[] = "_finditer"; -static char __pyx_k__enumerate[] = "enumerate"; -static char __pyx_k__groupdict[] = "groupdict"; -static char __pyx_k__push_back[] = "push_back"; -static char __pyx_k__IGNORECASE[] = "IGNORECASE"; -static char __pyx_k__IndexError[] = "IndexError"; -static char __pyx_k__RegexError[] = "RegexError"; -static char __pyx_k__SREPattern[] = 
"SREPattern"; -static char __pyx_k__ValueError[] = "ValueError"; -static char __pyx_k__WHITESPACE[] = "WHITESPACE"; -static char __pyx_k___lastindex[] = "_lastindex"; -static char __pyx_k__basestring[] = "basestring"; -static char __pyx_k__error_code[] = "error_code"; -static char __pyx_k__num_groups[] = "num_groups"; -static char __pyx_k__re_pattern[] = "re_pattern"; -static char __pyx_k__VERSION_HEX[] = "VERSION_HEX"; -static char __pyx_k___cache_repl[] = "_cache_repl"; -static char __pyx_k___make_spans[] = "_make_spans"; -static char __pyx_k__init_groups[] = "init_groups"; -static char __pyx_k__set_max_mem[] = "set_max_mem"; -static char __pyx_k__match_string[] = "match_string"; -static char __pyx_k__named_groups[] = "named_groups"; -static char __pyx_k__set_encoding[] = "set_encoding"; -static char __pyx_k___named_groups[] = "_named_groups"; -static char __pyx_k___convert_spans[] = "_convert_spans"; -static char __pyx_k___named_indexes[] = "_named_indexes"; -static char __pyx_k___subn_callback[] = "_subn_callback"; -static char __pyx_k__pattern_object[] = "pattern_object"; -static char __pyx_k__set_log_errors[] = "set_log_errors"; -static char __pyx_k___pattern_object[] = "_pattern_object"; -static char __pyx_k__prepare_pattern[] = "prepare_pattern"; -static char __pyx_k__FALLBACK_QUIETLY[] = "FALLBACK_QUIETLY"; -static char __pyx_k__FALLBACK_WARNING[] = "FALLBACK_WARNING"; -static char __pyx_k__FALLBACK_EXCEPTION[] = "FALLBACK_EXCEPTION"; -static char __pyx_k___convert_positions[] = "_convert_positions"; -static char __pyx_k__set_case_sensitive[] = "set_case_sensitive"; -static char __pyx_k__NotImplementedError[] = "NotImplementedError"; -static PyObject *__pyx_kp_s_1; -static PyObject *__pyx_kp_s_10; -static PyObject *__pyx_kp_s_11; -static PyObject *__pyx_kp_s_12; -static PyObject *__pyx_kp_s_13; -static PyObject *__pyx_kp_s_14; -static PyObject *__pyx_kp_s_15; -static PyObject *__pyx_kp_s_16; -static PyObject *__pyx_kp_s_17; -static PyObject *__pyx_kp_s_18; 
-static PyObject *__pyx_kp_s_19; -static PyObject *__pyx_kp_s_2; -static PyObject *__pyx_kp_s_20; -static PyObject *__pyx_kp_s_21; -static PyObject *__pyx_kp_s_22; -static PyObject *__pyx_kp_s_23; -static PyObject *__pyx_kp_s_24; -static PyObject *__pyx_n_s_25; -static PyObject *__pyx_n_s_26; -static PyObject *__pyx_kp_s_27; -static PyObject *__pyx_kp_s_28; -static PyObject *__pyx_kp_s_29; -static PyObject *__pyx_kp_s_3; -static PyObject *__pyx_kp_s_30; -static PyObject *__pyx_kp_s_31; -static PyObject *__pyx_kp_s_32; -static PyObject *__pyx_kp_s_33; -static PyObject *__pyx_kp_s_34; -static PyObject *__pyx_kp_s_35; -static PyObject *__pyx_kp_s_36; -static PyObject *__pyx_n_s_37; -static PyObject *__pyx_kp_s_38; -static PyObject *__pyx_kp_s_39; -static PyObject *__pyx_kp_s_4; -static PyObject *__pyx_kp_s_40; -static PyObject *__pyx_n_s_41; -static PyObject *__pyx_kp_u_42; -static PyObject *__pyx_n_s_43; -static PyObject *__pyx_kp_u_44; -static PyObject *__pyx_kp_u_45; -static PyObject *__pyx_kp_u_46; -static PyObject *__pyx_kp_u_47; -static PyObject *__pyx_kp_u_48; -static PyObject *__pyx_kp_u_49; -static PyObject *__pyx_kp_s_5; -static PyObject *__pyx_kp_u_50; -static PyObject *__pyx_kp_u_51; -static PyObject *__pyx_kp_u_52; -static PyObject *__pyx_kp_u_53; -static PyObject *__pyx_kp_u_54; -static PyObject *__pyx_kp_u_55; -static PyObject *__pyx_kp_u_56; -static PyObject *__pyx_kp_u_57; -static PyObject *__pyx_kp_u_58; -static PyObject *__pyx_kp_u_59; -static PyObject *__pyx_kp_s_6; -static PyObject *__pyx_kp_u_60; -static PyObject *__pyx_kp_s_7; -static PyObject *__pyx_kp_u_7; -static PyObject *__pyx_kp_s_8; -static PyObject *__pyx_n_s_9; -static PyObject *__pyx_kp_s__0; -static PyObject *__pyx_kp_s__1234567; -static PyObject *__pyx_kp_s__89; -static PyObject *__pyx_n_s__D; -static PyObject *__pyx_n_s__DOTALL; -static PyObject *__pyx_n_s__Exception; -static PyObject *__pyx_n_s__FALLBACK_EXCEPTION; -static PyObject *__pyx_n_s__FALLBACK_QUIETLY; -static PyObject 
*__pyx_n_s__FALLBACK_WARNING; -static PyObject *__pyx_n_s__I; -static PyObject *__pyx_n_s__IGNORECASE; -static PyObject *__pyx_n_s__IndexError; -static PyObject *__pyx_n_s__L; -static PyObject *__pyx_n_s__LOCALE; -static PyObject *__pyx_n_s__M; -static PyObject *__pyx_n_s__MULTILINE; -static PyObject *__pyx_n_s__Match; -static PyObject *__pyx_n_s__NotImplementedError; -static PyObject *__pyx_n_s__Pattern; -static PyObject *__pyx_n_s__RegexError; -static PyObject *__pyx_n_s__S; -static PyObject *__pyx_n_s__SREPattern; -static PyObject *__pyx_n_s__Tokenizer; -static PyObject *__pyx_n_s__TypeError; -static PyObject *__pyx_n_s__U; -static PyObject *__pyx_n_s__UNICODE; -static PyObject *__pyx_n_s__VERBOSE; -static PyObject *__pyx_n_s__VERSION; -static PyObject *__pyx_n_s__VERSION_HEX; -static PyObject *__pyx_n_s__ValueError; -static PyObject *__pyx_n_s__W; -static PyObject *__pyx_n_s__WHITESPACE; -static PyObject *__pyx_n_s__X; -static PyObject *__pyx_n_s___MAXCACHE; -static PyObject *__pyx_n_s____init__; -static PyObject *__pyx_n_s____iter__; -static PyObject *__pyx_n_s____main__; -static PyObject *__pyx_n_s____next; -static PyObject *__pyx_n_s____test__; -static PyObject *__pyx_n_s___alphanum; -static PyObject *__pyx_n_s___cache; -static PyObject *__pyx_n_s___cache_repl; -static PyObject *__pyx_n_s___compile; -static PyObject *__pyx_n_s___convert_positions; -static PyObject *__pyx_n_s___convert_spans; -static PyObject *__pyx_n_s___endpos; -static PyObject *__pyx_n_s___finditer; -static PyObject *__pyx_n_s___flags; -static PyObject *__pyx_n_s___groups; -static PyObject *__pyx_n_s___lastindex; -static PyObject *__pyx_n_s___make_spans; -static PyObject *__pyx_n_s___named_groups; -static PyObject *__pyx_n_s___named_indexes; -static PyObject *__pyx_n_s___pattern_object; -static PyObject *__pyx_n_s___pos; -static PyObject *__pyx_n_s___search; -static PyObject *__pyx_n_s___spans; -static PyObject *__pyx_n_s___subn_callback; -static PyObject *__pyx_n_s__basestring; -static 
PyObject *__pyx_n_s__begin; -static PyObject *__pyx_n_s__c; -static PyObject *__pyx_n_s__c_str; -static PyObject *__pyx_n_s__callable; -static PyObject *__pyx_n_s__callback; -static PyObject *__pyx_n_s__clear; -static PyObject *__pyx_n_s__compile; -static PyObject *__pyx_n_s__count; -static PyObject *__pyx_n_s__d; -static PyObject *__pyx_n_s__data; -static PyObject *__pyx_n_s__default; -static PyObject *__pyx_n_s__encoded; -static PyObject *__pyx_n_s__end; -static PyObject *__pyx_n_s__endpos; -static PyObject *__pyx_n_s__enumerate; -static PyObject *__pyx_n_s__error; -static PyObject *__pyx_n_s__error_code; -static PyObject *__pyx_n_s__escape; -static PyObject *__pyx_n_s__findall; -static PyObject *__pyx_n_s__finditer; -static PyObject *__pyx_n_s__first; -static PyObject *__pyx_n_s__flags; -static PyObject *__pyx_n_s__flush; -static PyObject *__pyx_n_s__get; -static PyObject *__pyx_n_s__group; -static PyObject *__pyx_n_s__groupdict; -static PyObject *__pyx_n_s__groups; -static PyObject *__pyx_n_s__index; -static PyObject *__pyx_n_s__init_groups; -static PyObject *__pyx_n_s__isdigit; -static PyObject *__pyx_n_s__join; -static PyObject *__pyx_n_s__length; -static PyObject *__pyx_n_s__m; -static PyObject *__pyx_n_s__match; -static PyObject *__pyx_n_s__match_string; -static PyObject *__pyx_n_s__matches; -static PyObject *__pyx_n_s__max_mem; -static PyObject *__pyx_n_s__maxsplit; -static PyObject *__pyx_n_s__named_groups; -static PyObject *__pyx_n_s__next; -static PyObject *__pyx_n_s__ngroups; -static PyObject *__pyx_n_s__nmatches; -static PyObject *__pyx_n_s__num_groups; -static PyObject *__pyx_n_s__ok; -static PyObject *__pyx_n_s__ord; -static PyObject *__pyx_n_s__pattern; -static PyObject *__pyx_n_s__pattern_object; -static PyObject *__pyx_n_s__pos; -static PyObject *__pyx_n_s__prepare_pattern; -static PyObject *__pyx_n_s__push_back; -static PyObject *__pyx_n_s__range; -static PyObject *__pyx_n_s__re; -static PyObject *__pyx_n_s__re_pattern; -static PyObject 
*__pyx_n_s__repl; -static PyObject *__pyx_n_s__s; -static PyObject *__pyx_n_s__search; -static PyObject *__pyx_n_s__second; -static PyObject *__pyx_n_s__self; -static PyObject *__pyx_n_s__set_case_sensitive; -static PyObject *__pyx_n_s__set_encoding; -static PyObject *__pyx_n_s__set_log_errors; -static PyObject *__pyx_n_s__set_max_mem; -static PyObject *__pyx_n_s__sorted; -static PyObject *__pyx_n_s__span; -static PyObject *__pyx_n_s__split; -static PyObject *__pyx_n_s__stdout; -static PyObject *__pyx_n_s__string; -static PyObject *__pyx_n_s__sub; -static PyObject *__pyx_n_s__subn; -static PyObject *__pyx_n_s__sys; -static PyObject *__pyx_n_s__tostring; -static PyObject *__pyx_n_s__w; -static PyObject *__pyx_n_s__warn; -static PyObject *__pyx_n_s__warnings; -static PyObject *__pyx_n_s__zip; -static PyObject *__pyx_int_0; -static PyObject *__pyx_int_1; -static PyObject *__pyx_int_2; -static PyObject *__pyx_int_neg_1; -static PyObject *__pyx_int_23; -static PyObject *__pyx_int_100; -static PyObject *__pyx_int_0x80; -static PyObject *__pyx_int_0x000217; - -/* "/Users/maxiak/pyre2/src/re2.pyx":31 - * cdef int current_notification = FALLBACK_QUIETLY - * - * def set_fallback_notification(level): # <<<<<<<<<<<<<< - * """ - * Set the fallback notification to a level; one of: - */ - -static PyObject *__pyx_pf_3re2_set_fallback_notification(PyObject *__pyx_self, PyObject *__pyx_v_level); /*proto*/ -static char __pyx_doc_3re2_set_fallback_notification[] = "\n Set the fallback notification to a level; one of:\n FALLBACK_QUIETLY\n\tFALLBACK_WARNING\n\tFALLBACK_EXCEPTION\n "; -static PyObject *__pyx_pf_3re2_set_fallback_notification(PyObject *__pyx_self, PyObject *__pyx_v_level) { - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - __Pyx_RefNannySetupContext("set_fallback_notification"); - __pyx_self = __pyx_self; - __Pyx_INCREF(__pyx_v_level); - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":39 - * """ - * global current_notification - * level = int(level) # <<<<<<<<<<<<<< - * if level < 0 or level > 2: - * raise ValueError("This function expects a valid notification level.") - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_level); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_level); - __Pyx_GIVEREF(__pyx_v_level); - __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)&PyInt_Type)), __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_v_level); - __pyx_v_level = __pyx_t_2; - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":40 - * global current_notification - * level = int(level) - * if level < 0 or level > 2: # <<<<<<<<<<<<<< - * raise ValueError("This function expects a valid notification level.") - * current_notification = level - */ - __pyx_t_2 = PyObject_RichCompare(__pyx_v_level, __pyx_int_0, Py_LT); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (!__pyx_t_3) { - __pyx_t_2 = PyObject_RichCompare(__pyx_v_level, __pyx_int_2, Py_GT); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - 
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_5 = __pyx_t_4; - } else { - __pyx_t_5 = __pyx_t_3; - } - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":41 - * level = int(level) - * if level < 0 or level > 2: - * raise ValueError("This function expects a valid notification level.") # <<<<<<<<<<<<<< - * current_notification = level - * - */ - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_1)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_1)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_1)); - __pyx_t_1 = PyObject_Call(__pyx_builtin_ValueError, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_Raise(__pyx_t_1, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L5; - } - __pyx_L5:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":42 - * if level < 0 or level > 2: - * raise ValueError("This function expects a valid notification level.") - * current_notification = level # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_6 = __Pyx_PyInt_AsInt(__pyx_v_level); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_3re2_current_notification = __pyx_t_6; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("re2.set_fallback_notification"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_level); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":60 
- * import warnings - * - * cdef object cpp_to_pystring(_re2.cpp_string input): # <<<<<<<<<<<<<< - * # This function is a quick converter from a std::string object - * # to a python string. By taking the slice we go to the right size, - */ - -static PyObject *__pyx_f_3re2_cpp_to_pystring(std::string __pyx_v_input) { - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - __Pyx_RefNannySetupContext("cpp_to_pystring"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":64 - * # to a python string. By taking the slice we go to the right size, - * # despite spurious or missing null characters. - * return input.c_str()[:input.length()] # <<<<<<<<<<<<<< - * - * cdef inline object cpp_to_utf8(_re2.cpp_string input): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyBytes_FromStringAndSize(__pyx_v_input.c_str() + 0, __pyx_v_input.length() - 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_r = ((PyObject *)__pyx_t_1); - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.cpp_to_pystring"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":66 - * return input.c_str()[:input.length()] - * - * cdef inline object cpp_to_utf8(_re2.cpp_string input): # <<<<<<<<<<<<<< - * # This function converts a std::string object to a utf8 object. 
- * return cpython.unicode.PyUnicode_DecodeUTF8(input.c_str(), input.length(), 'strict') - */ - -static CYTHON_INLINE PyObject *__pyx_f_3re2_cpp_to_utf8(std::string __pyx_v_input) { - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - __Pyx_RefNannySetupContext("cpp_to_utf8"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":68 - * cdef inline object cpp_to_utf8(_re2.cpp_string input): - * # This function converts a std::string object to a utf8 object. - * return cpython.unicode.PyUnicode_DecodeUTF8(input.c_str(), input.length(), 'strict') # <<<<<<<<<<<<<< - * - * cdef inline object char_to_utf8(_re2.const_char_ptr input, int length): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyUnicode_DecodeUTF8(__pyx_v_input.c_str(), __pyx_v_input.length(), __pyx_k__strict); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.cpp_to_utf8"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":70 - * return cpython.unicode.PyUnicode_DecodeUTF8(input.c_str(), input.length(), 'strict') - * - * cdef inline object char_to_utf8(_re2.const_char_ptr input, int length): # <<<<<<<<<<<<<< - * # This function converts a C string to a utf8 object. - * return cpython.unicode.PyUnicode_DecodeUTF8(input, length, 'strict') - */ - -static CYTHON_INLINE PyObject *__pyx_f_3re2_char_to_utf8(const char* __pyx_v_input, int __pyx_v_length) { - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - __Pyx_RefNannySetupContext("char_to_utf8"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":72 - * cdef inline object char_to_utf8(_re2.const_char_ptr input, int length): - * # This function converts a C string to a utf8 object. 
- * return cpython.unicode.PyUnicode_DecodeUTF8(input, length, 'strict') # <<<<<<<<<<<<<< - * - * cdef inline object unicode_to_bytestring(object pystring, int * encoded): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyUnicode_DecodeUTF8(__pyx_v_input, __pyx_v_length, __pyx_k__strict); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.char_to_utf8"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":74 - * return cpython.unicode.PyUnicode_DecodeUTF8(input, length, 'strict') - * - * cdef inline object unicode_to_bytestring(object pystring, int * encoded): # <<<<<<<<<<<<<< - * # This function will convert a utf8 string to a bytestring object. - * if cpython.unicode.PyUnicode_Check(pystring): - */ - -static CYTHON_INLINE PyObject *__pyx_f_3re2_unicode_to_bytestring(PyObject *__pyx_v_pystring, int *__pyx_v_encoded) { - PyObject *__pyx_r = NULL; - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - __Pyx_RefNannySetupContext("unicode_to_bytestring"); - __Pyx_INCREF(__pyx_v_pystring); - - /* "/Users/maxiak/pyre2/src/re2.pyx":76 - * cdef inline object unicode_to_bytestring(object pystring, int * encoded): - * # This function will convert a utf8 string to a bytestring object. 
- * if cpython.unicode.PyUnicode_Check(pystring): # <<<<<<<<<<<<<< - * pystring = cpython.unicode.PyUnicode_EncodeUTF8(cpython.unicode.PyUnicode_AS_UNICODE(pystring), - * cpython.unicode.PyUnicode_GET_SIZE(pystring), - */ - __pyx_t_1 = PyUnicode_Check(__pyx_v_pystring); - if (__pyx_t_1) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":78 - * if cpython.unicode.PyUnicode_Check(pystring): - * pystring = cpython.unicode.PyUnicode_EncodeUTF8(cpython.unicode.PyUnicode_AS_UNICODE(pystring), - * cpython.unicode.PyUnicode_GET_SIZE(pystring), # <<<<<<<<<<<<<< - * "strict") - * encoded[0] = 1 - */ - __pyx_t_2 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(__pyx_v_pystring), PyUnicode_GET_SIZE(__pyx_v_pystring), __pyx_k__strict); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_v_pystring); - __pyx_v_pystring = __pyx_t_2; - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":80 - * cpython.unicode.PyUnicode_GET_SIZE(pystring), - * "strict") - * encoded[0] = 1 # <<<<<<<<<<<<<< - * else: - * encoded[0] = 0 - */ - (__pyx_v_encoded[0]) = 1; - goto __pyx_L3; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":82 - * encoded[0] = 1 - * else: - * encoded[0] = 0 # <<<<<<<<<<<<<< - * return pystring - * - */ - (__pyx_v_encoded[0]) = 0; - } - __pyx_L3:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":83 - * else: - * encoded[0] = 0 - * return pystring # <<<<<<<<<<<<<< - * - * cdef inline int pystring_to_bytestring(object pystring, char ** cstring, Py_ssize_t * length): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_pystring); - __pyx_r = __pyx_v_pystring; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("re2.unicode_to_bytestring"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_pystring); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return 
__pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":85 - * return pystring - * - * cdef inline int pystring_to_bytestring(object pystring, char ** cstring, Py_ssize_t * length): # <<<<<<<<<<<<<< - * # This function will convert a pystring to a bytesstring, placing - * # the char * in cstring, and the length in length. - */ - -static CYTHON_INLINE int __pyx_f_3re2_pystring_to_bytestring(PyObject *__pyx_v_pystring, char **__pyx_v_cstring, Py_ssize_t *__pyx_v_length) { - int __pyx_r; - __Pyx_RefNannySetupContext("pystring_to_bytestring"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":91 - * # it will move to utf-8. If utf8 does not work, then it has to be - * # a non-supported encoding. - * return _re2.PyObject_AsCharBuffer(pystring, <_re2.const_char_ptr*> cstring, length) # <<<<<<<<<<<<<< - * - * cdef extern from *: - */ - __pyx_r = PyObject_AsCharBuffer(__pyx_v_pystring, ((const char* *)__pyx_v_cstring), __pyx_v_length); - goto __pyx_L0; - - __pyx_r = 0; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":113 - * cdef dict _named_indexes - * - * def __init__(self, object pattern_object, int num_groups): # <<<<<<<<<<<<<< - * self._lastindex = -1 - * self._groups = None - */ - -static int __pyx_pf_3re2_5Match___init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_pf_3re2_5Match___init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern_object = 0; - int __pyx_v_num_groups; - int __pyx_r; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern_object,&__pyx_n_s__num_groups,0}; - __Pyx_RefNannySetupContext("__init__"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[2] = {0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto 
__pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern_object); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__num_groups); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("__init__", 1, 2, 2, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "__init__") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern_object = values[0]; - __pyx_v_num_groups = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_num_groups == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { - goto __pyx_L5_argtuple_error; - } else { - __pyx_v_pattern_object = PyTuple_GET_ITEM(__pyx_args, 0); - __pyx_v_num_groups = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_num_groups == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__init__", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Match.__init__"); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":114 - * - * def __init__(self, object pattern_object, int num_groups): - * self._lastindex = -1 # <<<<<<<<<<<<<< - * self._groups = None - * 
self._pos = 0 - */ - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_lastindex = -1; - - /* "/Users/maxiak/pyre2/src/re2.pyx":115 - * def __init__(self, object pattern_object, int num_groups): - * self._lastindex = -1 - * self._groups = None # <<<<<<<<<<<<<< - * self._pos = 0 - * self._endpos = -1 - */ - __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(Py_None); - __Pyx_GOTREF(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_groups); - __Pyx_DECREF(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_groups)); - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_groups = ((PyObject *)Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":116 - * self._lastindex = -1 - * self._groups = None - * self._pos = 0 # <<<<<<<<<<<<<< - * self._endpos = -1 - * self.matches = _re2.new_StringPiece_array(num_groups + 1) - */ - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_pos = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":117 - * self._groups = None - * self._pos = 0 - * self._endpos = -1 # <<<<<<<<<<<<<< - * self.matches = _re2.new_StringPiece_array(num_groups + 1) - * self.nmatches = num_groups - */ - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_endpos = -1; - - /* "/Users/maxiak/pyre2/src/re2.pyx":118 - * self._pos = 0 - * self._endpos = -1 - * self.matches = _re2.new_StringPiece_array(num_groups + 1) # <<<<<<<<<<<<<< - * self.nmatches = num_groups - * self._pattern_object = pattern_object - */ - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->matches = new_StringPiece_array((__pyx_v_num_groups + 1)); - - /* "/Users/maxiak/pyre2/src/re2.pyx":119 - * self._endpos = -1 - * self.matches = _re2.new_StringPiece_array(num_groups + 1) - * self.nmatches = num_groups # <<<<<<<<<<<<<< - * self._pattern_object = pattern_object - * - */ - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->nmatches = __pyx_v_num_groups; - - /* "/Users/maxiak/pyre2/src/re2.pyx":120 - * self.matches = _re2.new_StringPiece_array(num_groups + 1) - * self.nmatches = num_groups - * self._pattern_object = pattern_object # 
<<<<<<<<<<<<<< - * - * def __dealloc__(self): - */ - __Pyx_INCREF(__pyx_v_pattern_object); - __Pyx_GIVEREF(__pyx_v_pattern_object); - __Pyx_GOTREF(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_pattern_object); - __Pyx_DECREF(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_pattern_object); - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_pattern_object = __pyx_v_pattern_object; - - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":122 - * self._pattern_object = pattern_object - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * _re2.delete_StringPiece_array(self.matches) - * - */ - -static void __pyx_pf_3re2_5Match___dealloc__(PyObject *__pyx_v_self); /*proto*/ -static void __pyx_pf_3re2_5Match___dealloc__(PyObject *__pyx_v_self) { - __Pyx_RefNannySetupContext("__dealloc__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":123 - * - * def __dealloc__(self): - * _re2.delete_StringPiece_array(self.matches) # <<<<<<<<<<<<<< - * - * property re: - */ - delete_StringPiece_array(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->matches); - - __Pyx_RefNannyFinishContext(); -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":126 - * - * property re: - * def __get__(self): # <<<<<<<<<<<<<< - * return self._pattern_object - * - */ - -static PyObject *__pyx_pf_3re2_5Match_2re___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_2re___get__(PyObject *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannySetupContext("__get__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":127 - * property re: - * def __get__(self): - * return self._pattern_object # <<<<<<<<<<<<<< - * - * property pos: - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_pattern_object); - __pyx_r = ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_pattern_object; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - 
return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":130 - * - * property pos: - * def __get__(self): # <<<<<<<<<<<<<< - * return self._pos - * - */ - -static PyObject *__pyx_pf_3re2_5Match_3pos___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_3pos___get__(PyObject *__pyx_v_self) { - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - __Pyx_RefNannySetupContext("__get__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":131 - * property pos: - * def __get__(self): - * return self._pos # <<<<<<<<<<<<<< - * - * property endpos: - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyInt_FromLong(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_pos); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Match.pos.__get__"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":134 - * - * property endpos: - * def __get__(self): # <<<<<<<<<<<<<< - * return self._endpos - * - */ - -static PyObject *__pyx_pf_3re2_5Match_6endpos___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_6endpos___get__(PyObject *__pyx_v_self) { - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - __Pyx_RefNannySetupContext("__get__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":135 - * property endpos: - * def __get__(self): - * return self._endpos # <<<<<<<<<<<<<< - * - * property string: - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyInt_FromLong(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_endpos); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - 
__pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Match.endpos.__get__"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":138 - * - * property string: - * def __get__(self): # <<<<<<<<<<<<<< - * return self.match_string - * - */ - -static PyObject *__pyx_pf_3re2_5Match_6string___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_6string___get__(PyObject *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannySetupContext("__get__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":139 - * property string: - * def __get__(self): - * return self.match_string # <<<<<<<<<<<<<< - * - * cdef init_groups(self): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->match_string); - __pyx_r = ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->match_string; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":141 - * return self.match_string - * - * cdef init_groups(self): # <<<<<<<<<<<<<< - * cdef list groups = [] - * cdef int i - */ - -static PyObject *__pyx_f_3re2_5Match_init_groups(struct __pyx_obj_3re2_Match *__pyx_v_self) { - PyObject *__pyx_v_groups = 0; - int __pyx_v_i; - int __pyx_v_cur_encoded; - const char* __pyx_v_last_end; - const char* __pyx_v_cur_end; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - __Pyx_RefNannySetupContext("init_groups"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":142 - * - * cdef init_groups(self): - * cdef list groups = [] # <<<<<<<<<<<<<< - * cdef int i - * cdef bint cur_encoded = self.encoded - */ - __pyx_t_1 = 
PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_v_groups = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":144 - * cdef list groups = [] - * cdef int i - * cdef bint cur_encoded = self.encoded # <<<<<<<<<<<<<< - * - * if self._groups is not None: - */ - __pyx_v_cur_encoded = __pyx_v_self->encoded; - - /* "/Users/maxiak/pyre2/src/re2.pyx":146 - * cdef bint cur_encoded = self.encoded - * - * if self._groups is not None: # <<<<<<<<<<<<<< - * return - * - */ - __pyx_t_2 = (__pyx_v_self->_groups != ((PyObject *)Py_None)); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":147 - * - * if self._groups is not None: - * return # <<<<<<<<<<<<<< - * - * cdef _re2.const_char_ptr last_end = NULL - */ - __Pyx_XDECREF(__pyx_r); - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - goto __pyx_L3; - } - __pyx_L3:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":149 - * return - * - * cdef _re2.const_char_ptr last_end = NULL # <<<<<<<<<<<<<< - * cdef _re2.const_char_ptr cur_end = NULL - * - */ - __pyx_v_last_end = NULL; - - /* "/Users/maxiak/pyre2/src/re2.pyx":150 - * - * cdef _re2.const_char_ptr last_end = NULL - * cdef _re2.const_char_ptr cur_end = NULL # <<<<<<<<<<<<<< - * - * for i in range(self.nmatches): - */ - __pyx_v_cur_end = NULL; - - /* "/Users/maxiak/pyre2/src/re2.pyx":152 - * cdef _re2.const_char_ptr cur_end = NULL - * - * for i in range(self.nmatches): # <<<<<<<<<<<<<< - * if self.matches[i].data() == NULL: - * groups.append(None) - */ - __pyx_t_3 = __pyx_v_self->nmatches; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_i = __pyx_t_4; - - /* "/Users/maxiak/pyre2/src/re2.pyx":153 - * - * for i in range(self.nmatches): - * if self.matches[i].data() == NULL: # <<<<<<<<<<<<<< - * groups.append(None) - * else: - */ - __pyx_t_2 = ((__pyx_v_self->matches[__pyx_v_i]).data() == NULL); - 
if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":154 - * for i in range(self.nmatches): - * if self.matches[i].data() == NULL: - * groups.append(None) # <<<<<<<<<<<<<< - * else: - * if i > 0: - */ - if (unlikely(__pyx_v_groups == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_5 = PyList_Append(((PyObject *)__pyx_v_groups), Py_None); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L6; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":156 - * groups.append(None) - * else: - * if i > 0: # <<<<<<<<<<<<<< - * cur_end = self.matches[i].data() + self.matches[i].length() - * - */ - __pyx_t_2 = (__pyx_v_i > 0); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":157 - * else: - * if i > 0: - * cur_end = self.matches[i].data() + self.matches[i].length() # <<<<<<<<<<<<<< - * - * if last_end == NULL: - */ - __pyx_v_cur_end = ((__pyx_v_self->matches[__pyx_v_i]).data() + (__pyx_v_self->matches[__pyx_v_i]).length()); - - /* "/Users/maxiak/pyre2/src/re2.pyx":159 - * cur_end = self.matches[i].data() + self.matches[i].length() - * - * if last_end == NULL: # <<<<<<<<<<<<<< - * last_end = cur_end - * self._lastindex = i - */ - __pyx_t_2 = (__pyx_v_last_end == NULL); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":160 - * - * if last_end == NULL: - * last_end = cur_end # <<<<<<<<<<<<<< - * self._lastindex = i - * else: - */ - __pyx_v_last_end = __pyx_v_cur_end; - - /* "/Users/maxiak/pyre2/src/re2.pyx":161 - * if last_end == NULL: - * last_end = cur_end - * self._lastindex = i # <<<<<<<<<<<<<< - * else: - * # The rules for last group are a bit complicated: - */ - __pyx_v_self->_lastindex = __pyx_v_i; - goto __pyx_L8; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":166 - * # if two 
groups end at the same point, the earlier one is considered last - * # so we don't switch our selection unless the end point has moved - * if cur_end > last_end: # <<<<<<<<<<<<<< - * last_end = cur_end - * self._lastindex = i - */ - __pyx_t_2 = (__pyx_v_cur_end > __pyx_v_last_end); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":167 - * # so we don't switch our selection unless the end point has moved - * if cur_end > last_end: - * last_end = cur_end # <<<<<<<<<<<<<< - * self._lastindex = i - * - */ - __pyx_v_last_end = __pyx_v_cur_end; - - /* "/Users/maxiak/pyre2/src/re2.pyx":168 - * if cur_end > last_end: - * last_end = cur_end - * self._lastindex = i # <<<<<<<<<<<<<< - * - * if cur_encoded: - */ - __pyx_v_self->_lastindex = __pyx_v_i; - goto __pyx_L9; - } - __pyx_L9:; - } - __pyx_L8:; - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":170 - * self._lastindex = i - * - * if cur_encoded: # <<<<<<<<<<<<<< - * groups.append(char_to_utf8(self.matches[i].data(), self.matches[i].length())) - * else: - */ - if (__pyx_v_cur_encoded) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":171 - * - * if cur_encoded: - * groups.append(char_to_utf8(self.matches[i].data(), self.matches[i].length())) # <<<<<<<<<<<<<< - * else: - * groups.append(self.matches[i].data()[:self.matches[i].length()]) - */ - if (unlikely(__pyx_v_groups == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_1 = __pyx_f_3re2_char_to_utf8((__pyx_v_self->matches[__pyx_v_i]).data(), (__pyx_v_self->matches[__pyx_v_i]).length()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyList_Append(((PyObject *)__pyx_v_groups), __pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L10; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":173 - * groups.append(char_to_utf8(self.matches[i].data(), self.matches[i].length())) - * else: - * groups.append(self.matches[i].data()[:self.matches[i].length()]) # <<<<<<<<<<<<<< - * self._groups = tuple(groups) - * - */ - if (unlikely(__pyx_v_groups == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_1 = PyBytes_FromStringAndSize((__pyx_v_self->matches[__pyx_v_i]).data() + 0, (__pyx_v_self->matches[__pyx_v_i]).length() - 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_5 = PyList_Append(((PyObject *)__pyx_v_groups), ((PyObject *)__pyx_t_1)); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - } - __pyx_L10:; - } - __pyx_L6:; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":174 - * else: - * groups.append(self.matches[i].data()[:self.matches[i].length()]) - * self._groups = tuple(groups) # <<<<<<<<<<<<<< - * - * def groups(self, default=None): - */ - if (unlikely(__pyx_v_groups == Py_None)) { - PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_1 = ((PyObject *)PyList_AsTuple(__pyx_v_groups)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __Pyx_GIVEREF(((PyObject *)__pyx_t_1)); - __Pyx_GOTREF(__pyx_v_self->_groups); - 
__Pyx_DECREF(((PyObject *)__pyx_v_self->_groups)); - __pyx_v_self->_groups = __pyx_t_1; - __pyx_t_1 = 0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Match.init_groups"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_groups); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":176 - * self._groups = tuple(groups) - * - * def groups(self, default=None): # <<<<<<<<<<<<<< - * self.init_groups() - * if default is not None: - */ - -static PyObject *__pyx_pf_3re2_5Match_groups(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_groups(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_default = 0; - PyObject *__pyx_v_g; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - Py_ssize_t __pyx_t_3; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__default,0}; - __Pyx_RefNannySetupContext("groups"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[1] = {0}; - values[0] = ((PyObject *)Py_None); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__default); - if (value) { values[0] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "groups") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 176; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_default = values[0]; - } else { - __pyx_v_default = ((PyObject 
*)Py_None); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 1: __pyx_v_default = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("groups", 0, 0, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 176; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Match.groups"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_v_g = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":177 - * - * def groups(self, default=None): - * self.init_groups() # <<<<<<<<<<<<<< - * if default is not None: - * return tuple([g or default for g in self._groups[1:]]) - */ - __pyx_t_1 = ((struct __pyx_vtabstruct_3re2_Match *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->__pyx_vtab)->init_groups(((struct __pyx_obj_3re2_Match *)__pyx_v_self)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":178 - * def groups(self, default=None): - * self.init_groups() - * if default is not None: # <<<<<<<<<<<<<< - * return tuple([g or default for g in self._groups[1:]]) - * return self._groups[1:] - */ - __pyx_t_2 = (__pyx_v_default != Py_None); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":179 - * self.init_groups() - * if default is not None: - * return tuple([g or default for g in self._groups[1:]]) # <<<<<<<<<<<<<< - * return self._groups[1:] - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_4 = PySequence_GetSlice(((PyObject *)((struct 
__pyx_obj_3re2_Match *)__pyx_v_self)->_groups), 1, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - if (likely(((PyObject *)__pyx_t_4) != Py_None)) { - __pyx_t_3 = 0; __pyx_t_5 = ((PyObject *)__pyx_t_4); __Pyx_INCREF(__pyx_t_5); - } else { - PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; - for (;;) { - if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_5)) break; - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_3); __Pyx_INCREF(__pyx_t_4); __pyx_t_3++; - __Pyx_DECREF(__pyx_v_g); - __pyx_v_g = __pyx_t_4; - __pyx_t_4 = 0; - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_g); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (!__pyx_t_2) { - __Pyx_INCREF(__pyx_v_default); - __pyx_t_4 = __pyx_v_default; - } else { - __Pyx_INCREF(__pyx_v_g); - __pyx_t_4 = __pyx_v_g; - } - if (unlikely(PyList_Append(__pyx_t_1, (PyObject*)__pyx_t_4))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - } - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = ((PyObject *)PyList_AsTuple(__pyx_t_1)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_5)); - __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_r = ((PyObject *)__pyx_t_5); - __pyx_t_5 = 0; - goto __pyx_L0; - goto __pyx_L6; - } - __pyx_L6:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":180 - * if default is not None: - * return tuple([g or default for g in self._groups[1:]]) - * return self._groups[1:] # <<<<<<<<<<<<<< - * - * def group(self, 
*args): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_5 = PySequence_GetSlice(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_groups), 1, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 180; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_5)); - __pyx_r = ((PyObject *)__pyx_t_5); - __pyx_t_5 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_AddTraceback("re2.Match.groups"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_g); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":182 - * return self._groups[1:] - * - * def group(self, *args): # <<<<<<<<<<<<<< - * if len(args) > 1: - * return tuple([self.group(i) for i in args]) - */ - -static PyObject *__pyx_pf_3re2_5Match_group(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_group(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_args = 0; - PyObject *__pyx_v_groupnum; - int __pyx_v_idx; - PyObject *__pyx_v_i; - PyObject *__pyx_r = NULL; - Py_ssize_t __pyx_t_1; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - int __pyx_t_8; - __Pyx_RefNannySetupContext("group"); - if (unlikely(__pyx_kwds) && unlikely(PyDict_Size(__pyx_kwds) > 0) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "group", 0))) return NULL; - __Pyx_INCREF(__pyx_args); - __pyx_v_args = __pyx_args; - __pyx_v_groupnum = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_i = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":183 - * - * def group(self, *args): - * if len(args) > 1: # <<<<<<<<<<<<<< - * return 
tuple([self.group(i) for i in args]) - * elif len(args) > 0: - */ - if (unlikely(__pyx_v_args == Py_None)) { - PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 183; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_1 = PyTuple_GET_SIZE(((PyObject *)__pyx_v_args)); - __pyx_t_2 = (__pyx_t_1 > 1); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":184 - * def group(self, *args): - * if len(args) > 1: - * return tuple([self.group(i) for i in args]) # <<<<<<<<<<<<<< - * elif len(args) > 0: - * groupnum = args[0] - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - if (likely(((PyObject *)__pyx_v_args) != Py_None)) { - __pyx_t_1 = 0; __pyx_t_4 = ((PyObject *)__pyx_v_args); __Pyx_INCREF(__pyx_t_4); - } else { - PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - for (;;) { - if (__pyx_t_1 >= PyTuple_GET_SIZE(__pyx_t_4)) break; - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_1); __Pyx_INCREF(__pyx_t_5); __pyx_t_1++; - __Pyx_DECREF(__pyx_v_i); - __pyx_v_i = __pyx_t_5; - __pyx_t_5 = 0; - __pyx_t_5 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__group); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_INCREF(__pyx_v_i); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_v_i); - __Pyx_GIVEREF(__pyx_v_i); - __pyx_t_7 = PyObject_Call(__pyx_t_5, __pyx_t_6, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(PyList_Append(__pyx_t_3, (PyObject*)__pyx_t_7))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - } - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = ((PyObject *)PyList_AsTuple(__pyx_t_3)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __pyx_r = ((PyObject *)__pyx_t_4); - __pyx_t_4 = 0; - goto __pyx_L0; - goto __pyx_L5; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":185 - * if len(args) > 1: - * return tuple([self.group(i) for i in args]) - * elif len(args) > 0: # <<<<<<<<<<<<<< - * groupnum = args[0] - * else: - */ - if (unlikely(__pyx_v_args == Py_None)) { - PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_1 = PyTuple_GET_SIZE(((PyObject *)__pyx_v_args)); - __pyx_t_2 = (__pyx_t_1 > 0); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":186 - * return tuple([self.group(i) for i in args]) - * elif len(args) > 0: - * groupnum = args[0] # <<<<<<<<<<<<<< - * else: - * groupnum = 0 - */ - __pyx_t_4 = __Pyx_GetItemInt_Tuple(((PyObject *)__pyx_v_args), 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 186; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_v_groupnum); - __pyx_v_groupnum = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L5; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":188 - * groupnum = args[0] - * else: - * groupnum = 0 # <<<<<<<<<<<<<< - * - * cdef int 
idx - */ - __Pyx_INCREF(__pyx_int_0); - __Pyx_DECREF(__pyx_v_groupnum); - __pyx_v_groupnum = __pyx_int_0; - } - __pyx_L5:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":192 - * cdef int idx - * - * self.init_groups() # <<<<<<<<<<<<<< - * - * if isinstance(groupnum, basestring): - */ - __pyx_t_4 = ((struct __pyx_vtabstruct_3re2_Match *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->__pyx_vtab)->init_groups(((struct __pyx_obj_3re2_Match *)__pyx_v_self)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":194 - * self.init_groups() - * - * if isinstance(groupnum, basestring): # <<<<<<<<<<<<<< - * return self.groupdict()[groupnum] - * - */ - __pyx_t_2 = PyObject_IsInstance(__pyx_v_groupnum, __pyx_builtin_basestring); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":195 - * - * if isinstance(groupnum, basestring): - * return self.groupdict()[groupnum] # <<<<<<<<<<<<<< - * - * idx = groupnum - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_4 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__groupdict); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_GetItem(__pyx_t_3, __pyx_v_groupnum); if (!__pyx_t_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_r = 
__pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - goto __pyx_L8; - } - __pyx_L8:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":197 - * return self.groupdict()[groupnum] - * - * idx = groupnum # <<<<<<<<<<<<<< - * - * if idx > self.nmatches - 1: - */ - __pyx_t_8 = __Pyx_PyInt_AsInt(__pyx_v_groupnum); if (unlikely((__pyx_t_8 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 197; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_idx = __pyx_t_8; - - /* "/Users/maxiak/pyre2/src/re2.pyx":199 - * idx = groupnum - * - * if idx > self.nmatches - 1: # <<<<<<<<<<<<<< - * raise IndexError("no such group") - * return self._groups[idx] - */ - __pyx_t_2 = (__pyx_v_idx > (((struct __pyx_obj_3re2_Match *)__pyx_v_self)->nmatches - 1)); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":200 - * - * if idx > self.nmatches - 1: - * raise IndexError("no such group") # <<<<<<<<<<<<<< - * return self._groups[idx] - * - */ - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 200; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_2)); - PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_kp_s_2)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_2)); - __pyx_t_3 = PyObject_Call(__pyx_builtin_IndexError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 200; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_Raise(__pyx_t_3, 0, 0); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 200; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L9; - } - __pyx_L9:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":201 - * if idx > self.nmatches - 1: - * raise IndexError("no such group") - * return self._groups[idx] # <<<<<<<<<<<<<< - * - * cdef object _convert_positions(self, positions): - */ - 
__Pyx_XDECREF(__pyx_r); - __pyx_t_3 = __Pyx_GetItemInt_Tuple(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_groups), __pyx_v_idx, sizeof(int), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("re2.Match.group"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_args); - __Pyx_DECREF(__pyx_v_groupnum); - __Pyx_DECREF(__pyx_v_i); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":203 - * return self._groups[idx] - * - * cdef object _convert_positions(self, positions): # <<<<<<<<<<<<<< - * cdef char * s = self.match_string - * cdef int cpos = 0 - */ - -static PyObject *__pyx_f_3re2_5Match__convert_positions(struct __pyx_obj_3re2_Match *__pyx_v_self, PyObject *__pyx_v_positions) { - char *__pyx_v_s; - int __pyx_v_cpos; - int __pyx_v_upos; - int __pyx_v_size; - int __pyx_v_c; - PyObject *__pyx_v_new_positions; - long __pyx_v_i; - Py_ssize_t __pyx_v_num_positions; - PyObject *__pyx_r = NULL; - char *__pyx_t_1; - Py_ssize_t __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - int __pyx_t_5; - int __pyx_t_6; - PyObject *__pyx_t_7 = NULL; - __Pyx_RefNannySetupContext("_convert_positions"); - __pyx_v_new_positions = ((PyObject *)Py_None); __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":204 - * - * cdef object _convert_positions(self, positions): - * cdef char * s = self.match_string # <<<<<<<<<<<<<< - * cdef int cpos = 0 - * cdef int upos = 0 - */ - __pyx_t_1 = PyBytes_AsString(__pyx_v_self->match_string); if (unlikely((!__pyx_t_1) && 
PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_s = __pyx_t_1; - - /* "/Users/maxiak/pyre2/src/re2.pyx":205 - * cdef object _convert_positions(self, positions): - * cdef char * s = self.match_string - * cdef int cpos = 0 # <<<<<<<<<<<<<< - * cdef int upos = 0 - * cdef int size = len(self.match_string) - */ - __pyx_v_cpos = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":206 - * cdef char * s = self.match_string - * cdef int cpos = 0 - * cdef int upos = 0 # <<<<<<<<<<<<<< - * cdef int size = len(self.match_string) - * cdef int c - */ - __pyx_v_upos = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":207 - * cdef int cpos = 0 - * cdef int upos = 0 - * cdef int size = len(self.match_string) # <<<<<<<<<<<<<< - * cdef int c - * - */ - __pyx_t_2 = PyObject_Length(__pyx_v_self->match_string); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 207; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_size = __pyx_t_2; - - /* "/Users/maxiak/pyre2/src/re2.pyx":210 - * cdef int c - * - * new_positions = [] # <<<<<<<<<<<<<< - * i = 0 - * num_positions = len(positions) - */ - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __Pyx_DECREF(((PyObject *)__pyx_v_new_positions)); - __pyx_v_new_positions = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":211 - * - * new_positions = [] - * i = 0 # <<<<<<<<<<<<<< - * num_positions = len(positions) - * if positions[i] == -1: - */ - __pyx_v_i = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":212 - * new_positions = [] - * i = 0 - * num_positions = len(positions) # <<<<<<<<<<<<<< - * if positions[i] == -1: - * new_positions.append(-1) - */ - __pyx_t_2 = PyObject_Length(__pyx_v_positions); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 212; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} - __pyx_v_num_positions = __pyx_t_2; - - /* "/Users/maxiak/pyre2/src/re2.pyx":213 - * i = 0 - * num_positions = len(positions) - * if positions[i] == -1: # <<<<<<<<<<<<<< - * new_positions.append(-1) - * inc(i) - */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_positions, __pyx_v_i, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 213; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_t_3, __pyx_int_neg_1, Py_EQ); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 213; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 213; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":214 - * num_positions = len(positions) - * if positions[i] == -1: - * new_positions.append(-1) # <<<<<<<<<<<<<< - * inc(i) - * if i == num_positions: - */ - if (unlikely(__pyx_v_new_positions == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 214; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_6 = PyList_Append(((PyObject *)__pyx_v_new_positions), __pyx_int_neg_1); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 214; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":215 - * if positions[i] == -1: - * new_positions.append(-1) - * inc(i) # <<<<<<<<<<<<<< - * if i == num_positions: - * return new_positions - */ - (++__pyx_v_i); - - /* "/Users/maxiak/pyre2/src/re2.pyx":216 - * new_positions.append(-1) - * inc(i) - * if i == num_positions: # <<<<<<<<<<<<<< - * return new_positions - * if 
positions[i] == 0: - */ - __pyx_t_5 = (__pyx_v_i == __pyx_v_num_positions); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":217 - * inc(i) - * if i == num_positions: - * return new_positions # <<<<<<<<<<<<<< - * if positions[i] == 0: - * new_positions.append(0) - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)__pyx_v_new_positions)); - __pyx_r = ((PyObject *)__pyx_v_new_positions); - goto __pyx_L0; - goto __pyx_L4; - } - __pyx_L4:; - goto __pyx_L3; - } - __pyx_L3:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":218 - * if i == num_positions: - * return new_positions - * if positions[i] == 0: # <<<<<<<<<<<<<< - * new_positions.append(0) - * inc(i) - */ - __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_positions, __pyx_v_i, sizeof(long), PyInt_FromLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_4, __pyx_int_0, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":219 - * return new_positions - * if positions[i] == 0: - * new_positions.append(0) # <<<<<<<<<<<<<< - * inc(i) - * if i == num_positions: - */ - if (unlikely(__pyx_v_new_positions == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_6 = PyList_Append(((PyObject *)__pyx_v_new_positions), __pyx_int_0); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 219; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":220 - * if positions[i] == 0: - * new_positions.append(0) - * inc(i) # <<<<<<<<<<<<<< - * if i == num_positions: - * return new_positions - */ - (++__pyx_v_i); - - /* "/Users/maxiak/pyre2/src/re2.pyx":221 - * new_positions.append(0) - * inc(i) - * if i == num_positions: # <<<<<<<<<<<<<< - * return new_positions - * - */ - __pyx_t_5 = (__pyx_v_i == __pyx_v_num_positions); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":222 - * inc(i) - * if i == num_positions: - * return new_positions # <<<<<<<<<<<<<< - * - * while cpos < size: - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)__pyx_v_new_positions)); - __pyx_r = ((PyObject *)__pyx_v_new_positions); - goto __pyx_L0; - goto __pyx_L6; - } - __pyx_L6:; - goto __pyx_L5; - } - __pyx_L5:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":224 - * return new_positions - * - * while cpos < size: # <<<<<<<<<<<<<< - * c = s[cpos] - * if c < 0x80: - */ - while (1) { - __pyx_t_5 = (__pyx_v_cpos < __pyx_v_size); - if (!__pyx_t_5) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":225 - * - * while cpos < size: - * c = s[cpos] # <<<<<<<<<<<<<< - * if c < 0x80: - * inc(cpos) - */ - __pyx_v_c = ((unsigned char)(__pyx_v_s[__pyx_v_cpos])); - - /* "/Users/maxiak/pyre2/src/re2.pyx":226 - * while cpos < size: - * c = s[cpos] - * if c < 0x80: # <<<<<<<<<<<<<< - * inc(cpos) - * inc(upos) - */ - __pyx_t_5 = (__pyx_v_c < 0x80); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":227 - * c = s[cpos] - * if c < 0x80: - * inc(cpos) # <<<<<<<<<<<<<< - * inc(upos) - * elif c < 0xe0: - */ - (++__pyx_v_cpos); - - /* "/Users/maxiak/pyre2/src/re2.pyx":228 - * if c < 0x80: - * inc(cpos) - * inc(upos) # <<<<<<<<<<<<<< - * elif c < 0xe0: - * cpos += 2 - */ - (++__pyx_v_upos); - goto __pyx_L9; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":229 - * inc(cpos) - * inc(upos) - * elif c < 0xe0: # <<<<<<<<<<<<<< - * cpos += 2 - * inc(upos) - */ - __pyx_t_5 = 
(__pyx_v_c < 0xe0); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":230 - * inc(upos) - * elif c < 0xe0: - * cpos += 2 # <<<<<<<<<<<<<< - * inc(upos) - * elif c < 0xf0: - */ - __pyx_v_cpos += 2; - - /* "/Users/maxiak/pyre2/src/re2.pyx":231 - * elif c < 0xe0: - * cpos += 2 - * inc(upos) # <<<<<<<<<<<<<< - * elif c < 0xf0: - * cpos += 3 - */ - (++__pyx_v_upos); - goto __pyx_L9; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":232 - * cpos += 2 - * inc(upos) - * elif c < 0xf0: # <<<<<<<<<<<<<< - * cpos += 3 - * inc(upos) - */ - __pyx_t_5 = (__pyx_v_c < 0xf0); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":233 - * inc(upos) - * elif c < 0xf0: - * cpos += 3 # <<<<<<<<<<<<<< - * inc(upos) - * else: - */ - __pyx_v_cpos += 3; - - /* "/Users/maxiak/pyre2/src/re2.pyx":234 - * elif c < 0xf0: - * cpos += 3 - * inc(upos) # <<<<<<<<<<<<<< - * else: - * cpos += 4 - */ - (++__pyx_v_upos); - goto __pyx_L9; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":236 - * inc(upos) - * else: - * cpos += 4 # <<<<<<<<<<<<<< - * inc(upos) - * # wide unicode chars get 2 unichars when python is compiled with --enable-unicode=ucs2 - */ - __pyx_v_cpos += 4; - - /* "/Users/maxiak/pyre2/src/re2.pyx":237 - * else: - * cpos += 4 - * inc(upos) # <<<<<<<<<<<<<< - * # wide unicode chars get 2 unichars when python is compiled with --enable-unicode=ucs2 - * # TODO: verify this - */ - (++__pyx_v_upos); - - /* "/Users/maxiak/pyre2/src/re2.pyx":240 - * # wide unicode chars get 2 unichars when python is compiled with --enable-unicode=ucs2 - * # TODO: verify this - * emit_ifndef_py_unicode_wide() # <<<<<<<<<<<<<< - * inc(upos) - * emit_endif() - */ - #if !defined(Py_UNICODE_WIDE) //(); - - /* "/Users/maxiak/pyre2/src/re2.pyx":241 - * # TODO: verify this - * emit_ifndef_py_unicode_wide() - * inc(upos) # <<<<<<<<<<<<<< - * emit_endif() - * - */ - (++__pyx_v_upos); - - /* "/Users/maxiak/pyre2/src/re2.pyx":242 - * emit_ifndef_py_unicode_wide() - * inc(upos) - * emit_endif() # 
<<<<<<<<<<<<<< - * - * if positions[i] == cpos: - */ - #endif //(); - } - __pyx_L9:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":244 - * emit_endif() - * - * if positions[i] == cpos: # <<<<<<<<<<<<<< - * new_positions.append(upos) - * inc(i) - */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_positions, __pyx_v_i, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyInt_FromLong(__pyx_v_cpos); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_7 = PyObject_RichCompare(__pyx_t_3, __pyx_t_4, Py_EQ); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":245 - * - * if positions[i] == cpos: - * new_positions.append(upos) # <<<<<<<<<<<<<< - * inc(i) - * if i == num_positions: - */ - if (unlikely(__pyx_v_new_positions == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_7 = PyInt_FromLong(__pyx_v_upos); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_6 = PyList_Append(((PyObject *)__pyx_v_new_positions), __pyx_t_7); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":246 - * if positions[i] == cpos: - * new_positions.append(upos) - * inc(i) # <<<<<<<<<<<<<< - * if i == num_positions: - * return new_positions - */ - (++__pyx_v_i); - - /* "/Users/maxiak/pyre2/src/re2.pyx":247 - * new_positions.append(upos) - * inc(i) - * if i == num_positions: # <<<<<<<<<<<<<< - * return new_positions - * - */ - __pyx_t_5 = (__pyx_v_i == __pyx_v_num_positions); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":248 - * inc(i) - * if i == num_positions: - * return new_positions # <<<<<<<<<<<<<< - * - * def _convert_spans(self, spans): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)__pyx_v_new_positions)); - __pyx_r = ((PyObject *)__pyx_v_new_positions); - goto __pyx_L0; - goto __pyx_L11; - } - __pyx_L11:; - goto __pyx_L10; - } - __pyx_L10:; - } - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("re2.Match._convert_positions"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_new_positions); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":250 - * return new_positions - * - * def _convert_spans(self, spans): # <<<<<<<<<<<<<< - * positions = [x for x,y in spans] + [y for x,y in spans] - * positions = sorted(set(positions)) - */ - -static PyObject *__pyx_pf_3re2_5Match__convert_spans(PyObject *__pyx_v_self, PyObject *__pyx_v_spans); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match__convert_spans(PyObject *__pyx_v_self, PyObject *__pyx_v_spans) { - PyObject *__pyx_v_positions; - PyObject *__pyx_v_posdict; - PyObject *__pyx_v_x; - PyObject *__pyx_v_y; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - Py_ssize_t __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - 
PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - __Pyx_RefNannySetupContext("_convert_spans"); - __pyx_v_positions = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_posdict = ((PyObject *)Py_None); __Pyx_INCREF(Py_None); - __pyx_v_x = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_y = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":251 - * - * def _convert_spans(self, spans): - * positions = [x for x,y in spans] + [y for x,y in spans] # <<<<<<<<<<<<<< - * positions = sorted(set(positions)) - * posdict = dict(zip(positions, self._convert_positions(positions))) - */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - if (PyList_CheckExact(__pyx_v_spans) || PyTuple_CheckExact(__pyx_v_spans)) { - __pyx_t_2 = 0; __pyx_t_3 = __pyx_v_spans; __Pyx_INCREF(__pyx_t_3); - } else { - __pyx_t_2 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_spans); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - } - for (;;) { - if (likely(PyList_CheckExact(__pyx_t_3))) { - if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_3)) break; - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; - } else if (likely(PyTuple_CheckExact(__pyx_t_3))) { - if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_3)) break; - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; - } else { - __pyx_t_4 = PyIter_Next(__pyx_t_3); - if (!__pyx_t_4) { - if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - break; - } - __Pyx_GOTREF(__pyx_t_4); - } - if (PyTuple_CheckExact(__pyx_t_4) && likely(PyTuple_GET_SIZE(__pyx_t_4) == 2)) { - PyObject* tuple = __pyx_t_4; - __pyx_t_5 = PyTuple_GET_ITEM(tuple, 
0); __Pyx_INCREF(__pyx_t_5); - __pyx_t_6 = PyTuple_GET_ITEM(tuple, 1); __Pyx_INCREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_v_x); - __pyx_v_x = __pyx_t_5; - __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_v_y); - __pyx_v_y = __pyx_t_6; - __pyx_t_6 = 0; - } else { - __pyx_t_7 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_5 = __Pyx_UnpackItem(__pyx_t_7, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = __Pyx_UnpackItem(__pyx_t_7, 1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - if (__Pyx_EndUnpack(__pyx_t_7, 2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_v_x); - __pyx_v_x = __pyx_t_5; - __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_v_y); - __pyx_v_y = __pyx_t_6; - __pyx_t_6 = 0; - } - if (unlikely(PyList_Append(__pyx_t_1, (PyObject*)__pyx_v_x))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - if (PyList_CheckExact(__pyx_v_spans) || PyTuple_CheckExact(__pyx_v_spans)) { - __pyx_t_2 = 0; __pyx_t_4 = __pyx_v_spans; __Pyx_INCREF(__pyx_t_4); - } else { - __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_spans); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - 
__Pyx_GOTREF(__pyx_t_4); - } - for (;;) { - if (likely(PyList_CheckExact(__pyx_t_4))) { - if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_4)) break; - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_6); __pyx_t_2++; - } else if (likely(PyTuple_CheckExact(__pyx_t_4))) { - if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_4)) break; - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_6); __pyx_t_2++; - } else { - __pyx_t_6 = PyIter_Next(__pyx_t_4); - if (!__pyx_t_6) { - if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - break; - } - __Pyx_GOTREF(__pyx_t_6); - } - if (PyTuple_CheckExact(__pyx_t_6) && likely(PyTuple_GET_SIZE(__pyx_t_6) == 2)) { - PyObject* tuple = __pyx_t_6; - __pyx_t_5 = PyTuple_GET_ITEM(tuple, 0); __Pyx_INCREF(__pyx_t_5); - __pyx_t_7 = PyTuple_GET_ITEM(tuple, 1); __Pyx_INCREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF(__pyx_v_x); - __pyx_v_x = __pyx_t_5; - __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_v_y); - __pyx_v_y = __pyx_t_7; - __pyx_t_7 = 0; - } else { - __pyx_t_8 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_5 = __Pyx_UnpackItem(__pyx_t_8, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_7 = __Pyx_UnpackItem(__pyx_t_8, 1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (__Pyx_EndUnpack(__pyx_t_8, 2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_v_x); - __pyx_v_x = __pyx_t_5; - __pyx_t_5 = 0; - 
__Pyx_DECREF(__pyx_v_y); - __pyx_v_y = __pyx_t_7; - __pyx_t_7 = 0; - } - if (unlikely(PyList_Append(__pyx_t_3, (PyObject*)__pyx_v_y))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Add(((PyObject *)__pyx_t_1), ((PyObject *)__pyx_t_3)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_v_positions); - __pyx_v_positions = ((PyObject *)__pyx_t_4); - __pyx_t_4 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":252 - * def _convert_spans(self, spans): - * positions = [x for x,y in spans] + [y for x,y in spans] - * positions = sorted(set(positions)) # <<<<<<<<<<<<<< - * posdict = dict(zip(positions, self._convert_positions(positions))) - * - */ - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(__pyx_v_positions); - PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_v_positions); - __Pyx_GIVEREF(__pyx_v_positions); - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)&PySet_Type)), __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_builtin_sorted, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_v_positions); - __pyx_v_positions = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":253 - * positions = [x for x,y in spans] + [y for x,y in spans] - * positions = sorted(set(positions)) - * posdict = dict(zip(positions, self._convert_positions(positions))) # <<<<<<<<<<<<<< - * - * return [(posdict[x], posdict[y]) for x,y in spans] - */ - __pyx_t_3 = ((struct __pyx_vtabstruct_3re2_Match *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->__pyx_vtab)->_convert_positions(((struct __pyx_obj_3re2_Match *)__pyx_v_self), __pyx_v_positions); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(__pyx_v_positions); - PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_v_positions); - __Pyx_GIVEREF(__pyx_v_positions); - PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_builtin_zip, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)&PyDict_Type)), __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(((PyObject *)__pyx_v_posdict)); - __pyx_v_posdict = ((PyObject *)__pyx_t_3); - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":255 - * posdict = dict(zip(positions, self._convert_positions(positions))) - * - * return [(posdict[x], posdict[y]) for x,y in spans] # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - if (PyList_CheckExact(__pyx_v_spans) || PyTuple_CheckExact(__pyx_v_spans)) { - __pyx_t_2 = 0; __pyx_t_4 = __pyx_v_spans; __Pyx_INCREF(__pyx_t_4); - } else { - __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_spans); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - } - for (;;) { - if (likely(PyList_CheckExact(__pyx_t_4))) { - if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_4)) break; - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; - } else if (likely(PyTuple_CheckExact(__pyx_t_4))) { - if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_4)) break; - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; - } else { - __pyx_t_1 = PyIter_Next(__pyx_t_4); - if (!__pyx_t_1) { - if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - break; - } - __Pyx_GOTREF(__pyx_t_1); - } - if (PyTuple_CheckExact(__pyx_t_1) && likely(PyTuple_GET_SIZE(__pyx_t_1) == 2)) { - PyObject* tuple = __pyx_t_1; - __pyx_t_6 = PyTuple_GET_ITEM(tuple, 0); __Pyx_INCREF(__pyx_t_6); - __pyx_t_7 = PyTuple_GET_ITEM(tuple, 1); __Pyx_INCREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_v_x); - __pyx_v_x = __pyx_t_6; - 
__pyx_t_6 = 0; - __Pyx_DECREF(__pyx_v_y); - __pyx_v_y = __pyx_t_7; - __pyx_t_7 = 0; - } else { - __pyx_t_5 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_6 = __Pyx_UnpackItem(__pyx_t_5, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_UnpackItem(__pyx_t_5, 1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (__Pyx_EndUnpack(__pyx_t_5, 2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_v_x); - __pyx_v_x = __pyx_t_6; - __pyx_t_6 = 0; - __Pyx_DECREF(__pyx_v_y); - __pyx_v_y = __pyx_t_7; - __pyx_t_7 = 0; - } - __pyx_t_1 = __Pyx_PyDict_GetItem(((PyObject *)__pyx_v_posdict), __pyx_v_x); if (!__pyx_t_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = __Pyx_PyDict_GetItem(((PyObject *)__pyx_v_posdict), __pyx_v_y); if (!__pyx_t_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_t_7); - __Pyx_GIVEREF(__pyx_t_7); - __pyx_t_1 = 0; - __pyx_t_7 = 0; - if (unlikely(PyList_Append(__pyx_t_3, (PyObject*)__pyx_t_6))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - } - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_INCREF(((PyObject *)__pyx_t_3)); - __pyx_r = ((PyObject *)__pyx_t_3); - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("re2.Match._convert_spans"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_positions); - __Pyx_DECREF(__pyx_v_posdict); - __Pyx_DECREF(__pyx_v_x); - __Pyx_DECREF(__pyx_v_y); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":258 - * - * - * cdef _make_spans(self): # <<<<<<<<<<<<<< - * if self._spans is not None: - * return - */ - -static PyObject *__pyx_f_3re2_5Match__make_spans(struct __pyx_obj_3re2_Match *__pyx_v_self) { - int __pyx_v_start; - int __pyx_v_end; - char *__pyx_v_s; - re2::StringPiece *__pyx_v_piece; - PyObject *__pyx_v_spans; - int __pyx_v_i; - PyObject *__pyx_r = NULL; - int __pyx_t_1; - char *__pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - int __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - __Pyx_RefNannySetupContext("_make_spans"); - __pyx_v_spans = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":259 - * - * cdef _make_spans(self): - * if self._spans is not None: # <<<<<<<<<<<<<< - * return - * - */ - __pyx_t_1 = (__pyx_v_self->_spans != ((PyObject *)Py_None)); - if (__pyx_t_1) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":260 - * cdef _make_spans(self): - * if self._spans is not None: - * return # <<<<<<<<<<<<<< - * - * cdef int start, end - */ - __Pyx_XDECREF(__pyx_r); - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - goto __pyx_L3; - } - __pyx_L3:; - - 
/* "/Users/maxiak/pyre2/src/re2.pyx":263 - * - * cdef int start, end - * cdef char * s = self.match_string # <<<<<<<<<<<<<< - * cdef _re2.StringPiece * piece - * - */ - __pyx_t_2 = PyBytes_AsString(__pyx_v_self->match_string); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 263; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_s = __pyx_t_2; - - /* "/Users/maxiak/pyre2/src/re2.pyx":266 - * cdef _re2.StringPiece * piece - * - * spans = [] # <<<<<<<<<<<<<< - * for i in range(self.nmatches): - * if self.matches[i].data() == NULL: - */ - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 266; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __Pyx_DECREF(__pyx_v_spans); - __pyx_v_spans = ((PyObject *)__pyx_t_3); - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":267 - * - * spans = [] - * for i in range(self.nmatches): # <<<<<<<<<<<<<< - * if self.matches[i].data() == NULL: - * spans.append((-1, -1)) - */ - __pyx_t_4 = __pyx_v_self->nmatches; - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { - __pyx_v_i = __pyx_t_5; - - /* "/Users/maxiak/pyre2/src/re2.pyx":268 - * spans = [] - * for i in range(self.nmatches): - * if self.matches[i].data() == NULL: # <<<<<<<<<<<<<< - * spans.append((-1, -1)) - * else: - */ - __pyx_t_1 = ((__pyx_v_self->matches[__pyx_v_i]).data() == NULL); - if (__pyx_t_1) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":269 - * for i in range(self.nmatches): - * if self.matches[i].data() == NULL: - * spans.append((-1, -1)) # <<<<<<<<<<<<<< - * else: - * piece = &self.matches[i] - */ - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_int_neg_1); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_int_neg_1); - __Pyx_GIVEREF(__pyx_int_neg_1); - 
__Pyx_INCREF(__pyx_int_neg_1); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_int_neg_1); - __Pyx_GIVEREF(__pyx_int_neg_1); - __pyx_t_6 = __Pyx_PyObject_Append(__pyx_v_spans, __pyx_t_3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - goto __pyx_L6; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":271 - * spans.append((-1, -1)) - * else: - * piece = &self.matches[i] # <<<<<<<<<<<<<< - * if piece.data() == NULL: - * return (-1, -1) - */ - __pyx_v_piece = (&(__pyx_v_self->matches[__pyx_v_i])); - - /* "/Users/maxiak/pyre2/src/re2.pyx":272 - * else: - * piece = &self.matches[i] - * if piece.data() == NULL: # <<<<<<<<<<<<<< - * return (-1, -1) - * start = piece.data() - s - */ - __pyx_t_1 = (__pyx_v_piece->data() == NULL); - if (__pyx_t_1) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":273 - * piece = &self.matches[i] - * if piece.data() == NULL: - * return (-1, -1) # <<<<<<<<<<<<<< - * start = piece.data() - s - * end = start + piece.length() - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_INCREF(__pyx_int_neg_1); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_int_neg_1); - __Pyx_GIVEREF(__pyx_int_neg_1); - __Pyx_INCREF(__pyx_int_neg_1); - PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_int_neg_1); - __Pyx_GIVEREF(__pyx_int_neg_1); - __pyx_r = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L0; - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":274 - * if piece.data() == NULL: - * return (-1, -1) - * start = piece.data() - s # <<<<<<<<<<<<<< - * end = start + piece.length() - * spans.append((start, end)) - */ - __pyx_v_start = (__pyx_v_piece->data() - __pyx_v_s); - - /* "/Users/maxiak/pyre2/src/re2.pyx":275 - * return 
(-1, -1) - * start = piece.data() - s - * end = start + piece.length() # <<<<<<<<<<<<<< - * spans.append((start, end)) - * - */ - __pyx_v_end = (__pyx_v_start + __pyx_v_piece->length()); - - /* "/Users/maxiak/pyre2/src/re2.pyx":276 - * start = piece.data() - s - * end = start + piece.length() - * spans.append((start, end)) # <<<<<<<<<<<<<< - * - * if self.encoded: - */ - __pyx_t_6 = PyInt_FromLong(__pyx_v_start); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_3 = PyInt_FromLong(__pyx_v_end); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_6); - __Pyx_GIVEREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_6 = 0; - __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Append(__pyx_v_spans, __pyx_t_7); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - } - __pyx_L6:; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":278 - * spans.append((start, end)) - * - * if self.encoded: # <<<<<<<<<<<<<< - * spans = self._convert_spans(spans) - * - */ - if (__pyx_v_self->encoded) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":279 - * - * if self.encoded: - * spans = self._convert_spans(spans) # <<<<<<<<<<<<<< - * - * self._spans = tuple(spans) - */ - __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s___convert_spans); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_INCREF(__pyx_v_spans); - PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_v_spans); - __Pyx_GIVEREF(__pyx_v_spans); - __pyx_t_6 = PyObject_Call(__pyx_t_3, __pyx_t_7, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_v_spans); - __pyx_v_spans = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L8; - } - __pyx_L8:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":281 - * spans = self._convert_spans(spans) - * - * self._spans = tuple(spans) # <<<<<<<<<<<<<< - * - * property regs: - */ - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_INCREF(__pyx_v_spans); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_v_spans); - __Pyx_GIVEREF(__pyx_v_spans); - __pyx_t_7 = PyObject_Call(((PyObject *)((PyObject*)&PyTuple_Type)), __pyx_t_6, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_GIVEREF(__pyx_t_7); - __Pyx_GOTREF(__pyx_v_self->_spans); - __Pyx_DECREF(((PyObject *)__pyx_v_self->_spans)); - __pyx_v_self->_spans = ((PyObject *)__pyx_t_7); - __pyx_t_7 = 0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("re2.Match._make_spans"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_spans); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - 
return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":284 - * - * property regs: - * def __get__(self): # <<<<<<<<<<<<<< - * if self._spans is None: - * self._make_spans() - */ - -static PyObject *__pyx_pf_3re2_5Match_4regs___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_4regs___get__(PyObject *__pyx_v_self) { - PyObject *__pyx_r = NULL; - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - __Pyx_RefNannySetupContext("__get__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":285 - * property regs: - * def __get__(self): - * if self._spans is None: # <<<<<<<<<<<<<< - * self._make_spans() - * return self._spans - */ - __pyx_t_1 = (((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_spans == ((PyObject *)Py_None)); - if (__pyx_t_1) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":286 - * def __get__(self): - * if self._spans is None: - * self._make_spans() # <<<<<<<<<<<<<< - * return self._spans - * - */ - __pyx_t_2 = ((struct __pyx_vtabstruct_3re2_Match *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->__pyx_vtab)->_make_spans(((struct __pyx_obj_3re2_Match *)__pyx_v_self)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - goto __pyx_L5; - } - __pyx_L5:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":287 - * if self._spans is None: - * self._make_spans() - * return self._spans # <<<<<<<<<<<<<< - * - * def expand(self, object template): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_spans)); - __pyx_r = ((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_spans); - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("re2.Match.regs.__get__"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - 
-/* "/Users/maxiak/pyre2/src/re2.pyx":289 - * return self._spans - * - * def expand(self, object template): # <<<<<<<<<<<<<< - * # TODO - This can be optimized to work a bit faster in C. - * # Expand a template with groups - */ - -static PyObject *__pyx_pf_3re2_5Match_expand(PyObject *__pyx_v_self, PyObject *__pyx_v_template); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_expand(PyObject *__pyx_v_self, PyObject *__pyx_v_template) { - PyObject *__pyx_v_items; - PyObject *__pyx_v_i; - PyObject *__pyx_v_item; - PyObject *__pyx_v_name; - PyObject *__pyx_v_rest; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - Py_ssize_t __pyx_t_4; - PyObject *__pyx_t_5 = NULL; - int __pyx_t_6; - PyObject *__pyx_t_7 = NULL; - int __pyx_t_8; - int __pyx_t_9; - PyObject *__pyx_t_10 = NULL; - __Pyx_RefNannySetupContext("expand"); - __pyx_v_items = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_i = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_item = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_name = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_rest = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":292 - * # TODO - This can be optimized to work a bit faster in C. 
- * # Expand a template with groups - * items = template.split('\\') # <<<<<<<<<<<<<< - * for i, item in enumerate(items[1:]): - * if item[0].isdigit(): - */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_template, __pyx_n_s__split); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_3)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_3)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_3)); - __pyx_t_3 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_v_items); - __pyx_v_items = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":293 - * # Expand a template with groups - * items = template.split('\\') - * for i, item in enumerate(items[1:]): # <<<<<<<<<<<<<< - * if item[0].isdigit(): - * # Number group - */ - __Pyx_INCREF(__pyx_int_0); - __pyx_t_3 = __pyx_int_0; - __pyx_t_2 = PySequence_GetSlice(__pyx_v_items, 1, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - if (PyList_CheckExact(__pyx_t_2) || PyTuple_CheckExact(__pyx_t_2)) { - __pyx_t_4 = 0; __pyx_t_1 = __pyx_t_2; __Pyx_INCREF(__pyx_t_1); - } else { - __pyx_t_4 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - for (;;) { - if 
(likely(PyList_CheckExact(__pyx_t_1))) { - if (__pyx_t_4 >= PyList_GET_SIZE(__pyx_t_1)) break; - __pyx_t_2 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_4); __Pyx_INCREF(__pyx_t_2); __pyx_t_4++; - } else if (likely(PyTuple_CheckExact(__pyx_t_1))) { - if (__pyx_t_4 >= PyTuple_GET_SIZE(__pyx_t_1)) break; - __pyx_t_2 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_4); __Pyx_INCREF(__pyx_t_2); __pyx_t_4++; - } else { - __pyx_t_2 = PyIter_Next(__pyx_t_1); - if (!__pyx_t_2) { - if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - break; - } - __Pyx_GOTREF(__pyx_t_2); - } - __Pyx_DECREF(__pyx_v_item); - __pyx_v_item = __pyx_t_2; - __pyx_t_2 = 0; - __Pyx_INCREF(__pyx_t_3); - __Pyx_DECREF(__pyx_v_i); - __pyx_v_i = __pyx_t_3; - __pyx_t_2 = PyNumber_Add(__pyx_t_3, __pyx_int_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); - __pyx_t_3 = __pyx_t_2; - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":294 - * items = template.split('\\') - * for i, item in enumerate(items[1:]): - * if item[0].isdigit(): # <<<<<<<<<<<<<< - * # Number group - * if item[0] == '0': - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_item, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 294; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__isdigit); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 294; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 294; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_5); 
__pyx_t_5 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 294; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":296 - * if item[0].isdigit(): - * # Number group - * if item[0] == '0': # <<<<<<<<<<<<<< - * items[i + 1] = '\x00' + item[1:] - * else: - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_item, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 296; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_kp_s__0), Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 296; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 296; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":297 - * # Number group - * if item[0] == '0': - * items[i + 1] = '\x00' + item[1:] # <<<<<<<<<<<<<< - * else: - * items[i + 1] = self.group(int(item[0])) + item[1:] - */ - __pyx_t_5 = PySequence_GetSlice(__pyx_v_item, 1, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_2 = PyNumber_Add(((PyObject *)__pyx_kp_s_4), __pyx_t_5); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyNumber_Add(__pyx_v_i, __pyx_int_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - if (PyObject_SetItem(__pyx_v_items, __pyx_t_5, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - goto __pyx_L8; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":299 - * items[i + 1] = '\x00' + item[1:] - * else: - * items[i + 1] = self.group(int(item[0])) + item[1:] # <<<<<<<<<<<<<< - * elif item[:2] == 'g<' and '>' in item: - * # This is a named group - */ - __pyx_t_2 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__group); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = __Pyx_GetItemInt(__pyx_v_item, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_5) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_5); - __Pyx_GIVEREF(__pyx_t_5); - __pyx_t_5 = 0; - __pyx_t_5 = PyObject_Call(((PyObject *)((PyObject*)&PyInt_Type)), __pyx_t_7, NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_5); - __Pyx_GIVEREF(__pyx_t_5); - __pyx_t_5 = 0; - __pyx_t_5 = PyObject_Call(__pyx_t_2, __pyx_t_7, NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PySequence_GetSlice(__pyx_v_item, 1, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_2 = PyNumber_Add(__pyx_t_5, __pyx_t_7); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyNumber_Add(__pyx_v_i, __pyx_int_1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (PyObject_SetItem(__pyx_v_items, __pyx_t_7, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - } - __pyx_L8:; - goto __pyx_L7; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":300 - * else: - * items[i + 1] = self.group(int(item[0])) + item[1:] - * elif item[:2] == 'g<' and '>' in item: # <<<<<<<<<<<<<< - * # This is a named group - * name, rest = item[2:].split('>', 1) - */ - __pyx_t_2 = PySequence_GetSlice(__pyx_v_item, 0, 2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_7 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_kp_s_5), Py_EQ); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (__pyx_t_6) { - __pyx_t_8 = ((PySequence_Contains(__pyx_v_item, ((PyObject *)__pyx_kp_s_6)))); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = __pyx_t_8; - } else { - __pyx_t_9 = __pyx_t_6; - } - if (__pyx_t_9) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":302 - * elif item[:2] == 'g<' and '>' in item: - * # This is a named group - * name, rest = item[2:].split('>', 1) # <<<<<<<<<<<<<< - * items[i + 1] = self.group(name) + rest - * else: - */ - __pyx_t_7 = PySequence_GetSlice(__pyx_v_item, 2, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_7, __pyx_n_s__split); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_6)); - PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_kp_s_6)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_6)); - __Pyx_INCREF(__pyx_int_1); - PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_int_1); - __Pyx_GIVEREF(__pyx_int_1); - __pyx_t_5 = PyObject_Call(__pyx_t_2, __pyx_t_7, NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (PyTuple_CheckExact(__pyx_t_5) && likely(PyTuple_GET_SIZE(__pyx_t_5) == 2)) { - PyObject* tuple = __pyx_t_5; - __pyx_t_7 = PyTuple_GET_ITEM(tuple, 0); __Pyx_INCREF(__pyx_t_7); - 
__pyx_t_2 = PyTuple_GET_ITEM(tuple, 1); __Pyx_INCREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_v_name); - __pyx_v_name = __pyx_t_7; - __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_v_rest); - __pyx_v_rest = __pyx_t_2; - __pyx_t_2 = 0; - } else { - __pyx_t_10 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_7 = __Pyx_UnpackItem(__pyx_t_10, 0); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_2 = __Pyx_UnpackItem(__pyx_t_10, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - if (__Pyx_EndUnpack(__pyx_t_10, 2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __Pyx_DECREF(__pyx_v_name); - __pyx_v_name = __pyx_t_7; - __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_v_rest); - __pyx_v_rest = __pyx_t_2; - __pyx_t_2 = 0; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":303 - * # This is a named group - * name, rest = item[2:].split('>', 1) - * items[i + 1] = self.group(name) + rest # <<<<<<<<<<<<<< - * else: - * # This isn't a template at all - */ - __pyx_t_5 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__group); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_name); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_name); - __Pyx_GIVEREF(__pyx_v_name); - __pyx_t_7 = 
PyObject_Call(__pyx_t_5, __pyx_t_2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyNumber_Add(__pyx_t_7, __pyx_v_rest); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyNumber_Add(__pyx_v_i, __pyx_int_1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (PyObject_SetItem(__pyx_v_items, __pyx_t_7, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - goto __pyx_L7; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":306 - * else: - * # This isn't a template at all - * items[i + 1] = '\\' + item # <<<<<<<<<<<<<< - * return ''.join(items) - * - */ - __pyx_t_2 = PyNumber_Add(((PyObject *)__pyx_kp_s_3), __pyx_v_item); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_7 = PyNumber_Add(__pyx_v_i, __pyx_int_1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (PyObject_SetItem(__pyx_v_items, __pyx_t_7, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - } - __pyx_L7:; - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":307 - * # This isn't a template at all - * items[i + 1] = '\\' + item - * return ''.join(items) # <<<<<<<<<<<<<< - * - * def groupdict(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_7), __pyx_n_s__join); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_items); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_items); - __Pyx_GIVEREF(__pyx_v_items); - __pyx_t_2 = PyObject_Call(__pyx_t_3, __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_10); - __Pyx_AddTraceback("re2.Match.expand"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_items); - __Pyx_DECREF(__pyx_v_i); - __Pyx_DECREF(__pyx_v_item); - __Pyx_DECREF(__pyx_v_name); - __Pyx_DECREF(__pyx_v_rest); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":309 - * return ''.join(items) - * - * def groupdict(self): # <<<<<<<<<<<<<< - * cdef _re2.stringintmapiterator it - * cdef dict result = {} - */ - -static PyObject *__pyx_pf_3re2_5Match_groupdict(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_groupdict(PyObject 
*__pyx_v_self, CYTHON_UNUSED PyObject *unused) { - std::map::const_iterator __pyx_v_it; - PyObject *__pyx_v_result = 0; - PyObject *__pyx_v_indexes = 0; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - __Pyx_RefNannySetupContext("groupdict"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":311 - * def groupdict(self): - * cdef _re2.stringintmapiterator it - * cdef dict result = {} # <<<<<<<<<<<<<< - * cdef dict indexes = {} - * - */ - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_v_result = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":312 - * cdef _re2.stringintmapiterator it - * cdef dict result = {} - * cdef dict indexes = {} # <<<<<<<<<<<<<< - * - * self.init_groups() - */ - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_v_indexes = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":314 - * cdef dict indexes = {} - * - * self.init_groups() # <<<<<<<<<<<<<< - * - * if self._named_groups: - */ - __pyx_t_1 = ((struct __pyx_vtabstruct_3re2_Match *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->__pyx_vtab)->init_groups(((struct __pyx_obj_3re2_Match *)__pyx_v_self)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":316 - * self.init_groups() - * - * if self._named_groups: # <<<<<<<<<<<<<< - * return self._named_groups - * - */ - __pyx_t_2 = __Pyx_PyObject_IsTrue(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_groups)); if (unlikely(__pyx_t_2 < 0)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":317 - * - * if self._named_groups: - * return self._named_groups # <<<<<<<<<<<<<< - * - * self._named_groups = result - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_groups)); - __pyx_r = ((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_groups); - goto __pyx_L0; - goto __pyx_L5; - } - __pyx_L5:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":319 - * return self._named_groups - * - * self._named_groups = result # <<<<<<<<<<<<<< - * it = self.named_groups.begin() - * while it != self.named_groups.end(): - */ - __Pyx_INCREF(((PyObject *)__pyx_v_result)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_result)); - __Pyx_GOTREF(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_groups); - __Pyx_DECREF(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_groups)); - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_groups = __pyx_v_result; - - /* "/Users/maxiak/pyre2/src/re2.pyx":320 - * - * self._named_groups = result - * it = self.named_groups.begin() # <<<<<<<<<<<<<< - * while it != self.named_groups.end(): - * indexes[cpp_to_pystring(deref(it).first)] = deref(it).second - */ - __pyx_v_it = ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->named_groups->begin(); - - /* "/Users/maxiak/pyre2/src/re2.pyx":321 - * self._named_groups = result - * it = self.named_groups.begin() - * while it != self.named_groups.end(): # <<<<<<<<<<<<<< - * indexes[cpp_to_pystring(deref(it).first)] = deref(it).second - * result[cpp_to_pystring(deref(it).first)] = self._groups[deref(it).second] - */ - while (1) { - __pyx_t_2 = (__pyx_v_it != ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->named_groups->end()); - if (!__pyx_t_2) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":322 - * it = self.named_groups.begin() - * while it != self.named_groups.end(): - * 
indexes[cpp_to_pystring(deref(it).first)] = deref(it).second # <<<<<<<<<<<<<< - * result[cpp_to_pystring(deref(it).first)] = self._groups[deref(it).second] - * inc(it) - */ - __pyx_t_1 = PyInt_FromLong((*__pyx_v_it).second); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __pyx_f_3re2_cpp_to_pystring((*__pyx_v_it).first); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(((PyObject *)__pyx_v_indexes), __pyx_t_3, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":323 - * while it != self.named_groups.end(): - * indexes[cpp_to_pystring(deref(it).first)] = deref(it).second - * result[cpp_to_pystring(deref(it).first)] = self._groups[deref(it).second] # <<<<<<<<<<<<<< - * inc(it) - * - */ - __pyx_t_4 = (*__pyx_v_it).second; - __pyx_t_1 = __Pyx_GetItemInt_Tuple(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_groups), __pyx_t_4, sizeof(int), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __pyx_f_3re2_cpp_to_pystring((*__pyx_v_it).first); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(((PyObject *)__pyx_v_result), __pyx_t_3, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":324 - * 
indexes[cpp_to_pystring(deref(it).first)] = deref(it).second - * result[cpp_to_pystring(deref(it).first)] = self._groups[deref(it).second] - * inc(it) # <<<<<<<<<<<<<< - * - * self._named_groups = result - */ - (++__pyx_v_it); - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":326 - * inc(it) - * - * self._named_groups = result # <<<<<<<<<<<<<< - * self._named_indexes = indexes - * return result - */ - __Pyx_INCREF(((PyObject *)__pyx_v_result)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_result)); - __Pyx_GOTREF(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_groups); - __Pyx_DECREF(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_groups)); - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_groups = __pyx_v_result; - - /* "/Users/maxiak/pyre2/src/re2.pyx":327 - * - * self._named_groups = result - * self._named_indexes = indexes # <<<<<<<<<<<<<< - * return result - * - */ - __Pyx_INCREF(((PyObject *)__pyx_v_indexes)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_indexes)); - __Pyx_GOTREF(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_indexes); - __Pyx_DECREF(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_indexes)); - ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_indexes = __pyx_v_indexes; - - /* "/Users/maxiak/pyre2/src/re2.pyx":328 - * self._named_groups = result - * self._named_indexes = indexes - * return result # <<<<<<<<<<<<<< - * - * def end(self, group=0): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)__pyx_v_result)); - __pyx_r = ((PyObject *)__pyx_v_result); - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.Match.groupdict"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_result); - __Pyx_XDECREF(__pyx_v_indexes); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":330 - * return result - * - * 
def end(self, group=0): # <<<<<<<<<<<<<< - * return self.span(group)[1] - * - */ - -static PyObject *__pyx_pf_3re2_5Match_end(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_end(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_group = 0; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__group,0}; - __Pyx_RefNannySetupContext("end"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[1] = {0}; - values[0] = ((PyObject *)__pyx_int_0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__group); - if (value) { values[0] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "end") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 330; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_group = values[0]; - } else { - __pyx_v_group = ((PyObject *)__pyx_int_0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 1: __pyx_v_group = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("end", 0, 0, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 330; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Match.end"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":331 - * - * def end(self, group=0): - * return self.span(group)[1] # <<<<<<<<<<<<<< - * - * def start(self, group=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__span); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_group); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_group); - __Pyx_GIVEREF(__pyx_v_group); - __pyx_t_3 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_3, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.Match.end"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":333 - * return self.span(group)[1] - * - * def start(self, group=0): # <<<<<<<<<<<<<< - * return self.span(group)[0] - * - */ - -static PyObject *__pyx_pf_3re2_5Match_start(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_start(PyObject *__pyx_v_self, PyObject *__pyx_args, 
PyObject *__pyx_kwds) { - PyObject *__pyx_v_group = 0; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__group,0}; - __Pyx_RefNannySetupContext("start"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[1] = {0}; - values[0] = ((PyObject *)__pyx_int_0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__group); - if (value) { values[0] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "start") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_group = values[0]; - } else { - __pyx_v_group = ((PyObject *)__pyx_int_0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 1: __pyx_v_group = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("start", 0, 0, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Match.start"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":334 - * - * def start(self, group=0): - * return self.span(group)[0] # <<<<<<<<<<<<<< - * - * def span(self, group=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__span); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_group); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_group); - __Pyx_GIVEREF(__pyx_v_group); - __pyx_t_3 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_3, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.Match.start"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":336 - * return self.span(group)[0] - * - * def span(self, group=0): # <<<<<<<<<<<<<< - * self._make_spans() - * if type(group) is int: - */ - -static PyObject *__pyx_pf_3re2_5Match_span(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_span(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_group = 0; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - Py_ssize_t __pyx_t_3; - PyObject *__pyx_t_4 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__group,0}; - __Pyx_RefNannySetupContext("span"); - 
if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[1] = {0}; - values[0] = ((PyObject *)__pyx_int_0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__group); - if (value) { values[0] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "span") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_group = values[0]; - } else { - __pyx_v_group = ((PyObject *)__pyx_int_0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 1: __pyx_v_group = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("span", 0, 0, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Match.span"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":337 - * - * def span(self, group=0): - * self._make_spans() # <<<<<<<<<<<<<< - * if type(group) is int: - * if group > len(self._spans): - */ - __pyx_t_1 = ((struct __pyx_vtabstruct_3re2_Match *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->__pyx_vtab)->_make_spans(((struct __pyx_obj_3re2_Match *)__pyx_v_self)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 337; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":338 - * 
def span(self, group=0): - * self._make_spans() - * if type(group) is int: # <<<<<<<<<<<<<< - * if group > len(self._spans): - * raise IndexError("no such group") - */ - __pyx_t_2 = (((PyObject *)Py_TYPE(__pyx_v_group)) == ((PyObject *)((PyObject*)&PyInt_Type))); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":339 - * self._make_spans() - * if type(group) is int: - * if group > len(self._spans): # <<<<<<<<<<<<<< - * raise IndexError("no such group") - * return self._spans[group] - */ - if (unlikely(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_spans == Py_None)) { - PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_3 = PyTuple_GET_SIZE(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_spans)); - __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_group, __pyx_t_1, Py_GT); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":340 - * if type(group) is int: - * if group > len(self._spans): - * raise IndexError("no such group") # <<<<<<<<<<<<<< - * return self._spans[group] - * else: - */ - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 340; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_2)); - 
PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_kp_s_2)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_2)); - __pyx_t_1 = PyObject_Call(__pyx_builtin_IndexError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 340; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_Raise(__pyx_t_1, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 340; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":341 - * if group > len(self._spans): - * raise IndexError("no such group") - * return self._spans[group] # <<<<<<<<<<<<<< - * else: - * self.groupdict() - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_GetItem(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_spans), __pyx_v_group); if (!__pyx_t_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - goto __pyx_L6; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":343 - * return self._spans[group] - * else: - * self.groupdict() # <<<<<<<<<<<<<< - * if group not in self._named_indexes: - * raise IndexError("no such group") - */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__groupdict); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":344 - * else: - * self.groupdict() - * if group not in 
self._named_indexes: # <<<<<<<<<<<<<< - * raise IndexError("no such group") - * return self._spans[self._named_indexes[group]] - */ - if (unlikely(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_indexes) == Py_None)) { - __Pyx_RaiseNoneNotIterableError(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } else { - __pyx_t_2 = (__Pyx_NegateNonNeg(PyDict_Contains(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_named_indexes), __pyx_v_group))); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":345 - * self.groupdict() - * if group not in self._named_indexes: - * raise IndexError("no such group") # <<<<<<<<<<<<<< - * return self._spans[self._named_indexes[group]] - * - */ - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_2)); - PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_kp_s_2)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_2)); - __pyx_t_1 = PyObject_Call(__pyx_builtin_IndexError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_Raise(__pyx_t_1, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L8; - } - __pyx_L8:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":346 - * if group not in self._named_indexes: - * raise IndexError("no such group") - * return self._spans[self._named_indexes[group]] # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyDict_GetItem(((PyObject *)((struct 
__pyx_obj_3re2_Match *)__pyx_v_self)->_named_indexes), __pyx_v_group); if (!__pyx_t_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyObject_GetItem(((PyObject *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_spans), __pyx_t_1); if (!__pyx_t_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - } - __pyx_L6:; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("re2.Match.span"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":350 - * - * property lastindex: - * def __get__(self): # <<<<<<<<<<<<<< - * self.init_groups() - * if self._lastindex < 1: - */ - -static PyObject *__pyx_pf_3re2_5Match_9lastindex___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_9lastindex___get__(PyObject *__pyx_v_self) { - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - __Pyx_RefNannySetupContext("__get__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":351 - * property lastindex: - * def __get__(self): - * self.init_groups() # <<<<<<<<<<<<<< - * if self._lastindex < 1: - * return None - */ - __pyx_t_1 = ((struct __pyx_vtabstruct_3re2_Match *)((struct __pyx_obj_3re2_Match *)__pyx_v_self)->__pyx_vtab)->init_groups(((struct __pyx_obj_3re2_Match *)__pyx_v_self)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 351; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":352 - * def __get__(self): - * self.init_groups() - * if self._lastindex < 1: 
# <<<<<<<<<<<<<< - * return None - * else: - */ - __pyx_t_2 = (((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_lastindex < 1); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":353 - * self.init_groups() - * if self._lastindex < 1: - * return None # <<<<<<<<<<<<<< - * else: - * return self._lastindex - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(Py_None); - __pyx_r = Py_None; - goto __pyx_L0; - goto __pyx_L5; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":355 - * return None - * else: - * return self._lastindex # <<<<<<<<<<<<<< - * - * property lastgroup: - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyInt_FromLong(((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_lastindex); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - } - __pyx_L5:; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Match.lastindex.__get__"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":358 - * - * property lastgroup: - * def __get__(self): # <<<<<<<<<<<<<< - * self.init_groups() - * cdef _re2.stringintmapiterator it - */ - -static PyObject *__pyx_pf_3re2_5Match_9lastgroup___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_5Match_9lastgroup___get__(PyObject *__pyx_v_self) { - std::map::const_iterator __pyx_v_it; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - __Pyx_RefNannySetupContext("__get__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":359 - * property lastgroup: - * def __get__(self): - * self.init_groups() # <<<<<<<<<<<<<< - * cdef _re2.stringintmapiterator it - * - */ - __pyx_t_1 = ((struct __pyx_vtabstruct_3re2_Match *)((struct __pyx_obj_3re2_Match 
*)__pyx_v_self)->__pyx_vtab)->init_groups(((struct __pyx_obj_3re2_Match *)__pyx_v_self)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":362 - * cdef _re2.stringintmapiterator it - * - * if self._lastindex < 1: # <<<<<<<<<<<<<< - * return None - * - */ - __pyx_t_2 = (((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_lastindex < 1); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":363 - * - * if self._lastindex < 1: - * return None # <<<<<<<<<<<<<< - * - * it = self.named_groups.begin() - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(Py_None); - __pyx_r = Py_None; - goto __pyx_L0; - goto __pyx_L5; - } - __pyx_L5:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":365 - * return None - * - * it = self.named_groups.begin() # <<<<<<<<<<<<<< - * while it != self.named_groups.end(): - * if deref(it).second == self._lastindex: - */ - __pyx_v_it = ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->named_groups->begin(); - - /* "/Users/maxiak/pyre2/src/re2.pyx":366 - * - * it = self.named_groups.begin() - * while it != self.named_groups.end(): # <<<<<<<<<<<<<< - * if deref(it).second == self._lastindex: - * return cpp_to_pystring(deref(it).first) - */ - while (1) { - __pyx_t_2 = (__pyx_v_it != ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->named_groups->end()); - if (!__pyx_t_2) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":367 - * it = self.named_groups.begin() - * while it != self.named_groups.end(): - * if deref(it).second == self._lastindex: # <<<<<<<<<<<<<< - * return cpp_to_pystring(deref(it).first) - * inc(it) - */ - __pyx_t_2 = ((*__pyx_v_it).second == ((struct __pyx_obj_3re2_Match *)__pyx_v_self)->_lastindex); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":368 - * while it != self.named_groups.end(): - * if deref(it).second == self._lastindex: - * return 
cpp_to_pystring(deref(it).first) # <<<<<<<<<<<<<< - * inc(it) - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_f_3re2_cpp_to_pystring((*__pyx_v_it).first); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - goto __pyx_L8; - } - __pyx_L8:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":369 - * if deref(it).second == self._lastindex: - * return cpp_to_pystring(deref(it).first) - * inc(it) # <<<<<<<<<<<<<< - * - * return None - */ - (++__pyx_v_it); - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":371 - * inc(it) - * - * return None # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(Py_None); - __pyx_r = Py_None; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Match.lastgroup.__get__"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":379 - * cdef bint encoded - * cdef int _flags - * cdef public object pattern # <<<<<<<<<<<<<< - * cdef object __weakref__ - * - */ - -static PyObject *__pyx_pf_3re2_7Pattern_7pattern___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_7Pattern_7pattern___get__(PyObject *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannySetupContext("__get__"); - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->pattern); - __pyx_r = ((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->pattern; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_3re2_7Pattern_7pattern___set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int 
__pyx_pf_3re2_7Pattern_7pattern___set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - __Pyx_RefNannySetupContext("__set__"); - __Pyx_INCREF(__pyx_v_value); - __Pyx_GIVEREF(__pyx_v_value); - __Pyx_GOTREF(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->pattern); - __Pyx_DECREF(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->pattern); - ((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->pattern = __pyx_v_value; - - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_3re2_7Pattern_7pattern___del__(PyObject *__pyx_v_self); /*proto*/ -static int __pyx_pf_3re2_7Pattern_7pattern___del__(PyObject *__pyx_v_self) { - int __pyx_r; - __Pyx_RefNannySetupContext("__del__"); - __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(Py_None); - __Pyx_GOTREF(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->pattern); - __Pyx_DECREF(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->pattern); - ((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->pattern = Py_None; - - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":383 - * - * property flags: - * def __get__(self): # <<<<<<<<<<<<<< - * return self._flags - * - */ - -static PyObject *__pyx_pf_3re2_7Pattern_5flags___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_7Pattern_5flags___get__(PyObject *__pyx_v_self) { - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - __Pyx_RefNannySetupContext("__get__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":384 - * property flags: - * def __get__(self): - * return self._flags # <<<<<<<<<<<<<< - * - * property groups: - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyInt_FromLong(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->_flags); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; 
__Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Pattern.flags.__get__"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":387 - * - * property groups: - * def __get__(self): # <<<<<<<<<<<<<< - * return self.ngroups - * - */ - -static PyObject *__pyx_pf_3re2_7Pattern_6groups___get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pf_3re2_7Pattern_6groups___get__(PyObject *__pyx_v_self) { - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - __Pyx_RefNannySetupContext("__get__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":388 - * property groups: - * def __get__(self): - * return self.ngroups # <<<<<<<<<<<<<< - * - * def __dealloc__(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyInt_FromLong(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->ngroups); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 388; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Pattern.groups.__get__"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":390 - * return self.ngroups - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * del self.re_pattern - * - */ - -static void __pyx_pf_3re2_7Pattern___dealloc__(PyObject *__pyx_v_self); /*proto*/ -static void __pyx_pf_3re2_7Pattern___dealloc__(PyObject *__pyx_v_self) { - __Pyx_RefNannySetupContext("__dealloc__"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":391 - * - * def __dealloc__(self): - * del self.re_pattern # <<<<<<<<<<<<<< - * - * cdef _search(self, string, int pos, int endpos, _re2.re2_Anchor anchoring): - */ - delete 
((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->re_pattern; - - __Pyx_RefNannyFinishContext(); -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":393 - * del self.re_pattern - * - * cdef _search(self, string, int pos, int endpos, _re2.re2_Anchor anchoring): # <<<<<<<<<<<<<< - * """ - * Scan through string looking for a match, and return a corresponding - */ - -static PyObject *__pyx_f_3re2_7Pattern__search(struct __pyx_obj_3re2_Pattern *__pyx_v_self, PyObject *__pyx_v_string, int __pyx_v_pos, int __pyx_v_endpos, RE2::Anchor __pyx_v_anchoring) { - Py_ssize_t __pyx_v_size; - int __pyx_v_result; - char *__pyx_v_cstring; - int __pyx_v_encoded; - re2::StringPiece *__pyx_v_sp; - struct __pyx_obj_3re2_Match *__pyx_v_m = 0; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - Py_ssize_t __pyx_t_6; - __Pyx_RefNannySetupContext("_search"); - __Pyx_INCREF(__pyx_v_string); - - /* "/Users/maxiak/pyre2/src/re2.pyx":401 - * cdef int result - * cdef char * cstring - * cdef int encoded = 0 # <<<<<<<<<<<<<< - * cdef _re2.StringPiece * sp - * cdef Match m = Match(self, self.ngroups + 1) - */ - __pyx_v_encoded = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":403 - * cdef int encoded = 0 - * cdef _re2.StringPiece * sp - * cdef Match m = Match(self, self.ngroups + 1) # <<<<<<<<<<<<<< - * - * if hasattr(string, 'tostring'): - */ - __pyx_t_1 = PyInt_FromLong((__pyx_v_self->ngroups + 1)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_v_self)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_v_self)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); - PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_1); - 
__Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3re2_Match)), __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_m = ((struct __pyx_obj_3re2_Match *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":405 - * cdef Match m = Match(self, self.ngroups + 1) - * - * if hasattr(string, 'tostring'): # <<<<<<<<<<<<<< - * string = string.tostring() - * - */ - __pyx_t_3 = PyObject_HasAttr(__pyx_v_string, ((PyObject *)__pyx_n_s__tostring)); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":406 - * - * if hasattr(string, 'tostring'): - * string = string.tostring() # <<<<<<<<<<<<<< - * - * string = unicode_to_bytestring(string, &encoded) - */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_string, __pyx_n_s__tostring); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_v_string); - __pyx_v_string = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L3; - } - __pyx_L3:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":408 - * string = string.tostring() - * - * string = unicode_to_bytestring(string, &encoded) # <<<<<<<<<<<<<< - * - * if pystring_to_bytestring(string, &cstring, &size) == -1: - */ - __pyx_t_2 = __pyx_f_3re2_unicode_to_bytestring(__pyx_v_string, (&__pyx_v_encoded)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_v_string); - __pyx_v_string = __pyx_t_2; - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":410 - * string = unicode_to_bytestring(string, &encoded) - * - * if pystring_to_bytestring(string, &cstring, &size) == -1: # <<<<<<<<<<<<<< - * raise TypeError("expected string or buffer") - * - */ - __pyx_t_3 = (__pyx_f_3re2_pystring_to_bytestring(__pyx_v_string, (&__pyx_v_cstring), (&__pyx_v_size)) == -1); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":411 - * - * if pystring_to_bytestring(string, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") # <<<<<<<<<<<<<< - * - * if endpos >= 0 and endpos <= pos: - */ - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 411; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_8)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_8)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_8)); - __pyx_t_1 = PyObject_Call(__pyx_builtin_TypeError, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 411; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_Raise(__pyx_t_1, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 411; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L4; - } - __pyx_L4:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":413 - * raise TypeError("expected string or buffer") - * - * if endpos >= 0 and endpos <= pos: # <<<<<<<<<<<<<< - * return None - * - */ - __pyx_t_3 = (__pyx_v_endpos >= 0); - if (__pyx_t_3) { - __pyx_t_4 = (__pyx_v_endpos <= __pyx_v_pos); - __pyx_t_5 = __pyx_t_4; - } else { - __pyx_t_5 = __pyx_t_3; - } - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":414 - * - * if endpos 
>= 0 and endpos <= pos: - * return None # <<<<<<<<<<<<<< - * - * if endpos >= 0 and endpos < size: - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(Py_None); - __pyx_r = Py_None; - goto __pyx_L0; - goto __pyx_L5; - } - __pyx_L5:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":416 - * return None - * - * if endpos >= 0 and endpos < size: # <<<<<<<<<<<<<< - * size = endpos - * - */ - __pyx_t_5 = (__pyx_v_endpos >= 0); - if (__pyx_t_5) { - __pyx_t_3 = (__pyx_v_endpos < __pyx_v_size); - __pyx_t_4 = __pyx_t_3; - } else { - __pyx_t_4 = __pyx_t_5; - } - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":417 - * - * if endpos >= 0 and endpos < size: - * size = endpos # <<<<<<<<<<<<<< - * - * if pos > size: - */ - __pyx_v_size = __pyx_v_endpos; - goto __pyx_L6; - } - __pyx_L6:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":419 - * size = endpos - * - * if pos > size: # <<<<<<<<<<<<<< - * return None - * - */ - __pyx_t_4 = (__pyx_v_pos > __pyx_v_size); - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":420 - * - * if pos > size: - * return None # <<<<<<<<<<<<<< - * - * sp = new _re2.StringPiece(cstring, size) - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(Py_None); - __pyx_r = Py_None; - goto __pyx_L0; - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":422 - * return None - * - * sp = new _re2.StringPiece(cstring, size) # <<<<<<<<<<<<<< - * with nogil: - * result = self.re_pattern.Match(sp[0], pos, size, anchoring, m.matches, self.ngroups + 1) - */ - __pyx_v_sp = new re2::StringPiece(__pyx_v_cstring, __pyx_v_size); - - /* "/Users/maxiak/pyre2/src/re2.pyx":423 - * - * sp = new _re2.StringPiece(cstring, size) - * with nogil: # <<<<<<<<<<<<<< - * result = self.re_pattern.Match(sp[0], pos, size, anchoring, m.matches, self.ngroups + 1) - * - */ - { PyThreadState *_save; - Py_UNBLOCK_THREADS - /*try:*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":424 - * sp = new _re2.StringPiece(cstring, size) - * with nogil: - * result = self.re_pattern.Match(sp[0], pos, 
size, anchoring, m.matches, self.ngroups + 1) # <<<<<<<<<<<<<< - * - * del sp - */ - __pyx_v_result = __pyx_v_self->re_pattern->Match((__pyx_v_sp[0]), __pyx_v_pos, ((int)__pyx_v_size), __pyx_v_anchoring, __pyx_v_m->matches, (__pyx_v_self->ngroups + 1)); - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":423 - * - * sp = new _re2.StringPiece(cstring, size) - * with nogil: # <<<<<<<<<<<<<< - * result = self.re_pattern.Match(sp[0], pos, size, anchoring, m.matches, self.ngroups + 1) - * - */ - /*finally:*/ { - Py_BLOCK_THREADS - } - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":426 - * result = self.re_pattern.Match(sp[0], pos, size, anchoring, m.matches, self.ngroups + 1) - * - * del sp # <<<<<<<<<<<<<< - * if result == 0: - * return None - */ - delete __pyx_v_sp; - - /* "/Users/maxiak/pyre2/src/re2.pyx":427 - * - * del sp - * if result == 0: # <<<<<<<<<<<<<< - * return None - * m.encoded = (encoded) - */ - __pyx_t_4 = (__pyx_v_result == 0); - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":428 - * del sp - * if result == 0: - * return None # <<<<<<<<<<<<<< - * m.encoded = (encoded) - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(Py_None); - __pyx_r = Py_None; - goto __pyx_L0; - goto __pyx_L11; - } - __pyx_L11:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":429 - * if result == 0: - * return None - * m.encoded = (encoded) # <<<<<<<<<<<<<< - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - * m.nmatches = self.ngroups + 1 - */ - __pyx_v_m->encoded = ((int)__pyx_v_encoded); - - /* "/Users/maxiak/pyre2/src/re2.pyx":430 - * return None - * m.encoded = (encoded) - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) # <<<<<<<<<<<<<< - * m.nmatches = self.ngroups + 1 - * m.match_string = string - */ - __pyx_v_m->named_groups = addressof(__pyx_v_self->re_pattern->NamedCapturingGroups()); - - /* "/Users/maxiak/pyre2/src/re2.pyx":431 - * m.encoded = (encoded) - 
* m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - * m.nmatches = self.ngroups + 1 # <<<<<<<<<<<<<< - * m.match_string = string - * m._pos = pos - */ - __pyx_v_m->nmatches = (__pyx_v_self->ngroups + 1); - - /* "/Users/maxiak/pyre2/src/re2.pyx":432 - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - * m.nmatches = self.ngroups + 1 - * m.match_string = string # <<<<<<<<<<<<<< - * m._pos = pos - * if endpos == -1: - */ - __Pyx_INCREF(__pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - __Pyx_GOTREF(__pyx_v_m->match_string); - __Pyx_DECREF(__pyx_v_m->match_string); - __pyx_v_m->match_string = __pyx_v_string; - - /* "/Users/maxiak/pyre2/src/re2.pyx":433 - * m.nmatches = self.ngroups + 1 - * m.match_string = string - * m._pos = pos # <<<<<<<<<<<<<< - * if endpos == -1: - * m._endpos = len(string) - */ - __pyx_v_m->_pos = __pyx_v_pos; - - /* "/Users/maxiak/pyre2/src/re2.pyx":434 - * m.match_string = string - * m._pos = pos - * if endpos == -1: # <<<<<<<<<<<<<< - * m._endpos = len(string) - * else: - */ - __pyx_t_4 = (__pyx_v_endpos == -1); - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":435 - * m._pos = pos - * if endpos == -1: - * m._endpos = len(string) # <<<<<<<<<<<<<< - * else: - * m._endpos = endpos - */ - __pyx_t_6 = PyObject_Length(__pyx_v_string); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_m->_endpos = __pyx_t_6; - goto __pyx_L12; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":437 - * m._endpos = len(string) - * else: - * m._endpos = endpos # <<<<<<<<<<<<<< - * return m - * - */ - __pyx_v_m->_endpos = __pyx_v_endpos; - } - __pyx_L12:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":438 - * else: - * m._endpos = endpos - * return m # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)__pyx_v_m)); - __pyx_r = ((PyObject *)__pyx_v_m); - goto __pyx_L0; - - __pyx_r = Py_None; 
__Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("re2.Pattern._search"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_m); - __Pyx_DECREF(__pyx_v_string); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":441 - * - * - * def search(self, string, int pos=0, int endpos=-1): # <<<<<<<<<<<<<< - * """ - * Scan through string looking for a match, and return a corresponding - */ - -static PyObject *__pyx_pf_3re2_7Pattern_search(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_7Pattern_search[] = "\n Scan through string looking for a match, and return a corresponding\n Match instance. Return None if no position in the string matches.\n "; -static PyObject *__pyx_pf_3re2_7Pattern_search(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_string = 0; - int __pyx_v_pos; - int __pyx_v_endpos; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__string,&__pyx_n_s__pos,&__pyx_n_s__endpos,0}; - __Pyx_RefNannySetupContext("search"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pos); - if (value) { values[1] = value; kw_args--; } - } - case 2: - if (kw_args > 
0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__endpos); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "search") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_string = values[0]; - if (values[1]) { - __pyx_v_pos = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_pos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_pos = ((int)0); - } - if (values[2]) { - __pyx_v_endpos = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_endpos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_endpos = ((int)-1); - } - } else { - __pyx_v_pos = ((int)0); - __pyx_v_endpos = ((int)-1); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: __pyx_v_endpos = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_endpos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: __pyx_v_pos = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_pos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 1: __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("search", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Pattern.search"); - 
__Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":446 - * Match instance. Return None if no position in the string matches. - * """ - * return self._search(string, pos, endpos, _re2.UNANCHORED) # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = ((struct __pyx_vtabstruct_3re2_Pattern *)((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->__pyx_vtab)->_search(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self), __pyx_v_string, __pyx_v_pos, __pyx_v_endpos, RE2::UNANCHORED); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Pattern.search"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":449 - * - * - * def match(self, string, int pos=0, int endpos=-1): # <<<<<<<<<<<<<< - * """ - * Matches zero or more characters at the beginning of the string. 
- */ - -static PyObject *__pyx_pf_3re2_7Pattern_match(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_7Pattern_match[] = "\n Matches zero or more characters at the beginning of the string.\n "; -static PyObject *__pyx_pf_3re2_7Pattern_match(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_string = 0; - int __pyx_v_pos; - int __pyx_v_endpos; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__string,&__pyx_n_s__pos,&__pyx_n_s__endpos,0}; - __Pyx_RefNannySetupContext("match"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pos); - if (value) { values[1] = value; kw_args--; } - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__endpos); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "match") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 449; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_string = values[0]; - if (values[1]) { - __pyx_v_pos = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_pos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 449; __pyx_clineno = __LINE__; 
goto __pyx_L3_error;} - } else { - __pyx_v_pos = ((int)0); - } - if (values[2]) { - __pyx_v_endpos = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_endpos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 449; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_endpos = ((int)-1); - } - } else { - __pyx_v_pos = ((int)0); - __pyx_v_endpos = ((int)-1); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: __pyx_v_endpos = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_endpos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 449; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: __pyx_v_pos = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_pos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 449; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 1: __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("match", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 449; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Pattern.match"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":453 - * Matches zero or more characters at the beginning of the string. 
- * """ - * return self._search(string, pos, endpos, _re2.ANCHOR_START) # <<<<<<<<<<<<<< - * - * cdef _print_pattern(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = ((struct __pyx_vtabstruct_3re2_Pattern *)((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->__pyx_vtab)->_search(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self), __pyx_v_string, __pyx_v_pos, __pyx_v_endpos, RE2::ANCHOR_START); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Pattern.match"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":455 - * return self._search(string, pos, endpos, _re2.ANCHOR_START) - * - * cdef _print_pattern(self): # <<<<<<<<<<<<<< - * cdef _re2.cpp_string * s - * s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) - */ - -static PyObject *__pyx_f_3re2_7Pattern__print_pattern(struct __pyx_obj_3re2_Pattern *__pyx_v_self) { - std::string *__pyx_v_s; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - __Pyx_RefNannySetupContext("_print_pattern"); - - /* "/Users/maxiak/pyre2/src/re2.pyx":457 - * cdef _print_pattern(self): - * cdef _re2.cpp_string * s - * s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) # <<<<<<<<<<<<<< - * print cpp_to_pystring(s[0]) + "\n" - * sys.stdout.flush() - */ - __pyx_v_s = ((std::string *)addressofs(__pyx_v_self->re_pattern->pattern())); - - /* "/Users/maxiak/pyre2/src/re2.pyx":458 - * cdef _re2.cpp_string * s - * s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) - * print cpp_to_pystring(s[0]) + "\n" # <<<<<<<<<<<<<< - * sys.stdout.flush() - * - */ - __pyx_t_1 = 
__pyx_f_3re2_cpp_to_pystring((__pyx_v_s[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyNumber_Add(__pyx_t_1, ((PyObject *)__pyx_kp_s_10)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (__Pyx_PrintOne(0, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":459 - * s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) - * print cpp_to_pystring(s[0]) + "\n" - * sys.stdout.flush() # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__sys); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__stdout); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__flush); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - 
__Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("re2.Pattern._print_pattern"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":462 - * - * - * cdef _finditer(self, object string, int pos=0, int endpos=-1, int as_match=0): # <<<<<<<<<<<<<< - * cdef Py_ssize_t size - * cdef int result - */ - -static PyObject *__pyx_f_3re2_7Pattern__finditer(struct __pyx_obj_3re2_Pattern *__pyx_v_self, PyObject *__pyx_v_string, struct __pyx_opt_args_3re2_7Pattern__finditer *__pyx_optional_args) { - int __pyx_v_pos = ((int)0); - int __pyx_v_endpos = ((int)-1); - int __pyx_v_as_match = ((int)0); - Py_ssize_t __pyx_v_size; - int __pyx_v_result; - char *__pyx_v_cstring; - re2::StringPiece *__pyx_v_sp; - struct __pyx_obj_3re2_Match *__pyx_v_m; - PyObject *__pyx_v_resultlist = 0; - int __pyx_v_encoded; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - int __pyx_t_5; - Py_ssize_t __pyx_t_6; - PyObject *__pyx_t_7 = NULL; - int __pyx_t_8; - __Pyx_RefNannySetupContext("_finditer"); - if (__pyx_optional_args) { - if (__pyx_optional_args->__pyx_n > 0) { - __pyx_v_pos = __pyx_optional_args->pos; - if (__pyx_optional_args->__pyx_n > 1) { - __pyx_v_endpos = __pyx_optional_args->endpos; - if (__pyx_optional_args->__pyx_n > 2) { - __pyx_v_as_match = __pyx_optional_args->as_match; - } - } - } - } - __Pyx_INCREF(__pyx_v_string); - __pyx_v_m = ((struct __pyx_obj_3re2_Match *)Py_None); __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":468 - * cdef _re2.StringPiece * sp - * cdef Match m - * cdef list resultlist = [] # <<<<<<<<<<<<<< - * cdef int encoded = 0 - * - */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_v_resultlist = __pyx_t_1; - __pyx_t_1 = 0; - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":469 - * cdef Match m - * cdef list resultlist = [] - * cdef int encoded = 0 # <<<<<<<<<<<<<< - * - * string = unicode_to_bytestring(string, &encoded) - */ - __pyx_v_encoded = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":471 - * cdef int encoded = 0 - * - * string = unicode_to_bytestring(string, &encoded) # <<<<<<<<<<<<<< - * if pystring_to_bytestring(string, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") - */ - __pyx_t_1 = __pyx_f_3re2_unicode_to_bytestring(__pyx_v_string, (&__pyx_v_encoded)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_v_string); - __pyx_v_string = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":472 - * - * string = unicode_to_bytestring(string, &encoded) - * if pystring_to_bytestring(string, &cstring, &size) == -1: # <<<<<<<<<<<<<< - * raise TypeError("expected string or buffer") - * encoded = encoded - */ - __pyx_t_2 = (__pyx_f_3re2_pystring_to_bytestring(__pyx_v_string, (&__pyx_v_cstring), (&__pyx_v_size)) == -1); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":473 - * string = unicode_to_bytestring(string, &encoded) - * if pystring_to_bytestring(string, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") # <<<<<<<<<<<<<< - * encoded = encoded - * - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 473; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_8)); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_8)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_8)); - __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 473; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - 
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_Raise(__pyx_t_3, 0, 0); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 473; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L3; - } - __pyx_L3:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":474 - * if pystring_to_bytestring(string, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") - * encoded = encoded # <<<<<<<<<<<<<< - * - * if endpos != -1 and endpos < size: - */ - __pyx_v_encoded = ((int)__pyx_v_encoded); - - /* "/Users/maxiak/pyre2/src/re2.pyx":476 - * encoded = encoded - * - * if endpos != -1 and endpos < size: # <<<<<<<<<<<<<< - * size = endpos - * - */ - __pyx_t_2 = (__pyx_v_endpos != -1); - if (__pyx_t_2) { - __pyx_t_4 = (__pyx_v_endpos < __pyx_v_size); - __pyx_t_5 = __pyx_t_4; - } else { - __pyx_t_5 = __pyx_t_2; - } - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":477 - * - * if endpos != -1 and endpos < size: - * size = endpos # <<<<<<<<<<<<<< - * - * sp = new _re2.StringPiece(cstring, size) - */ - __pyx_v_size = __pyx_v_endpos; - goto __pyx_L4; - } - __pyx_L4:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":479 - * size = endpos - * - * sp = new _re2.StringPiece(cstring, size) # <<<<<<<<<<<<<< - * - * while True: - */ - __pyx_v_sp = new re2::StringPiece(__pyx_v_cstring, __pyx_v_size); - - /* "/Users/maxiak/pyre2/src/re2.pyx":481 - * sp = new _re2.StringPiece(cstring, size) - * - * while True: # <<<<<<<<<<<<<< - * m = Match(self, self.ngroups + 1) - * with nogil: - */ - while (1) { - if (!1) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":482 - * - * while True: - * m = Match(self, self.ngroups + 1) # <<<<<<<<<<<<<< - * with nogil: - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - */ - __pyx_t_3 = PyInt_FromLong((__pyx_v_self->ngroups + 1)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - 
__Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_v_self)); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_self)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); - PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3re2_Match)), __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(((PyObject *)__pyx_v_m)); - __pyx_v_m = ((struct __pyx_obj_3re2_Match *)__pyx_t_3); - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":483 - * while True: - * m = Match(self, self.ngroups + 1) - * with nogil: # <<<<<<<<<<<<<< - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - * if result == 0: - */ - { PyThreadState *_save; - Py_UNBLOCK_THREADS - /*try:*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":484 - * m = Match(self, self.ngroups + 1) - * with nogil: - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) # <<<<<<<<<<<<<< - * if result == 0: - * break - */ - __pyx_v_result = __pyx_v_self->re_pattern->Match((__pyx_v_sp[0]), __pyx_v_pos, ((int)__pyx_v_size), RE2::UNANCHORED, __pyx_v_m->matches, (__pyx_v_self->ngroups + 1)); - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":483 - * while True: - * m = Match(self, self.ngroups + 1) - * with nogil: # <<<<<<<<<<<<<< - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - * if result == 0: - */ - /*finally:*/ { - Py_BLOCK_THREADS - } - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":485 - * with nogil: - * result = self.re_pattern.Match(sp[0], pos, 
size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - * if result == 0: # <<<<<<<<<<<<<< - * break - * m.encoded = encoded - */ - __pyx_t_5 = (__pyx_v_result == 0); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":486 - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - * if result == 0: - * break # <<<<<<<<<<<<<< - * m.encoded = encoded - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - */ - goto __pyx_L6_break; - goto __pyx_L12; - } - __pyx_L12:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":487 - * if result == 0: - * break - * m.encoded = encoded # <<<<<<<<<<<<<< - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - * m.nmatches = self.ngroups + 1 - */ - __pyx_v_m->encoded = __pyx_v_encoded; - - /* "/Users/maxiak/pyre2/src/re2.pyx":488 - * break - * m.encoded = encoded - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) # <<<<<<<<<<<<<< - * m.nmatches = self.ngroups + 1 - * m.match_string = string - */ - __pyx_v_m->named_groups = addressof(__pyx_v_self->re_pattern->NamedCapturingGroups()); - - /* "/Users/maxiak/pyre2/src/re2.pyx":489 - * m.encoded = encoded - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - * m.nmatches = self.ngroups + 1 # <<<<<<<<<<<<<< - * m.match_string = string - * m._pos = pos - */ - __pyx_v_m->nmatches = (__pyx_v_self->ngroups + 1); - - /* "/Users/maxiak/pyre2/src/re2.pyx":490 - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - * m.nmatches = self.ngroups + 1 - * m.match_string = string # <<<<<<<<<<<<<< - * m._pos = pos - * if endpos == -1: - */ - __Pyx_INCREF(__pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - __Pyx_GOTREF(__pyx_v_m->match_string); - __Pyx_DECREF(__pyx_v_m->match_string); - __pyx_v_m->match_string = __pyx_v_string; - - /* "/Users/maxiak/pyre2/src/re2.pyx":491 - * m.nmatches = self.ngroups + 1 - * m.match_string = string - * m._pos = pos # 
<<<<<<<<<<<<<< - * if endpos == -1: - * m._endpos = len(string) - */ - __pyx_v_m->_pos = __pyx_v_pos; - - /* "/Users/maxiak/pyre2/src/re2.pyx":492 - * m.match_string = string - * m._pos = pos - * if endpos == -1: # <<<<<<<<<<<<<< - * m._endpos = len(string) - * else: - */ - __pyx_t_5 = (__pyx_v_endpos == -1); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":493 - * m._pos = pos - * if endpos == -1: - * m._endpos = len(string) # <<<<<<<<<<<<<< - * else: - * m._endpos = endpos - */ - __pyx_t_6 = PyObject_Length(__pyx_v_string); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 493; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_m->_endpos = __pyx_t_6; - goto __pyx_L13; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":495 - * m._endpos = len(string) - * else: - * m._endpos = endpos # <<<<<<<<<<<<<< - * if as_match: - * if self.ngroups > 1: - */ - __pyx_v_m->_endpos = __pyx_v_endpos; - } - __pyx_L13:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":496 - * else: - * m._endpos = endpos - * if as_match: # <<<<<<<<<<<<<< - * if self.ngroups > 1: - * resultlist.append(m.groups("")) - */ - if (__pyx_v_as_match) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":497 - * m._endpos = endpos - * if as_match: - * if self.ngroups > 1: # <<<<<<<<<<<<<< - * resultlist.append(m.groups("")) - * else: - */ - __pyx_t_5 = (__pyx_v_self->ngroups > 1); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":498 - * if as_match: - * if self.ngroups > 1: - * resultlist.append(m.groups("")) # <<<<<<<<<<<<<< - * else: - * resultlist.append(m.group(self.ngroups)) - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_v_m), __pyx_n_s__groups); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
498; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_7)); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_7)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_7)); - __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_t_1, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_8 = PyList_Append(((PyObject *)__pyx_v_resultlist), __pyx_t_7); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - goto __pyx_L15; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":500 - * resultlist.append(m.groups("")) - * else: - * resultlist.append(m.group(self.ngroups)) # <<<<<<<<<<<<<< - * else: - * resultlist.append(m) - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 500; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_7 = PyObject_GetAttr(((PyObject *)__pyx_v_m), __pyx_n_s__group); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 500; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = PyInt_FromLong(__pyx_v_self->ngroups); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 500; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 500; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
- __Pyx_GOTREF(__pyx_t_3); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_7, __pyx_t_3, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 500; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_8 = PyList_Append(((PyObject *)__pyx_v_resultlist), __pyx_t_1); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 500; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - } - __pyx_L15:; - goto __pyx_L14; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":502 - * resultlist.append(m.group(self.ngroups)) - * else: - * resultlist.append(m) # <<<<<<<<<<<<<< - * if pos == size: - * break - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 502; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_8 = PyList_Append(((PyObject *)__pyx_v_resultlist), ((PyObject *)__pyx_v_m)); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 502; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_L14:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":503 - * else: - * resultlist.append(m) - * if pos == size: # <<<<<<<<<<<<<< - * break - * # offset the pos to move to the next point - */ - __pyx_t_5 = (__pyx_v_pos == __pyx_v_size); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":504 - * resultlist.append(m) - * if pos == size: - * break # <<<<<<<<<<<<<< - * # offset the pos to move to the next point - * if m.matches[0].length() == 0: - */ - goto __pyx_L6_break; - goto __pyx_L16; - } - __pyx_L16:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":506 - * break - * # offset the pos to move to the next point - * if m.matches[0].length() == 
0: # <<<<<<<<<<<<<< - * pos += 1 - * else: - */ - __pyx_t_5 = ((__pyx_v_m->matches[0]).length() == 0); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":507 - * # offset the pos to move to the next point - * if m.matches[0].length() == 0: - * pos += 1 # <<<<<<<<<<<<<< - * else: - * pos = m.matches[0].data() - cstring + m.matches[0].length() - */ - __pyx_v_pos += 1; - goto __pyx_L17; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":509 - * pos += 1 - * else: - * pos = m.matches[0].data() - cstring + m.matches[0].length() # <<<<<<<<<<<<<< - * del sp - * return resultlist - */ - __pyx_v_pos = (((__pyx_v_m->matches[0]).data() - __pyx_v_cstring) + (__pyx_v_m->matches[0]).length()); - } - __pyx_L17:; - } - __pyx_L6_break:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":510 - * else: - * pos = m.matches[0].data() - cstring + m.matches[0].length() - * del sp # <<<<<<<<<<<<<< - * return resultlist - * - */ - delete __pyx_v_sp; - - /* "/Users/maxiak/pyre2/src/re2.pyx":511 - * pos = m.matches[0].data() - cstring + m.matches[0].length() - * del sp - * return resultlist # <<<<<<<<<<<<<< - * - * def finditer(self, object string, int pos=0, int endpos=-1): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)__pyx_v_resultlist)); - __pyx_r = ((PyObject *)__pyx_v_resultlist); - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("re2.Pattern._finditer"); - __pyx_r = 0; - __pyx_L0:; - __Pyx_DECREF((PyObject *)__pyx_v_m); - __Pyx_XDECREF(__pyx_v_resultlist); - __Pyx_DECREF(__pyx_v_string); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":513 - * return resultlist - * - * def finditer(self, object string, int pos=0, int endpos=-1): # <<<<<<<<<<<<<< - * """ - * Return all non-overlapping matches of pattern in string as a list - */ - -static 
PyObject *__pyx_pf_3re2_7Pattern_finditer(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_7Pattern_finditer[] = "\n Return all non-overlapping matches of pattern in string as a list\n of match objects.\n "; -static PyObject *__pyx_pf_3re2_7Pattern_finditer(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_string = 0; - int __pyx_v_pos; - int __pyx_v_endpos; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - struct __pyx_opt_args_3re2_7Pattern__finditer __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__string,&__pyx_n_s__pos,&__pyx_n_s__endpos,0}; - __Pyx_RefNannySetupContext("finditer"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pos); - if (value) { values[1] = value; kw_args--; } - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__endpos); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "finditer") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_string = values[0]; - if (values[1]) { - __pyx_v_pos = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_pos 
== (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_pos = ((int)0); - } - if (values[2]) { - __pyx_v_endpos = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_endpos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_endpos = ((int)-1); - } - } else { - __pyx_v_pos = ((int)0); - __pyx_v_endpos = ((int)-1); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: __pyx_v_endpos = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_endpos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: __pyx_v_pos = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_pos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 1: __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("finditer", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Pattern.finditer"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":519 - * """ - * # TODO This builds a list and returns its iterator. 
Probably could be more memory efficient - * return self._finditer(string, pos, endpos, 0).__iter__() # <<<<<<<<<<<<<< - * - * def findall(self, object string, int pos=0, int endpos=-1): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2.__pyx_n = 3; - __pyx_t_2.pos = __pyx_v_pos; - __pyx_t_2.endpos = __pyx_v_endpos; - __pyx_t_2.as_match = 0; - __pyx_t_1 = ((struct __pyx_vtabstruct_3re2_Pattern *)((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->__pyx_vtab)->_finditer(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self), __pyx_v_string, &__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 519; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____iter__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 519; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 519; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.Pattern.finditer"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":521 - * return self._finditer(string, pos, endpos, 0).__iter__() - * - * def findall(self, object string, int pos=0, int endpos=-1): # <<<<<<<<<<<<<< - * """ - * Return all non-overlapping matches of pattern in string as a list - */ - -static PyObject *__pyx_pf_3re2_7Pattern_findall(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char 
__pyx_doc_3re2_7Pattern_findall[] = "\n Return all non-overlapping matches of pattern in string as a list\n of strings.\n "; -static PyObject *__pyx_pf_3re2_7Pattern_findall(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_string = 0; - int __pyx_v_pos; - int __pyx_v_endpos; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - struct __pyx_opt_args_3re2_7Pattern__finditer __pyx_t_2; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__string,&__pyx_n_s__pos,&__pyx_n_s__endpos,0}; - __Pyx_RefNannySetupContext("findall"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pos); - if (value) { values[1] = value; kw_args--; } - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__endpos); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "findall") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 521; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_string = values[0]; - if (values[1]) { - __pyx_v_pos = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_pos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 521; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_pos = ((int)0); - } - 
if (values[2]) { - __pyx_v_endpos = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_endpos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 521; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_endpos = ((int)-1); - } - } else { - __pyx_v_pos = ((int)0); - __pyx_v_endpos = ((int)-1); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: __pyx_v_endpos = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_endpos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 521; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: __pyx_v_pos = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_pos == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 521; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 1: __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("findall", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 521; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Pattern.findall"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":526 - * of strings. 
- * """ - * return self._finditer(string, pos, endpos, 1) # <<<<<<<<<<<<<< - * - * def split(self, string, int maxsplit=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2.__pyx_n = 3; - __pyx_t_2.pos = __pyx_v_pos; - __pyx_t_2.endpos = __pyx_v_endpos; - __pyx_t_2.as_match = 1; - __pyx_t_1 = ((struct __pyx_vtabstruct_3re2_Pattern *)((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->__pyx_vtab)->_finditer(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self), __pyx_v_string, &__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 526; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("re2.Pattern.findall"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":528 - * return self._finditer(string, pos, endpos, 1) - * - * def split(self, string, int maxsplit=0): # <<<<<<<<<<<<<< - * """ - * split(string[, maxsplit = 0]) --> list - */ - -static PyObject *__pyx_pf_3re2_7Pattern_split(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_7Pattern_split[] = "\n split(string[, maxsplit = 0]) --> list\n Split a string by the occurances of the pattern.\n "; -static PyObject *__pyx_pf_3re2_7Pattern_split(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_string = 0; - int __pyx_v_maxsplit; - Py_ssize_t __pyx_v_size; - int __pyx_v_num_groups; - int __pyx_v_result; - int __pyx_v_pos; - int __pyx_v_lookahead; - int __pyx_v_num_split; - char *__pyx_v_cstring; - re2::StringPiece *__pyx_v_sp; - re2::StringPiece *__pyx_v_matches; - PyObject *__pyx_v_resultlist = 0; - int __pyx_v_encoded; - PyObject *__pyx_v_match_start; - PyObject *__pyx_v_match_end; - PyObject 
*__pyx_v_group; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - Py_ssize_t __pyx_t_5; - Py_ssize_t __pyx_t_6; - Py_ssize_t __pyx_t_7; - int __pyx_t_8; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__string,&__pyx_n_s__maxsplit,0}; - __Pyx_RefNannySetupContext("split"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[2] = {0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__maxsplit); - if (value) { values[1] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "split") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 528; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_string = values[0]; - if (values[1]) { - __pyx_v_maxsplit = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_maxsplit == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 528; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_maxsplit = ((int)0); - } - } else { - __pyx_v_maxsplit = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 2: __pyx_v_maxsplit = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_maxsplit == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 528; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 1: __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto 
__pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("split", 0, 1, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 528; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Pattern.split"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __Pyx_INCREF(__pyx_v_string); - __pyx_v_match_start = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_match_end = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_group = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":534 - * """ - * cdef Py_ssize_t size - * cdef int num_groups = 1 # <<<<<<<<<<<<<< - * cdef int result - * cdef int endpos - */ - __pyx_v_num_groups = 1; - - /* "/Users/maxiak/pyre2/src/re2.pyx":537 - * cdef int result - * cdef int endpos - * cdef int pos = 0 # <<<<<<<<<<<<<< - * cdef int lookahead = 0 - * cdef int num_split = 0 - */ - __pyx_v_pos = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":538 - * cdef int endpos - * cdef int pos = 0 - * cdef int lookahead = 0 # <<<<<<<<<<<<<< - * cdef int num_split = 0 - * cdef char * cstring - */ - __pyx_v_lookahead = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":539 - * cdef int pos = 0 - * cdef int lookahead = 0 - * cdef int num_split = 0 # <<<<<<<<<<<<<< - * cdef char * cstring - * cdef _re2.StringPiece * sp - */ - __pyx_v_num_split = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":544 - * cdef _re2.StringPiece * matches - * cdef Match m - * cdef list resultlist = [] # <<<<<<<<<<<<<< - * cdef int encoded = 0 - * - */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 544; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_v_resultlist = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":545 - * cdef Match m - * cdef list resultlist = [] - * cdef int encoded = 0 # 
<<<<<<<<<<<<<< - * - * if maxsplit < 0: - */ - __pyx_v_encoded = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":547 - * cdef int encoded = 0 - * - * if maxsplit < 0: # <<<<<<<<<<<<<< - * maxsplit = 0 - * - */ - __pyx_t_2 = (__pyx_v_maxsplit < 0); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":548 - * - * if maxsplit < 0: - * maxsplit = 0 # <<<<<<<<<<<<<< - * - * string = unicode_to_bytestring(string, &encoded) - */ - __pyx_v_maxsplit = 0; - goto __pyx_L6; - } - __pyx_L6:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":550 - * maxsplit = 0 - * - * string = unicode_to_bytestring(string, &encoded) # <<<<<<<<<<<<<< - * if pystring_to_bytestring(string, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") - */ - __pyx_t_1 = __pyx_f_3re2_unicode_to_bytestring(__pyx_v_string, (&__pyx_v_encoded)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_v_string); - __pyx_v_string = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":551 - * - * string = unicode_to_bytestring(string, &encoded) - * if pystring_to_bytestring(string, &cstring, &size) == -1: # <<<<<<<<<<<<<< - * raise TypeError("expected string or buffer") - * - */ - __pyx_t_2 = (__pyx_f_3re2_pystring_to_bytestring(__pyx_v_string, (&__pyx_v_cstring), (&__pyx_v_size)) == -1); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":552 - * string = unicode_to_bytestring(string, &encoded) - * if pystring_to_bytestring(string, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") # <<<<<<<<<<<<<< - * - * encoded = encoded - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 552; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_8)); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_8)); - __Pyx_GIVEREF(((PyObject 
*)__pyx_kp_s_8)); - __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 552; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_Raise(__pyx_t_3, 0, 0); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 552; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":554 - * raise TypeError("expected string or buffer") - * - * encoded = encoded # <<<<<<<<<<<<<< - * - * matches = _re2.new_StringPiece_array(self.ngroups + 1) - */ - __pyx_v_encoded = ((int)__pyx_v_encoded); - - /* "/Users/maxiak/pyre2/src/re2.pyx":556 - * encoded = encoded - * - * matches = _re2.new_StringPiece_array(self.ngroups + 1) # <<<<<<<<<<<<<< - * sp = new _re2.StringPiece(cstring, size) - * - */ - __pyx_v_matches = new_StringPiece_array((((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->ngroups + 1)); - - /* "/Users/maxiak/pyre2/src/re2.pyx":557 - * - * matches = _re2.new_StringPiece_array(self.ngroups + 1) - * sp = new _re2.StringPiece(cstring, size) # <<<<<<<<<<<<<< - * - * while True: - */ - __pyx_v_sp = new re2::StringPiece(__pyx_v_cstring, __pyx_v_size); - - /* "/Users/maxiak/pyre2/src/re2.pyx":559 - * sp = new _re2.StringPiece(cstring, size) - * - * while True: # <<<<<<<<<<<<<< - * with nogil: - * result = self.re_pattern.Match(sp[0], (pos + lookahead), size, _re2.UNANCHORED, matches, self.ngroups + 1) - */ - while (1) { - if (!1) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":560 - * - * while True: - * with nogil: # <<<<<<<<<<<<<< - * result = self.re_pattern.Match(sp[0], (pos + lookahead), size, _re2.UNANCHORED, matches, self.ngroups + 1) - * if result == 0: - */ - { PyThreadState *_save; - Py_UNBLOCK_THREADS - /*try:*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":561 - * while True: - * with nogil: - * result = 
self.re_pattern.Match(sp[0], (pos + lookahead), size, _re2.UNANCHORED, matches, self.ngroups + 1) # <<<<<<<<<<<<<< - * if result == 0: - * break - */ - __pyx_v_result = ((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->re_pattern->Match((__pyx_v_sp[0]), (__pyx_v_pos + __pyx_v_lookahead), ((int)__pyx_v_size), RE2::UNANCHORED, __pyx_v_matches, (((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->ngroups + 1)); - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":560 - * - * while True: - * with nogil: # <<<<<<<<<<<<<< - * result = self.re_pattern.Match(sp[0], (pos + lookahead), size, _re2.UNANCHORED, matches, self.ngroups + 1) - * if result == 0: - */ - /*finally:*/ { - Py_BLOCK_THREADS - } - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":562 - * with nogil: - * result = self.re_pattern.Match(sp[0], (pos + lookahead), size, _re2.UNANCHORED, matches, self.ngroups + 1) - * if result == 0: # <<<<<<<<<<<<<< - * break - * - */ - __pyx_t_2 = (__pyx_v_result == 0); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":563 - * result = self.re_pattern.Match(sp[0], (pos + lookahead), size, _re2.UNANCHORED, matches, self.ngroups + 1) - * if result == 0: - * break # <<<<<<<<<<<<<< - * - * match_start = matches[0].data() - cstring - */ - goto __pyx_L9_break; - goto __pyx_L15; - } - __pyx_L15:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":565 - * break - * - * match_start = matches[0].data() - cstring # <<<<<<<<<<<<<< - * match_end = match_start + matches[0].length() - * - */ - __pyx_t_3 = PyInt_FromLong(((__pyx_v_matches[0]).data() - __pyx_v_cstring)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 565; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_v_match_start); - __pyx_v_match_start = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":566 - * - * match_start = matches[0].data() - cstring - * match_end = match_start + matches[0].length() # <<<<<<<<<<<<<< - * - * # If an empty match, just look ahead 
until you find something - */ - __pyx_t_3 = PyInt_FromLong((__pyx_v_matches[0]).length()); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 566; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyNumber_Add(__pyx_v_match_start, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 566; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_v_match_end); - __pyx_v_match_end = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":569 - * - * # If an empty match, just look ahead until you find something - * if match_start == match_end: # <<<<<<<<<<<<<< - * if pos + lookahead == size: - * break - */ - __pyx_t_1 = PyObject_RichCompare(__pyx_v_match_start, __pyx_v_match_end, Py_EQ); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 569; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 569; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":570 - * # If an empty match, just look ahead until you find something - * if match_start == match_end: - * if pos + lookahead == size: # <<<<<<<<<<<<<< - * break - * lookahead += 1 - */ - __pyx_t_2 = ((__pyx_v_pos + __pyx_v_lookahead) == __pyx_v_size); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":571 - * if match_start == match_end: - * if pos + lookahead == size: - * break # <<<<<<<<<<<<<< - * lookahead += 1 - * continue - */ - goto __pyx_L9_break; - goto __pyx_L17; - } - __pyx_L17:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":572 - * if pos + lookahead == size: - * break - * lookahead += 1 # <<<<<<<<<<<<<< - * continue - * - */ - __pyx_v_lookahead += 1; - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":573 - * break - * lookahead += 1 - * continue # <<<<<<<<<<<<<< - * - * if encoded: - */ - goto __pyx_L8_continue; - goto __pyx_L16; - } - __pyx_L16:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":575 - * continue - * - * if encoded: # <<<<<<<<<<<<<< - * resultlist.append(char_to_utf8(&sp.data()[pos], match_start - pos)) - * else: - */ - if (__pyx_v_encoded) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":576 - * - * if encoded: - * resultlist.append(char_to_utf8(&sp.data()[pos], match_start - pos)) # <<<<<<<<<<<<<< - * else: - * resultlist.append(sp.data()[pos:match_start]) - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_1 = PyInt_FromLong(__pyx_v_pos); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyNumber_Subtract(__pyx_v_match_start, __pyx_t_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __pyx_f_3re2_char_to_utf8((&(__pyx_v_sp->data()[__pyx_v_pos])), __pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), __pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - 
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L18; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":578 - * resultlist.append(char_to_utf8(&sp.data()[pos], match_start - pos)) - * else: - * resultlist.append(sp.data()[pos:match_start]) # <<<<<<<<<<<<<< - * if self.ngroups > 0: - * for group in range(self.ngroups): - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_5 = __Pyx_PyIndex_AsSsize_t(__pyx_v_match_start); if (unlikely((__pyx_t_5 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_3 = PyBytes_FromStringAndSize(__pyx_v_sp->data() + __pyx_v_pos, __pyx_t_5 - __pyx_v_pos); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), ((PyObject *)__pyx_t_3)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - } - __pyx_L18:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":579 - * else: - * resultlist.append(sp.data()[pos:match_start]) - * if self.ngroups > 0: # <<<<<<<<<<<<<< - * for group in range(self.ngroups): - * if matches[group + 1].data() == NULL: - */ - __pyx_t_2 = (((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->ngroups > 0); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":580 - * resultlist.append(sp.data()[pos:match_start]) - * if self.ngroups > 0: - * for group in range(self.ngroups): # <<<<<<<<<<<<<< - * if matches[group + 1].data() == NULL: - * resultlist.append(None) - */ - __pyx_t_3 = PyInt_FromLong(((struct __pyx_obj_3re2_Pattern 
*)__pyx_v_self)->ngroups); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_builtin_range, __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyList_CheckExact(__pyx_t_3) || PyTuple_CheckExact(__pyx_t_3)) { - __pyx_t_5 = 0; __pyx_t_1 = __pyx_t_3; __Pyx_INCREF(__pyx_t_1); - } else { - __pyx_t_5 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - } - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - for (;;) { - if (likely(PyList_CheckExact(__pyx_t_1))) { - if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_1)) break; - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_5); __Pyx_INCREF(__pyx_t_3); __pyx_t_5++; - } else if (likely(PyTuple_CheckExact(__pyx_t_1))) { - if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_1)) break; - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_5); __Pyx_INCREF(__pyx_t_3); __pyx_t_5++; - } else { - __pyx_t_3 = PyIter_Next(__pyx_t_1); - if (!__pyx_t_3) { - if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - break; - } - __Pyx_GOTREF(__pyx_t_3); - } - __Pyx_DECREF(__pyx_v_group); - __pyx_v_group = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":581 - * if self.ngroups > 0: - * for group in range(self.ngroups): - * if matches[group + 1].data() == NULL: # 
<<<<<<<<<<<<<< - * resultlist.append(None) - * else: - */ - __pyx_t_3 = PyNumber_Add(__pyx_v_group, __pyx_int_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = ((__pyx_v_matches[__pyx_t_6]).data() == NULL); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":582 - * for group in range(self.ngroups): - * if matches[group + 1].data() == NULL: - * resultlist.append(None) # <<<<<<<<<<<<<< - * else: - * if encoded: - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), Py_None); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L22; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":584 - * resultlist.append(None) - * else: - * if encoded: # <<<<<<<<<<<<<< - * resultlist.append(char_to_utf8(matches[group + 1].data(), matches[group + 1].length())) - * else: - */ - if (__pyx_v_encoded) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":585 - * else: - * if encoded: - * resultlist.append(char_to_utf8(matches[group + 1].data(), matches[group + 1].length())) # <<<<<<<<<<<<<< - * else: - * resultlist.append(matches[group + 1].data()[:matches[group + 1].length()]) - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno 
= __LINE__; goto __pyx_L1_error;} - } - __pyx_t_3 = PyNumber_Add(__pyx_v_group, __pyx_int_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyNumber_Add(__pyx_v_group, __pyx_int_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_7 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_7 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __pyx_f_3re2_char_to_utf8((__pyx_v_matches[__pyx_t_6]).data(), (__pyx_v_matches[__pyx_t_7]).length()); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), __pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L23; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":587 - * resultlist.append(char_to_utf8(matches[group + 1].data(), matches[group + 1].length())) - * else: - * resultlist.append(matches[group + 1].data()[:matches[group + 1].length()]) # <<<<<<<<<<<<<< - * - * # offset the pos to move to the next point - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_3 = PyNumber_Add(__pyx_v_group, __pyx_int_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_7 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_7 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyNumber_Add(__pyx_v_group, __pyx_int_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyBytes_FromStringAndSize((__pyx_v_matches[__pyx_t_7]).data() + 0, (__pyx_v_matches[__pyx_t_6]).length() - 0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), ((PyObject *)__pyx_t_3)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - } - __pyx_L23:; - } - __pyx_L22:; - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L19; - } - __pyx_L19:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":590 - * - * # offset the pos to move to the next point - * pos = match_end # <<<<<<<<<<<<<< - * lookahead = 0 - * - */ - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_v_match_end); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 590; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} - __pyx_v_pos = __pyx_t_4; - - /* "/Users/maxiak/pyre2/src/re2.pyx":591 - * # offset the pos to move to the next point - * pos = match_end - * lookahead = 0 # <<<<<<<<<<<<<< - * - * num_split += 1 - */ - __pyx_v_lookahead = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":593 - * lookahead = 0 - * - * num_split += 1 # <<<<<<<<<<<<<< - * if maxsplit and num_split >= maxsplit: - * break - */ - __pyx_v_num_split += 1; - - /* "/Users/maxiak/pyre2/src/re2.pyx":594 - * - * num_split += 1 - * if maxsplit and num_split >= maxsplit: # <<<<<<<<<<<<<< - * break - * - */ - if (__pyx_v_maxsplit) { - __pyx_t_2 = (__pyx_v_num_split >= __pyx_v_maxsplit); - __pyx_t_8 = __pyx_t_2; - } else { - __pyx_t_8 = __pyx_v_maxsplit; - } - if (__pyx_t_8) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":595 - * num_split += 1 - * if maxsplit and num_split >= maxsplit: - * break # <<<<<<<<<<<<<< - * - * if encoded: - */ - goto __pyx_L9_break; - goto __pyx_L24; - } - __pyx_L24:; - __pyx_L8_continue:; - } - __pyx_L9_break:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":597 - * break - * - * if encoded: # <<<<<<<<<<<<<< - * resultlist.append(char_to_utf8(&sp.data()[pos], sp.length() - pos)) - * else: - */ - if (__pyx_v_encoded) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":598 - * - * if encoded: - * resultlist.append(char_to_utf8(&sp.data()[pos], sp.length() - pos)) # <<<<<<<<<<<<<< - * else: - * resultlist.append(sp.data()[pos:]) - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 598; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_1 = __pyx_f_3re2_char_to_utf8((&(__pyx_v_sp->data()[__pyx_v_pos])), (__pyx_v_sp->length() - __pyx_v_pos)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 598; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyList_Append(((PyObject 
*)__pyx_v_resultlist), __pyx_t_1); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 598; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L25; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":600 - * resultlist.append(char_to_utf8(&sp.data()[pos], sp.length() - pos)) - * else: - * resultlist.append(sp.data()[pos:]) # <<<<<<<<<<<<<< - * _re2.delete_StringPiece_array(matches) - * del sp - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 600; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_1 = PyBytes_FromString(__pyx_v_sp->data() + __pyx_v_pos); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 600; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), ((PyObject *)__pyx_t_1)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 600; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - } - __pyx_L25:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":601 - * else: - * resultlist.append(sp.data()[pos:]) - * _re2.delete_StringPiece_array(matches) # <<<<<<<<<<<<<< - * del sp - * return resultlist - */ - delete_StringPiece_array(__pyx_v_matches); - - /* "/Users/maxiak/pyre2/src/re2.pyx":602 - * resultlist.append(sp.data()[pos:]) - * _re2.delete_StringPiece_array(matches) - * del sp # <<<<<<<<<<<<<< - * return resultlist - * - */ - delete __pyx_v_sp; - - /* "/Users/maxiak/pyre2/src/re2.pyx":603 - * _re2.delete_StringPiece_array(matches) - * del sp - * return resultlist # <<<<<<<<<<<<<< - * - * def sub(self, repl, string, int count=0): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)__pyx_v_resultlist)); - __pyx_r = ((PyObject 
*)__pyx_v_resultlist); - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.Pattern.split"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_resultlist); - __Pyx_DECREF(__pyx_v_match_start); - __Pyx_DECREF(__pyx_v_match_end); - __Pyx_DECREF(__pyx_v_group); - __Pyx_DECREF(__pyx_v_string); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":605 - * return resultlist - * - * def sub(self, repl, string, int count=0): # <<<<<<<<<<<<<< - * """ - * sub(repl, string[, count = 0]) --> newstring - */ - -static PyObject *__pyx_pf_3re2_7Pattern_sub(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_7Pattern_sub[] = "\n sub(repl, string[, count = 0]) --> newstring\n Return the string obtained by replacing the leftmost non-overlapping\n occurrences of pattern in string by the replacement repl.\n "; -static PyObject *__pyx_pf_3re2_7Pattern_sub(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_repl = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_count; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__repl,&__pyx_n_s__string,&__pyx_n_s__count,0}; - __Pyx_RefNannySetupContext("sub"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__repl); 
- if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("sub", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__count); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "sub") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_repl = values[0]; - __pyx_v_string = values[1]; - if (values[2]) { - __pyx_v_count = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_count = ((int)0); - } - } else { - __pyx_v_count = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: - __pyx_v_count = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_repl = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("sub", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Pattern.sub"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":611 - * occurrences of pattern in string by the replacement repl. - * """ - * return self.subn(repl, string, count)[0] # <<<<<<<<<<<<<< - * - * def subn(self, repl, string, int count=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__subn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromLong(__pyx_v_count); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_v_repl); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_repl); - __Pyx_GIVEREF(__pyx_v_repl); - __Pyx_INCREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_1, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_2, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.Pattern.sub"); - __pyx_r = NULL; - 
__pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":613 - * return self.subn(repl, string, count)[0] - * - * def subn(self, repl, string, int count=0): # <<<<<<<<<<<<<< - * """ - * subn(repl, string[, count = 0]) --> (newstring, number of subs) - */ - -static PyObject *__pyx_pf_3re2_7Pattern_subn(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_7Pattern_subn[] = "\n subn(repl, string[, count = 0]) --> (newstring, number of subs)\n Return the tuple (new_string, number_of_subs_made) found by replacing\n the leftmost non-overlapping occurrences of pattern with the\n replacement repl.\n "; -static PyObject *__pyx_pf_3re2_7Pattern_subn(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_repl = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_count; - Py_ssize_t __pyx_v_size; - char *__pyx_v_cstring; - std::string *__pyx_v_fixed_repl; - re2::StringPiece *__pyx_v_sp; - std::string *__pyx_v_input_str; - PyObject *__pyx_v_total_replacements = 0; - int __pyx_v_string_encoded; - int __pyx_v_repl_encoded; - int __pyx_v_encoded; - const char* __pyx_v_s; - const char* __pyx_v_end; - int __pyx_v_c; - PyObject *__pyx_v_result; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_t_3; - PyObject *__pyx_t_4 = NULL; - int __pyx_t_5; - int __pyx_t_6; - int __pyx_t_7; - char *__pyx_t_8; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__repl,&__pyx_n_s__string,&__pyx_n_s__count,0}; - __Pyx_RefNannySetupContext("subn"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto 
__pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__repl); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("subn", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__count); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "subn") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_repl = values[0]; - __pyx_v_string = values[1]; - if (values[2]) { - __pyx_v_count = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_count = ((int)0); - } - } else { - __pyx_v_count = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: - __pyx_v_count = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_repl = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("subn", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - 
__Pyx_AddTraceback("re2.Pattern.subn"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __Pyx_INCREF(__pyx_v_repl); - __Pyx_INCREF(__pyx_v_string); - __pyx_v_result = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":625 - * cdef _re2.StringPiece * sp - * cdef _re2.cpp_string * input_str - * cdef total_replacements = 0 # <<<<<<<<<<<<<< - * cdef int string_encoded = 0 - * cdef int repl_encoded = 0 - */ - __Pyx_INCREF(__pyx_int_0); - __pyx_v_total_replacements = __pyx_int_0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":626 - * cdef _re2.cpp_string * input_str - * cdef total_replacements = 0 - * cdef int string_encoded = 0 # <<<<<<<<<<<<<< - * cdef int repl_encoded = 0 - * cdef int encoded = 0 - */ - __pyx_v_string_encoded = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":627 - * cdef total_replacements = 0 - * cdef int string_encoded = 0 - * cdef int repl_encoded = 0 # <<<<<<<<<<<<<< - * cdef int encoded = 0 - * - */ - __pyx_v_repl_encoded = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":628 - * cdef int string_encoded = 0 - * cdef int repl_encoded = 0 - * cdef int encoded = 0 # <<<<<<<<<<<<<< - * - * if callable(repl): - */ - __pyx_v_encoded = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":630 - * cdef int encoded = 0 - * - * if callable(repl): # <<<<<<<<<<<<<< - * # This is a callback, so let's use the custom function - * return self._subn_callback(repl, string, count) - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_repl); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_repl); - __Pyx_GIVEREF(__pyx_v_repl); - __pyx_t_2 = PyObject_Call(__pyx_builtin_callable, __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - 
__pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":632 - * if callable(repl): - * # This is a callback, so let's use the custom function - * return self._subn_callback(repl, string, count) # <<<<<<<<<<<<<< - * - * string = unicode_to_bytestring(string, &string_encoded) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s___subn_callback); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyInt_FromLong(__pyx_v_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(__pyx_v_repl); - PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_v_repl); - __Pyx_GIVEREF(__pyx_v_repl); - __Pyx_INCREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_2, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - goto __pyx_L6; - } - __pyx_L6:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":634 - * return self._subn_callback(repl, string, count) - * - * string = unicode_to_bytestring(string, &string_encoded) # 
<<<<<<<<<<<<<< - * repl = unicode_to_bytestring(repl, &repl_encoded) - * if pystring_to_bytestring(repl, &cstring, &size) == -1: - */ - __pyx_t_1 = __pyx_f_3re2_unicode_to_bytestring(__pyx_v_string, (&__pyx_v_string_encoded)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 634; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_v_string); - __pyx_v_string = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":635 - * - * string = unicode_to_bytestring(string, &string_encoded) - * repl = unicode_to_bytestring(repl, &repl_encoded) # <<<<<<<<<<<<<< - * if pystring_to_bytestring(repl, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") - */ - __pyx_t_1 = __pyx_f_3re2_unicode_to_bytestring(__pyx_v_repl, (&__pyx_v_repl_encoded)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 635; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_v_repl); - __pyx_v_repl = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":636 - * string = unicode_to_bytestring(string, &string_encoded) - * repl = unicode_to_bytestring(repl, &repl_encoded) - * if pystring_to_bytestring(repl, &cstring, &size) == -1: # <<<<<<<<<<<<<< - * raise TypeError("expected string or buffer") - * - */ - __pyx_t_3 = (__pyx_f_3re2_pystring_to_bytestring(__pyx_v_repl, (&__pyx_v_cstring), (&__pyx_v_size)) == -1); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":637 - * repl = unicode_to_bytestring(repl, &repl_encoded) - * if pystring_to_bytestring(repl, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") # <<<<<<<<<<<<<< - * - * fixed_repl = NULL - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_8)); - PyTuple_SET_ITEM(__pyx_t_1, 
0, ((PyObject *)__pyx_kp_s_8)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_8)); - __pyx_t_4 = PyObject_Call(__pyx_builtin_TypeError, __pyx_t_1, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_Raise(__pyx_t_4, 0, 0); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":639 - * raise TypeError("expected string or buffer") - * - * fixed_repl = NULL # <<<<<<<<<<<<<< - * cdef _re2.const_char_ptr s = cstring - * cdef _re2.const_char_ptr end = s + size - */ - __pyx_v_fixed_repl = NULL; - - /* "/Users/maxiak/pyre2/src/re2.pyx":640 - * - * fixed_repl = NULL - * cdef _re2.const_char_ptr s = cstring # <<<<<<<<<<<<<< - * cdef _re2.const_char_ptr end = s + size - * cdef int c = 0 - */ - __pyx_v_s = __pyx_v_cstring; - - /* "/Users/maxiak/pyre2/src/re2.pyx":641 - * fixed_repl = NULL - * cdef _re2.const_char_ptr s = cstring - * cdef _re2.const_char_ptr end = s + size # <<<<<<<<<<<<<< - * cdef int c = 0 - * while s < end: - */ - __pyx_v_end = (__pyx_v_s + __pyx_v_size); - - /* "/Users/maxiak/pyre2/src/re2.pyx":642 - * cdef _re2.const_char_ptr s = cstring - * cdef _re2.const_char_ptr end = s + size - * cdef int c = 0 # <<<<<<<<<<<<<< - * while s < end: - * c = s[0] - */ - __pyx_v_c = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":643 - * cdef _re2.const_char_ptr end = s + size - * cdef int c = 0 - * while s < end: # <<<<<<<<<<<<<< - * c = s[0] - * if (c == '\\'): - */ - while (1) { - __pyx_t_3 = (__pyx_v_s < __pyx_v_end); - if (!__pyx_t_3) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":644 - * cdef int c = 0 - * while s < end: - * c = s[0] # <<<<<<<<<<<<<< - * if (c == '\\'): - * s += 1 - */ - __pyx_v_c = (__pyx_v_s[0]); - - /* "/Users/maxiak/pyre2/src/re2.pyx":645 - * 
while s < end: - * c = s[0] - * if (c == '\\'): # <<<<<<<<<<<<<< - * s += 1 - * if s == end: - */ - __pyx_t_3 = (__pyx_v_c == '\\'); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":646 - * c = s[0] - * if (c == '\\'): - * s += 1 # <<<<<<<<<<<<<< - * if s == end: - * raise RegexError("Invalid rewrite pattern") - */ - __pyx_v_s += 1; - - /* "/Users/maxiak/pyre2/src/re2.pyx":647 - * if (c == '\\'): - * s += 1 - * if s == end: # <<<<<<<<<<<<<< - * raise RegexError("Invalid rewrite pattern") - * c = s[0] - */ - __pyx_t_3 = (__pyx_v_s == __pyx_v_end); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":648 - * s += 1 - * if s == end: - * raise RegexError("Invalid rewrite pattern") # <<<<<<<<<<<<<< - * c = s[0] - * if c == '\\' or (c >= '0' and c <= '9'): - */ - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__RegexError); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_11)); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_11)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_11)); - __pyx_t_2 = PyObject_Call(__pyx_t_4, __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_Raise(__pyx_t_2, 0, 0); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L11; - } - __pyx_L11:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":649 - * if s == end: - * raise RegexError("Invalid rewrite pattern") - * c = s[0] # <<<<<<<<<<<<<< - * if c == '\\' or (c >= 
'0' and c <= '9'): - * if fixed_repl != NULL: - */ - __pyx_v_c = (__pyx_v_s[0]); - - /* "/Users/maxiak/pyre2/src/re2.pyx":650 - * raise RegexError("Invalid rewrite pattern") - * c = s[0] - * if c == '\\' or (c >= '0' and c <= '9'): # <<<<<<<<<<<<<< - * if fixed_repl != NULL: - * fixed_repl.push_back('\\') - */ - __pyx_t_3 = (__pyx_v_c == '\\'); - if (!__pyx_t_3) { - __pyx_t_5 = (__pyx_v_c >= '0'); - if (__pyx_t_5) { - __pyx_t_6 = (__pyx_v_c <= '9'); - __pyx_t_7 = __pyx_t_6; - } else { - __pyx_t_7 = __pyx_t_5; - } - __pyx_t_5 = __pyx_t_7; - } else { - __pyx_t_5 = __pyx_t_3; - } - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":651 - * c = s[0] - * if c == '\\' or (c >= '0' and c <= '9'): - * if fixed_repl != NULL: # <<<<<<<<<<<<<< - * fixed_repl.push_back('\\') - * fixed_repl.push_back(c) - */ - __pyx_t_5 = (__pyx_v_fixed_repl != NULL); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":652 - * if c == '\\' or (c >= '0' and c <= '9'): - * if fixed_repl != NULL: - * fixed_repl.push_back('\\') # <<<<<<<<<<<<<< - * fixed_repl.push_back(c) - * else: - */ - __pyx_v_fixed_repl->push_back('\\'); - - /* "/Users/maxiak/pyre2/src/re2.pyx":653 - * if fixed_repl != NULL: - * fixed_repl.push_back('\\') - * fixed_repl.push_back(c) # <<<<<<<<<<<<<< - * else: - * if fixed_repl == NULL: - */ - __pyx_v_fixed_repl->push_back(__pyx_v_c); - goto __pyx_L13; - } - __pyx_L13:; - goto __pyx_L12; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":655 - * fixed_repl.push_back(c) - * else: - * if fixed_repl == NULL: # <<<<<<<<<<<<<< - * fixed_repl = new _re2.cpp_string(cstring, s - cstring - 1) - * if c == 'n': - */ - __pyx_t_5 = (__pyx_v_fixed_repl == NULL); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":656 - * else: - * if fixed_repl == NULL: - * fixed_repl = new _re2.cpp_string(cstring, s - cstring - 1) # <<<<<<<<<<<<<< - * if c == 'n': - * fixed_repl.push_back('\n') - */ - __pyx_v_fixed_repl = new std::string(__pyx_v_cstring, ((__pyx_v_s - 
__pyx_v_cstring) - 1)); - goto __pyx_L14; - } - __pyx_L14:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":657 - * if fixed_repl == NULL: - * fixed_repl = new _re2.cpp_string(cstring, s - cstring - 1) - * if c == 'n': # <<<<<<<<<<<<<< - * fixed_repl.push_back('\n') - * else: - */ - __pyx_t_5 = (__pyx_v_c == 'n'); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":658 - * fixed_repl = new _re2.cpp_string(cstring, s - cstring - 1) - * if c == 'n': - * fixed_repl.push_back('\n') # <<<<<<<<<<<<<< - * else: - * fixed_repl.push_back('\\') - */ - __pyx_v_fixed_repl->push_back('\n'); - goto __pyx_L15; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":660 - * fixed_repl.push_back('\n') - * else: - * fixed_repl.push_back('\\') # <<<<<<<<<<<<<< - * fixed_repl.push_back('\\') - * fixed_repl.push_back(c) - */ - __pyx_v_fixed_repl->push_back('\\'); - - /* "/Users/maxiak/pyre2/src/re2.pyx":661 - * else: - * fixed_repl.push_back('\\') - * fixed_repl.push_back('\\') # <<<<<<<<<<<<<< - * fixed_repl.push_back(c) - * else: - */ - __pyx_v_fixed_repl->push_back('\\'); - - /* "/Users/maxiak/pyre2/src/re2.pyx":662 - * fixed_repl.push_back('\\') - * fixed_repl.push_back('\\') - * fixed_repl.push_back(c) # <<<<<<<<<<<<<< - * else: - * if fixed_repl != NULL: - */ - __pyx_v_fixed_repl->push_back(__pyx_v_c); - } - __pyx_L15:; - } - __pyx_L12:; - goto __pyx_L10; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":664 - * fixed_repl.push_back(c) - * else: - * if fixed_repl != NULL: # <<<<<<<<<<<<<< - * fixed_repl.push_back(c) - * - */ - __pyx_t_5 = (__pyx_v_fixed_repl != NULL); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":665 - * else: - * if fixed_repl != NULL: - * fixed_repl.push_back(c) # <<<<<<<<<<<<<< - * - * s += 1 - */ - __pyx_v_fixed_repl->push_back(__pyx_v_c); - goto __pyx_L16; - } - __pyx_L16:; - } - __pyx_L10:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":667 - * fixed_repl.push_back(c) - * - * s += 1 # <<<<<<<<<<<<<< - * if fixed_repl != NULL: - * sp = new 
_re2.StringPiece(fixed_repl.c_str()) - */ - __pyx_v_s += 1; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":668 - * - * s += 1 - * if fixed_repl != NULL: # <<<<<<<<<<<<<< - * sp = new _re2.StringPiece(fixed_repl.c_str()) - * else: - */ - __pyx_t_5 = (__pyx_v_fixed_repl != NULL); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":669 - * s += 1 - * if fixed_repl != NULL: - * sp = new _re2.StringPiece(fixed_repl.c_str()) # <<<<<<<<<<<<<< - * else: - * sp = new _re2.StringPiece(cstring, size) - */ - __pyx_v_sp = new re2::StringPiece(__pyx_v_fixed_repl->c_str()); - goto __pyx_L17; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":671 - * sp = new _re2.StringPiece(fixed_repl.c_str()) - * else: - * sp = new _re2.StringPiece(cstring, size) # <<<<<<<<<<<<<< - * - * input_str = new _re2.cpp_string(string) - */ - __pyx_v_sp = new re2::StringPiece(__pyx_v_cstring, __pyx_v_size); - } - __pyx_L17:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":673 - * sp = new _re2.StringPiece(cstring, size) - * - * input_str = new _re2.cpp_string(string) # <<<<<<<<<<<<<< - * if not count: - * total_replacements = _re2.pattern_GlobalReplace(input_str, - */ - __pyx_t_8 = PyBytes_AsString(__pyx_v_string); if (unlikely((!__pyx_t_8) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 673; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_input_str = new std::string(__pyx_t_8); - - /* "/Users/maxiak/pyre2/src/re2.pyx":674 - * - * input_str = new _re2.cpp_string(string) - * if not count: # <<<<<<<<<<<<<< - * total_replacements = _re2.pattern_GlobalReplace(input_str, - * self.re_pattern[0], - */ - __pyx_t_5 = (!__pyx_v_count); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":677 - * total_replacements = _re2.pattern_GlobalReplace(input_str, - * self.re_pattern[0], - * sp[0]) # <<<<<<<<<<<<<< - * elif count == 1: - * total_replacements = _re2.pattern_Replace(input_str, - */ - __pyx_t_2 = PyInt_FromLong(pattern_GlobalReplace(__pyx_v_input_str, (((struct 
__pyx_obj_3re2_Pattern *)__pyx_v_self)->re_pattern[0]), (__pyx_v_sp[0]))); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_v_total_replacements); - __pyx_v_total_replacements = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L18; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":678 - * self.re_pattern[0], - * sp[0]) - * elif count == 1: # <<<<<<<<<<<<<< - * total_replacements = _re2.pattern_Replace(input_str, - * self.re_pattern[0], - */ - __pyx_t_5 = (__pyx_v_count == 1); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":681 - * total_replacements = _re2.pattern_Replace(input_str, - * self.re_pattern[0], - * sp[0]) # <<<<<<<<<<<<<< - * else: - * del fixed_repl - */ - __pyx_t_2 = PyInt_FromLong(pattern_Replace(__pyx_v_input_str, (((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->re_pattern[0]), (__pyx_v_sp[0]))); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 679; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_v_total_replacements); - __pyx_v_total_replacements = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L18; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":683 - * sp[0]) - * else: - * del fixed_repl # <<<<<<<<<<<<<< - * del input_str - * del sp - */ - delete __pyx_v_fixed_repl; - - /* "/Users/maxiak/pyre2/src/re2.pyx":684 - * else: - * del fixed_repl - * del input_str # <<<<<<<<<<<<<< - * del sp - * raise NotImplementedError("So far pyre2 does not support custom replacement counts") - */ - delete __pyx_v_input_str; - - /* "/Users/maxiak/pyre2/src/re2.pyx":685 - * del fixed_repl - * del input_str - * del sp # <<<<<<<<<<<<<< - * raise NotImplementedError("So far pyre2 does not support custom replacement counts") - * - */ - delete __pyx_v_sp; - - /* "/Users/maxiak/pyre2/src/re2.pyx":686 - * del input_str - * del sp - * raise NotImplementedError("So far pyre2 does not 
support custom replacement counts") # <<<<<<<<<<<<<< - * - * if string_encoded or (repl_encoded and total_replacements > 0): - */ - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 686; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_12)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_12)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_12)); - __pyx_t_1 = PyObject_Call(__pyx_builtin_NotImplementedError, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 686; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_Raise(__pyx_t_1, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 686; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_L18:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":688 - * raise NotImplementedError("So far pyre2 does not support custom replacement counts") - * - * if string_encoded or (repl_encoded and total_replacements > 0): # <<<<<<<<<<<<<< - * result = cpp_to_utf8(input_str[0]) - * else: - */ - if (!__pyx_v_string_encoded) { - if (__pyx_v_repl_encoded) { - __pyx_t_1 = PyObject_RichCompare(__pyx_v_total_replacements, __pyx_int_0, Py_GT); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 688; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 688; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_3 = __pyx_t_5; - } else { - __pyx_t_3 = __pyx_v_repl_encoded; - } - __pyx_t_5 = __pyx_t_3; - } else { - __pyx_t_5 = __pyx_v_string_encoded; - } - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":689 - * - * if string_encoded or (repl_encoded and 
total_replacements > 0): - * result = cpp_to_utf8(input_str[0]) # <<<<<<<<<<<<<< - * else: - * result = cpp_to_pystring(input_str[0]) - */ - __pyx_t_1 = __pyx_f_3re2_cpp_to_utf8((__pyx_v_input_str[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 689; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_v_result); - __pyx_v_result = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L19; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":691 - * result = cpp_to_utf8(input_str[0]) - * else: - * result = cpp_to_pystring(input_str[0]) # <<<<<<<<<<<<<< - * del fixed_repl - * del input_str - */ - __pyx_t_1 = __pyx_f_3re2_cpp_to_pystring((__pyx_v_input_str[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 691; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_v_result); - __pyx_v_result = __pyx_t_1; - __pyx_t_1 = 0; - } - __pyx_L19:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":692 - * else: - * result = cpp_to_pystring(input_str[0]) - * del fixed_repl # <<<<<<<<<<<<<< - * del input_str - * del sp - */ - delete __pyx_v_fixed_repl; - - /* "/Users/maxiak/pyre2/src/re2.pyx":693 - * result = cpp_to_pystring(input_str[0]) - * del fixed_repl - * del input_str # <<<<<<<<<<<<<< - * del sp - * return (result, total_replacements) - */ - delete __pyx_v_input_str; - - /* "/Users/maxiak/pyre2/src/re2.pyx":694 - * del fixed_repl - * del input_str - * del sp # <<<<<<<<<<<<<< - * return (result, total_replacements) - * - */ - delete __pyx_v_sp; - - /* "/Users/maxiak/pyre2/src/re2.pyx":695 - * del input_str - * del sp - * return (result, total_replacements) # <<<<<<<<<<<<<< - * - * def _subn_callback(self, callback, string, int count=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 695; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - 
__Pyx_INCREF(__pyx_v_result); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_result); - __Pyx_GIVEREF(__pyx_v_result); - __Pyx_INCREF(__pyx_v_total_replacements); - PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_v_total_replacements); - __Pyx_GIVEREF(__pyx_v_total_replacements); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("re2.Pattern.subn"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_total_replacements); - __Pyx_DECREF(__pyx_v_result); - __Pyx_DECREF(__pyx_v_repl); - __Pyx_DECREF(__pyx_v_string); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":697 - * return (result, total_replacements) - * - * def _subn_callback(self, callback, string, int count=0): # <<<<<<<<<<<<<< - * """ - * This function is probably the hardest to implement correctly. 
- */ - -static PyObject *__pyx_pf_3re2_7Pattern__subn_callback(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_7Pattern__subn_callback[] = "\n This function is probably the hardest to implement correctly.\n This is my first attempt, but if anybody has a better solution, please help out.\n "; -static PyObject *__pyx_pf_3re2_7Pattern__subn_callback(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_callback = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_count; - Py_ssize_t __pyx_v_size; - int __pyx_v_result; - int __pyx_v_endpos; - int __pyx_v_pos; - int __pyx_v_encoded; - int __pyx_v_num_repl; - char *__pyx_v_cstring; - re2::StringPiece *__pyx_v_sp; - struct __pyx_obj_3re2_Match *__pyx_v_m; - PyObject *__pyx_v_resultlist = 0; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - int __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__callback,&__pyx_n_s__string,&__pyx_n_s__count,0}; - __Pyx_RefNannySetupContext("_subn_callback"); - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__callback); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("_subn_callback", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 697; __pyx_clineno = __LINE__; goto __pyx_L3_error;} 
- } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__count); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "_subn_callback") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 697; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_callback = values[0]; - __pyx_v_string = values[1]; - if (values[2]) { - __pyx_v_count = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 697; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_count = ((int)0); - } - } else { - __pyx_v_count = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: - __pyx_v_count = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 697; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_callback = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("_subn_callback", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 697; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.Pattern._subn_callback"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __Pyx_INCREF(__pyx_v_string); - __pyx_v_m = ((struct __pyx_obj_3re2_Match *)Py_None); __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":705 - * cdef int result - * cdef int endpos - * cdef int pos = 0 # <<<<<<<<<<<<<< - * cdef int encoded = 0 - * cdef int num_repl = 0 - */ - __pyx_v_pos = 0; - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":706 - * cdef int endpos - * cdef int pos = 0 - * cdef int encoded = 0 # <<<<<<<<<<<<<< - * cdef int num_repl = 0 - * cdef char * cstring - */ - __pyx_v_encoded = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":707 - * cdef int pos = 0 - * cdef int encoded = 0 - * cdef int num_repl = 0 # <<<<<<<<<<<<<< - * cdef char * cstring - * cdef _re2.StringPiece * sp - */ - __pyx_v_num_repl = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":711 - * cdef _re2.StringPiece * sp - * cdef Match m - * cdef list resultlist = [] # <<<<<<<<<<<<<< - * - * if count < 0: - */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 711; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_v_resultlist = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":713 - * cdef list resultlist = [] - * - * if count < 0: # <<<<<<<<<<<<<< - * count = 0 - * - */ - __pyx_t_2 = (__pyx_v_count < 0); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":714 - * - * if count < 0: - * count = 0 # <<<<<<<<<<<<<< - * - * string = unicode_to_bytestring(string, &encoded) - */ - __pyx_v_count = 0; - goto __pyx_L6; - } - __pyx_L6:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":716 - * count = 0 - * - * string = unicode_to_bytestring(string, &encoded) # <<<<<<<<<<<<<< - * if pystring_to_bytestring(string, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") - */ - __pyx_t_1 = __pyx_f_3re2_unicode_to_bytestring(__pyx_v_string, (&__pyx_v_encoded)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_v_string); - __pyx_v_string = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":717 - * - * string = unicode_to_bytestring(string, &encoded) - * if pystring_to_bytestring(string, &cstring, &size) == -1: # <<<<<<<<<<<<<< - * raise 
TypeError("expected string or buffer") - * encoded = encoded - */ - __pyx_t_2 = (__pyx_f_3re2_pystring_to_bytestring(__pyx_v_string, (&__pyx_v_cstring), (&__pyx_v_size)) == -1); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":718 - * string = unicode_to_bytestring(string, &encoded) - * if pystring_to_bytestring(string, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") # <<<<<<<<<<<<<< - * encoded = encoded - * - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 718; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_8)); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_8)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_8)); - __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 718; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_Raise(__pyx_t_3, 0, 0); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 718; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":719 - * if pystring_to_bytestring(string, &cstring, &size) == -1: - * raise TypeError("expected string or buffer") - * encoded = encoded # <<<<<<<<<<<<<< - * - * sp = new _re2.StringPiece(cstring, size) - */ - __pyx_v_encoded = ((int)__pyx_v_encoded); - - /* "/Users/maxiak/pyre2/src/re2.pyx":721 - * encoded = encoded - * - * sp = new _re2.StringPiece(cstring, size) # <<<<<<<<<<<<<< - * - * try: - */ - __pyx_v_sp = new re2::StringPiece(__pyx_v_cstring, __pyx_v_size); - - /* "/Users/maxiak/pyre2/src/re2.pyx":723 - * sp = new _re2.StringPiece(cstring, size) - * - * try: # <<<<<<<<<<<<<< - * while True: - * m = Match(self, self.ngroups + 1) - */ - /*try:*/ { - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":724 - * - * try: - * while True: # <<<<<<<<<<<<<< - * m = Match(self, self.ngroups + 1) - * with nogil: - */ - while (1) { - if (!1) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":725 - * try: - * while True: - * m = Match(self, self.ngroups + 1) # <<<<<<<<<<<<<< - * with nogil: - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - */ - __pyx_t_3 = PyInt_FromLong((((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->ngroups + 1)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 725; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 725; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_self); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_self); - __Pyx_GIVEREF(__pyx_v_self); - PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3re2_Match)), __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 725; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(((PyObject *)__pyx_v_m)); - __pyx_v_m = ((struct __pyx_obj_3re2_Match *)__pyx_t_3); - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":726 - * while True: - * m = Match(self, self.ngroups + 1) - * with nogil: # <<<<<<<<<<<<<< - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - * if result == 0: - */ - { PyThreadState *_save; - Py_UNBLOCK_THREADS - /*try:*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":727 - * m = Match(self, self.ngroups + 1) - * with nogil: - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) # <<<<<<<<<<<<<< - * if result == 0: - * break - */ - 
__pyx_v_result = ((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->re_pattern->Match((__pyx_v_sp[0]), __pyx_v_pos, ((int)__pyx_v_size), RE2::UNANCHORED, __pyx_v_m->matches, (((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->ngroups + 1)); - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":726 - * while True: - * m = Match(self, self.ngroups + 1) - * with nogil: # <<<<<<<<<<<<<< - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - * if result == 0: - */ - /*finally:*/ { - Py_BLOCK_THREADS - } - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":728 - * with nogil: - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - * if result == 0: # <<<<<<<<<<<<<< - * break - * - */ - __pyx_t_2 = (__pyx_v_result == 0); - if (__pyx_t_2) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":729 - * result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) - * if result == 0: - * break # <<<<<<<<<<<<<< - * - * endpos = m.matches[0].data() - cstring - */ - goto __pyx_L12_break; - goto __pyx_L18; - } - __pyx_L18:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":731 - * break - * - * endpos = m.matches[0].data() - cstring # <<<<<<<<<<<<<< - * if encoded: - * resultlist.append(char_to_utf8(&sp.data()[pos], endpos - pos)) - */ - __pyx_v_endpos = ((__pyx_v_m->matches[0]).data() - __pyx_v_cstring); - - /* "/Users/maxiak/pyre2/src/re2.pyx":732 - * - * endpos = m.matches[0].data() - cstring - * if encoded: # <<<<<<<<<<<<<< - * resultlist.append(char_to_utf8(&sp.data()[pos], endpos - pos)) - * else: - */ - if (__pyx_v_encoded) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":733 - * endpos = m.matches[0].data() - cstring - * if encoded: - * resultlist.append(char_to_utf8(&sp.data()[pos], endpos - pos)) # <<<<<<<<<<<<<< - * else: - * resultlist.append(sp.data()[pos:endpos]) - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 
'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 733; __pyx_clineno = __LINE__; goto __pyx_L9;} - } - __pyx_t_3 = __pyx_f_3re2_char_to_utf8((&(__pyx_v_sp->data()[__pyx_v_pos])), (__pyx_v_endpos - __pyx_v_pos)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 733; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), __pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 733; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L19; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":735 - * resultlist.append(char_to_utf8(&sp.data()[pos], endpos - pos)) - * else: - * resultlist.append(sp.data()[pos:endpos]) # <<<<<<<<<<<<<< - * pos = endpos + m.matches[0].length() - * - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 735; __pyx_clineno = __LINE__; goto __pyx_L9;} - } - __pyx_t_3 = PyBytes_FromStringAndSize(__pyx_v_sp->data() + __pyx_v_pos, __pyx_v_endpos - __pyx_v_pos); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 735; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), ((PyObject *)__pyx_t_3)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 735; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - } - __pyx_L19:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":736 - * else: - * resultlist.append(sp.data()[pos:endpos]) - * pos = endpos + m.matches[0].length() # <<<<<<<<<<<<<< - * - * m.encoded = encoded - */ - __pyx_v_pos = (__pyx_v_endpos + (__pyx_v_m->matches[0]).length()); - - /* "/Users/maxiak/pyre2/src/re2.pyx":738 - * pos = endpos + m.matches[0].length() 
- * - * m.encoded = encoded # <<<<<<<<<<<<<< - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - * m.nmatches = self.ngroups + 1 - */ - __pyx_v_m->encoded = __pyx_v_encoded; - - /* "/Users/maxiak/pyre2/src/re2.pyx":739 - * - * m.encoded = encoded - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) # <<<<<<<<<<<<<< - * m.nmatches = self.ngroups + 1 - * m.match_string = string - */ - __pyx_v_m->named_groups = addressof(((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->re_pattern->NamedCapturingGroups()); - - /* "/Users/maxiak/pyre2/src/re2.pyx":740 - * m.encoded = encoded - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - * m.nmatches = self.ngroups + 1 # <<<<<<<<<<<<<< - * m.match_string = string - * resultlist.append(callback(m) or '') - */ - __pyx_v_m->nmatches = (((struct __pyx_obj_3re2_Pattern *)__pyx_v_self)->ngroups + 1); - - /* "/Users/maxiak/pyre2/src/re2.pyx":741 - * m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - * m.nmatches = self.ngroups + 1 - * m.match_string = string # <<<<<<<<<<<<<< - * resultlist.append(callback(m) or '') - * - */ - __Pyx_INCREF(__pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - __Pyx_GOTREF(__pyx_v_m->match_string); - __Pyx_DECREF(__pyx_v_m->match_string); - __pyx_v_m->match_string = __pyx_v_string; - - /* "/Users/maxiak/pyre2/src/re2.pyx":742 - * m.nmatches = self.ngroups + 1 - * m.match_string = string - * resultlist.append(callback(m) or '') # <<<<<<<<<<<<<< - * - * num_repl += 1 - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 742; __pyx_clineno = __LINE__; goto __pyx_L9;} - } - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 742; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(((PyObject *)__pyx_v_m)); - 
PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_v_m)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_m)); - __pyx_t_1 = PyObject_Call(__pyx_v_callback, __pyx_t_3, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 742; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 742; __pyx_clineno = __LINE__; goto __pyx_L9;} - if (!__pyx_t_2) { - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_INCREF(((PyObject *)__pyx_kp_s_7)); - __pyx_t_3 = __pyx_kp_s_7; - } else { - __pyx_t_3 = __pyx_t_1; - __pyx_t_1 = 0; - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), __pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 742; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":744 - * resultlist.append(callback(m) or '') - * - * num_repl += 1 # <<<<<<<<<<<<<< - * if count and num_repl >= count: - * break - */ - __pyx_v_num_repl += 1; - - /* "/Users/maxiak/pyre2/src/re2.pyx":745 - * - * num_repl += 1 - * if count and num_repl >= count: # <<<<<<<<<<<<<< - * break - * - */ - if (__pyx_v_count) { - __pyx_t_2 = (__pyx_v_num_repl >= __pyx_v_count); - __pyx_t_5 = __pyx_t_2; - } else { - __pyx_t_5 = __pyx_v_count; - } - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":746 - * num_repl += 1 - * if count and num_repl >= count: - * break # <<<<<<<<<<<<<< - * - * if encoded: - */ - goto __pyx_L12_break; - goto __pyx_L20; - } - __pyx_L20:; - } - __pyx_L12_break:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":748 - * break - * - * if encoded: # <<<<<<<<<<<<<< - * resultlist.append(char_to_utf8(&sp.data()[pos], sp.length() - pos)) - * return (u''.join(resultlist), num_repl) - */ - if (__pyx_v_encoded) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":749 - * - * if encoded: - * 
resultlist.append(char_to_utf8(&sp.data()[pos], sp.length() - pos)) # <<<<<<<<<<<<<< - * return (u''.join(resultlist), num_repl) - * else: - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 749; __pyx_clineno = __LINE__; goto __pyx_L9;} - } - __pyx_t_3 = __pyx_f_3re2_char_to_utf8((&(__pyx_v_sp->data()[__pyx_v_pos])), (__pyx_v_sp->length() - __pyx_v_pos)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 749; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), __pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 749; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":750 - * if encoded: - * resultlist.append(char_to_utf8(&sp.data()[pos], sp.length() - pos)) - * return (u''.join(resultlist), num_repl) # <<<<<<<<<<<<<< - * else: - * resultlist.append(sp.data()[pos:]) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = ((PyObject *)PyUnicode_Join(__pyx_kp_u_7, ((PyObject *)__pyx_v_resultlist))); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_1 = PyInt_FromLong(__pyx_v_num_repl); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_3)); - __Pyx_GIVEREF(((PyObject *)__pyx_t_3)); - PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_3 = 0; - __pyx_t_1 = 0; - __pyx_r = 
__pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L8; - goto __pyx_L21; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":752 - * return (u''.join(resultlist), num_repl) - * else: - * resultlist.append(sp.data()[pos:]) # <<<<<<<<<<<<<< - * return (''.join(resultlist), num_repl) - * finally: - */ - if (unlikely(__pyx_v_resultlist == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 752; __pyx_clineno = __LINE__; goto __pyx_L9;} - } - __pyx_t_6 = PyBytes_FromString(__pyx_v_sp->data() + __pyx_v_pos); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 752; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(((PyObject *)__pyx_t_6)); - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_resultlist), ((PyObject *)__pyx_t_6)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 752; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":753 - * else: - * resultlist.append(sp.data()[pos:]) - * return (''.join(resultlist), num_repl) # <<<<<<<<<<<<<< - * finally: - * del sp - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_6 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_7), __pyx_n_s__join); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 753; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 753; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_v_resultlist)); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_resultlist)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_resultlist)); - __pyx_t_3 = PyObject_Call(__pyx_t_6, __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 753; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_3); 
- __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyInt_FromLong(__pyx_v_num_repl); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 753; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 753; __pyx_clineno = __LINE__; goto __pyx_L9;} - __Pyx_GOTREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_3 = 0; - __pyx_t_1 = 0; - __pyx_r = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L8; - } - __pyx_L21:; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":755 - * return (''.join(resultlist), num_repl) - * finally: - * del sp # <<<<<<<<<<<<<< - * - * _cache = {} - */ - /*finally:*/ { - int __pyx_why; - PyObject *__pyx_exc_type, *__pyx_exc_value, *__pyx_exc_tb; - int __pyx_exc_lineno; - __pyx_exc_type = 0; __pyx_exc_value = 0; __pyx_exc_tb = 0; __pyx_exc_lineno = 0; - __pyx_why = 0; goto __pyx_L10; - __pyx_L8: __pyx_exc_type = 0; __pyx_exc_value = 0; __pyx_exc_tb = 0; __pyx_exc_lineno = 0; - __pyx_why = 3; goto __pyx_L10; - __pyx_L9: { - __pyx_why = 4; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_ErrFetch(&__pyx_exc_type, &__pyx_exc_value, &__pyx_exc_tb); - __pyx_exc_lineno = __pyx_lineno; - goto __pyx_L10; - } - __pyx_L10:; - delete __pyx_v_sp; - switch (__pyx_why) { - case 3: goto __pyx_L0; - case 4: { - __Pyx_ErrRestore(__pyx_exc_type, __pyx_exc_value, __pyx_exc_tb); - __pyx_lineno = __pyx_exc_lineno; - __pyx_exc_type = 0; - __pyx_exc_value = 0; - __pyx_exc_tb = 0; - goto __pyx_L1_error; - } - } - } - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_6); - 
__Pyx_AddTraceback("re2.Pattern._subn_callback"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF((PyObject *)__pyx_v_m); - __Pyx_XDECREF(__pyx_v_resultlist); - __Pyx_DECREF(__pyx_v_string); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":762 - * _MAXCACHE = 100 - * - * def compile(pattern, int flags=0, int max_mem=8388608): # <<<<<<<<<<<<<< - * cachekey = (type(pattern),) + (pattern, flags) - * p = _cache.get(cachekey) - */ - -static PyObject *__pyx_pf_3re2_compile(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_pf_3re2_compile(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - int __pyx_v_flags; - int __pyx_v_max_mem; - PyObject *__pyx_v_cachekey; - PyObject *__pyx_v_p; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - PyObject *__pyx_t_5 = NULL; - Py_ssize_t __pyx_t_6; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__flags,&__pyx_n_s__max_mem,0}; - __Pyx_RefNannySetupContext("compile"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__flags); - if (value) { values[1] = value; kw_args--; } - } - case 2: - if (kw_args > 0) { - PyObject* value = 
PyDict_GetItem(__pyx_kwds, __pyx_n_s__max_mem); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "compile") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 762; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - if (values[1]) { - __pyx_v_flags = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 762; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_flags = ((int)0); - } - if (values[2]) { - __pyx_v_max_mem = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_max_mem == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 762; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_max_mem = ((int)8388608); - } - } else { - __pyx_v_flags = ((int)0); - __pyx_v_max_mem = ((int)8388608); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: __pyx_v_max_mem = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_max_mem == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 762; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: __pyx_v_flags = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 762; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 1: __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("compile", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 762; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.compile"); - 
__Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_v_cachekey = ((PyObject *)Py_None); __Pyx_INCREF(Py_None); - __pyx_v_p = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":763 - * - * def compile(pattern, int flags=0, int max_mem=8388608): - * cachekey = (type(pattern),) + (pattern, flags) # <<<<<<<<<<<<<< - * p = _cache.get(cachekey) - * if p is not None: - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 763; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)Py_TYPE(__pyx_v_pattern))); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)Py_TYPE(__pyx_v_pattern))); - __Pyx_GIVEREF(((PyObject *)Py_TYPE(__pyx_v_pattern))); - __pyx_t_2 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 763; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 763; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - __pyx_t_2 = 0; - __pyx_t_2 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 763; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(PyTuple_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected tuple, got %.200s", Py_TYPE(__pyx_t_2)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 763; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_v_cachekey)); - 
__pyx_v_cachekey = ((PyObject *)__pyx_t_2); - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":764 - * def compile(pattern, int flags=0, int max_mem=8388608): - * cachekey = (type(pattern),) + (pattern, flags) - * p = _cache.get(cachekey) # <<<<<<<<<<<<<< - * if p is not None: - * return p - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s___cache); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 764; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__get); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 764; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 764; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_v_cachekey)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_v_cachekey)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_cachekey)); - __pyx_t_1 = PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 764; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_v_p); - __pyx_v_p = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":765 - * cachekey = (type(pattern),) + (pattern, flags) - * p = _cache.get(cachekey) - * if p is not None: # <<<<<<<<<<<<<< - * return p - * p = _compile(pattern, flags, max_mem) - */ - __pyx_t_4 = (__pyx_v_p != Py_None); - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":766 - * p = _cache.get(cachekey) - * if p is not None: - * return p # <<<<<<<<<<<<<< - * p = _compile(pattern, flags, max_mem) - * - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_p); - __pyx_r = 
__pyx_v_p; - goto __pyx_L0; - goto __pyx_L6; - } - __pyx_L6:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":767 - * if p is not None: - * return p - * p = _compile(pattern, flags, max_mem) # <<<<<<<<<<<<<< - * - * if len(_cache) >= _MAXCACHE: - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s___compile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyInt_FromLong(__pyx_v_max_mem); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_2 = 0; - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_t_1, __pyx_t_5, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_v_p); - __pyx_v_p = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":769 - * p = _compile(pattern, flags, max_mem) - * - * if len(_cache) >= _MAXCACHE: # <<<<<<<<<<<<<< - * _cache.clear() - * _cache[cachekey] = p - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s___cache); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_6); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s___MAXCACHE); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_t_5, Py_GE); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":770 - * - * if len(_cache) >= _MAXCACHE: - * _cache.clear() # <<<<<<<<<<<<<< - * _cache[cachekey] = p - * return p - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s___cache); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 770; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__clear); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 770; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 770; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":771 - * if len(_cache) >= _MAXCACHE: - * _cache.clear() - * _cache[cachekey] = p # <<<<<<<<<<<<<< - * return p - * - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s___cache); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 771; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (PyObject_SetItem(__pyx_t_1, ((PyObject *)__pyx_v_cachekey), __pyx_v_p) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 771; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":772 - * _cache.clear() - * _cache[cachekey] = p - * return p # <<<<<<<<<<<<<< - * - * class BackreferencesException(Exception): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_p); - __pyx_r = __pyx_v_p; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_AddTraceback("re2.compile"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_cachekey); - __Pyx_DECREF(__pyx_v_p); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":783 - * - * class Tokenizer: - * def __init__(self, string): # <<<<<<<<<<<<<< - * self.string = string - * self.index = 0 - */ - -static PyObject *__pyx_pf_3re2_9Tokenizer___init__(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_3re2_9Tokenizer___init__ = {__Pyx_NAMESTR("__init__"), (PyCFunction)__pyx_pf_3re2_9Tokenizer___init__, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}; -static PyObject 
*__pyx_pf_3re2_9Tokenizer___init__(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_self = 0; - PyObject *__pyx_v_string = 0; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__self,&__pyx_n_s__string,0}; - __Pyx_RefNannySetupContext("__init__"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[2] = {0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__self); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("__init__", 1, 2, 2, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 783; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "__init__") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 783; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_self = values[0]; - __pyx_v_string = values[1]; - } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { - goto __pyx_L5_argtuple_error; - } else { - __pyx_v_self = PyTuple_GET_ITEM(__pyx_args, 0); - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 1); - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__init__", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 783; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - 
__Pyx_AddTraceback("re2.Tokenizer.__init__"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":784 - * class Tokenizer: - * def __init__(self, string): - * self.string = string # <<<<<<<<<<<<<< - * self.index = 0 - * self.__next() - */ - if (PyObject_SetAttr(__pyx_v_self, __pyx_n_s__string, __pyx_v_string) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":785 - * def __init__(self, string): - * self.string = string - * self.index = 0 # <<<<<<<<<<<<<< - * self.__next() - * def __next(self): - */ - if (PyObject_SetAttr(__pyx_v_self, __pyx_n_s__index, __pyx_int_0) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 785; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":786 - * self.string = string - * self.index = 0 - * self.__next() # <<<<<<<<<<<<<< - * def __next(self): - * if self.index >= len(self.string): - */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s____next); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 786; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 786; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("re2.Tokenizer.__init__"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":787 - * self.index = 0 - * self.__next() - * def __next(self): # <<<<<<<<<<<<<< - * if self.index 
>= len(self.string): - * self.next = None - */ - -static PyObject *__pyx_pf_3re2_9Tokenizer___next(PyObject *__pyx_self, PyObject *__pyx_v_self); /*proto*/ -static PyMethodDef __pyx_mdef_3re2_9Tokenizer___next = {__Pyx_NAMESTR("__next"), (PyCFunction)__pyx_pf_3re2_9Tokenizer___next, METH_O, __Pyx_DOCSTR(0)}; -static PyObject *__pyx_pf_3re2_9Tokenizer___next(PyObject *__pyx_self, PyObject *__pyx_v_self) { - PyObject *__pyx_v_ch; - PyObject *__pyx_v_c; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - Py_ssize_t __pyx_t_3; - PyObject *__pyx_t_4 = NULL; - int __pyx_t_5; - int __pyx_t_6; - PyObject *__pyx_t_7 = NULL; - __Pyx_RefNannySetupContext("__next"); - __pyx_self = __pyx_self; - __pyx_v_ch = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_c = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":788 - * self.__next() - * def __next(self): - * if self.index >= len(self.string): # <<<<<<<<<<<<<< - * self.next = None - * return - */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 788; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__string); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 788; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_Length(__pyx_t_2); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 788; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyInt_FromSsize_t(__pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 788; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyObject_RichCompare(__pyx_t_1, __pyx_t_2, Py_GE); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 788; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 788; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":789 - * def __next(self): - * if self.index >= len(self.string): - * self.next = None # <<<<<<<<<<<<<< - * return - * ch = self.string[self.index] - */ - if (PyObject_SetAttr(__pyx_v_self, __pyx_n_s__next, Py_None) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 789; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":790 - * if self.index >= len(self.string): - * self.next = None - * return # <<<<<<<<<<<<<< - * ch = self.string[self.index] - * if ch[0] == "\\": - */ - __Pyx_XDECREF(__pyx_r); - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - goto __pyx_L5; - } - __pyx_L5:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":791 - * self.next = None - * return - * ch = self.string[self.index] # <<<<<<<<<<<<<< - * if ch[0] == "\\": - * try: - */ - __pyx_t_4 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__string); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 791; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__index); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 791; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_GetItem(__pyx_t_4, __pyx_t_2); if (!__pyx_t_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 791; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_v_ch); - __pyx_v_ch = __pyx_t_1; - __pyx_t_1 = 0; - - 
/* "/Users/maxiak/pyre2/src/re2.pyx":792 - * return - * ch = self.string[self.index] - * if ch[0] == "\\": # <<<<<<<<<<<<<< - * try: - * c = self.string[self.index + 1] - */ - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_ch, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 792; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_RichCompare(__pyx_t_1, ((PyObject *)__pyx_kp_s_3), Py_EQ); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 792; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 792; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":793 - * ch = self.string[self.index] - * if ch[0] == "\\": - * try: # <<<<<<<<<<<<<< - * c = self.string[self.index + 1] - * except IndexError: - */ - { - PyObject *__pyx_save_exc_type, *__pyx_save_exc_value, *__pyx_save_exc_tb; - __Pyx_ExceptionSave(&__pyx_save_exc_type, &__pyx_save_exc_value, &__pyx_save_exc_tb); - __Pyx_XGOTREF(__pyx_save_exc_type); - __Pyx_XGOTREF(__pyx_save_exc_value); - __Pyx_XGOTREF(__pyx_save_exc_tb); - /*try:*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":794 - * if ch[0] == "\\": - * try: - * c = self.string[self.index + 1] # <<<<<<<<<<<<<< - * except IndexError: - * raise RegexError, "bogus escape (end of line)" - */ - __pyx_t_2 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__string); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 794; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 794; __pyx_clineno = __LINE__; goto __pyx_L7_error;} 
- __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyNumber_Add(__pyx_t_1, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 794; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_GetItem(__pyx_t_2, __pyx_t_4); if (!__pyx_t_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 794; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_v_c); - __pyx_v_c = __pyx_t_1; - __pyx_t_1 = 0; - } - __Pyx_XDECREF(__pyx_save_exc_type); __pyx_save_exc_type = 0; - __Pyx_XDECREF(__pyx_save_exc_value); __pyx_save_exc_value = 0; - __Pyx_XDECREF(__pyx_save_exc_tb); __pyx_save_exc_tb = 0; - goto __pyx_L14_try_end; - __pyx_L7_error:; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":795 - * try: - * c = self.string[self.index + 1] - * except IndexError: # <<<<<<<<<<<<<< - * raise RegexError, "bogus escape (end of line)" - * ch = ch + c - */ - __pyx_t_6 = PyErr_ExceptionMatches(__pyx_builtin_IndexError); - if (__pyx_t_6) { - __Pyx_AddTraceback("re2.Tokenizer.__next"); - if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_4, &__pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_GOTREF(__pyx_t_4); - __Pyx_GOTREF(__pyx_t_2); - - /* "/Users/maxiak/pyre2/src/re2.pyx":796 - * c = self.string[self.index + 1] - * except IndexError: - * raise RegexError, "bogus escape (end of line)" # <<<<<<<<<<<<<< - * ch = ch + c - * self.index = self.index + len(ch) - */ - __pyx_t_7 = __Pyx_GetName(__pyx_m, __pyx_n_s__RegexError); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 796; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} - 
__Pyx_GOTREF(__pyx_t_7); - __Pyx_Raise(__pyx_t_7, ((PyObject *)__pyx_kp_s_13), 0); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 796; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - goto __pyx_L8_exception_handled; - } - __pyx_L9_except_error:; - __Pyx_XGIVEREF(__pyx_save_exc_type); - __Pyx_XGIVEREF(__pyx_save_exc_value); - __Pyx_XGIVEREF(__pyx_save_exc_tb); - __Pyx_ExceptionReset(__pyx_save_exc_type, __pyx_save_exc_value, __pyx_save_exc_tb); - goto __pyx_L1_error; - __pyx_L8_exception_handled:; - __Pyx_XGIVEREF(__pyx_save_exc_type); - __Pyx_XGIVEREF(__pyx_save_exc_value); - __Pyx_XGIVEREF(__pyx_save_exc_tb); - __Pyx_ExceptionReset(__pyx_save_exc_type, __pyx_save_exc_value, __pyx_save_exc_tb); - __pyx_L14_try_end:; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":797 - * except IndexError: - * raise RegexError, "bogus escape (end of line)" - * ch = ch + c # <<<<<<<<<<<<<< - * self.index = self.index + len(ch) - * self.next = ch - */ - __pyx_t_2 = PyNumber_Add(__pyx_v_ch, __pyx_v_c); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_v_ch); - __pyx_v_ch = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L6; - } - __pyx_L6:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":798 - * raise RegexError, "bogus escape (end of line)" - * ch = ch + c - * self.index = self.index + len(ch) # <<<<<<<<<<<<<< - * self.next = ch - * def get(self): - */ - __pyx_t_2 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__index); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_Length(__pyx_v_ch); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 798; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} - __pyx_t_4 = PyInt_FromSsize_t(__pyx_t_3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyNumber_Add(__pyx_t_2, __pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyObject_SetAttr(__pyx_v_self, __pyx_n_s__index, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":799 - * ch = ch + c - * self.index = self.index + len(ch) - * self.next = ch # <<<<<<<<<<<<<< - * def get(self): - * this = self.next - */ - if (PyObject_SetAttr(__pyx_v_self, __pyx_n_s__next, __pyx_v_ch) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("re2.Tokenizer.__next"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_ch); - __Pyx_DECREF(__pyx_v_c); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":800 - * self.index = self.index + len(ch) - * self.next = ch - * def get(self): # <<<<<<<<<<<<<< - * this = self.next - * self.__next() - */ - -static PyObject *__pyx_pf_3re2_9Tokenizer_get(PyObject *__pyx_self, PyObject *__pyx_v_self); /*proto*/ -static PyMethodDef __pyx_mdef_3re2_9Tokenizer_get = {__Pyx_NAMESTR("get"), (PyCFunction)__pyx_pf_3re2_9Tokenizer_get, METH_O, __Pyx_DOCSTR(0)}; -static PyObject *__pyx_pf_3re2_9Tokenizer_get(PyObject *__pyx_self, PyObject 
*__pyx_v_self) { - PyObject *__pyx_v_this; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - __Pyx_RefNannySetupContext("get"); - __pyx_self = __pyx_self; - __pyx_v_this = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":801 - * self.next = ch - * def get(self): - * this = self.next # <<<<<<<<<<<<<< - * self.__next() - * return this - */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s__next); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 801; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_v_this); - __pyx_v_this = __pyx_t_1; - __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":802 - * def get(self): - * this = self.next - * self.__next() # <<<<<<<<<<<<<< - * return this - * - */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_self, __pyx_n_s____next); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":803 - * this = self.next - * self.__next() - * return this # <<<<<<<<<<<<<< - * - * def prepare_pattern(pattern, int flags): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_this); - __pyx_r = __pyx_v_this; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("re2.Tokenizer.get"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_this); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* 
"/Users/maxiak/pyre2/src/re2.pyx":805 - * return this - * - * def prepare_pattern(pattern, int flags): # <<<<<<<<<<<<<< - * source = Tokenizer(pattern) - * new_pattern = [] - */ - -static PyObject *__pyx_pf_3re2_prepare_pattern(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyObject *__pyx_pf_3re2_prepare_pattern(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - int __pyx_v_flags; - PyObject *__pyx_v_source; - PyObject *__pyx_v_new_pattern; - PyObject *__pyx_v_strflags = 0; - PyObject *__pyx_v_this; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - int __pyx_t_7; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__flags,0}; - __Pyx_RefNannySetupContext("prepare_pattern"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[2] = {0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__flags); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("prepare_pattern", 1, 2, 2, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 805; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "prepare_pattern") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 805; __pyx_clineno = __LINE__; goto 
__pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - __pyx_v_flags = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 805; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { - goto __pyx_L5_argtuple_error; - } else { - __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - __pyx_v_flags = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 805; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("prepare_pattern", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 805; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.prepare_pattern"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_v_source = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_new_pattern = ((PyObject *)Py_None); __Pyx_INCREF(Py_None); - __pyx_v_this = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":806 - * - * def prepare_pattern(pattern, int flags): - * source = Tokenizer(pattern) # <<<<<<<<<<<<<< - * new_pattern = [] - * - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__Tokenizer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - __pyx_t_3 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_v_source); - __pyx_v_source = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":807 - * def prepare_pattern(pattern, int flags): - * source = Tokenizer(pattern) - * new_pattern = [] # <<<<<<<<<<<<<< - * - * cdef str strflags = '' - */ - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 807; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __Pyx_DECREF(((PyObject *)__pyx_v_new_pattern)); - __pyx_v_new_pattern = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":809 - * new_pattern = [] - * - * cdef str strflags = '' # <<<<<<<<<<<<<< - * if flags & _S: - * strflags += 's' - */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_7)); - __pyx_v_strflags = __pyx_kp_s_7; - - /* "/Users/maxiak/pyre2/src/re2.pyx":810 - * - * cdef str strflags = '' - * if flags & _S: # <<<<<<<<<<<<<< - * strflags += 's' - * if flags & _M: - */ - __pyx_t_4 = (__pyx_v_flags & __pyx_v_3re2__S); - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":811 - * cdef str strflags = '' - * if flags & _S: - * strflags += 's' # <<<<<<<<<<<<<< - * if flags & _M: - * strflags += 'm' - */ - __pyx_t_3 = PyNumber_InPlaceAdd(((PyObject *)__pyx_v_strflags), ((PyObject *)__pyx_n_s__s)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 811; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (!(likely(PyString_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected str, got %.200s", Py_TYPE(__pyx_t_3)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 811; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_v_strflags)); - __pyx_v_strflags = ((PyObject 
*)__pyx_t_3); - __pyx_t_3 = 0; - goto __pyx_L6; - } - __pyx_L6:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":812 - * if flags & _S: - * strflags += 's' - * if flags & _M: # <<<<<<<<<<<<<< - * strflags += 'm' - * - */ - __pyx_t_4 = (__pyx_v_flags & __pyx_v_3re2__M); - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":813 - * strflags += 's' - * if flags & _M: - * strflags += 'm' # <<<<<<<<<<<<<< - * - * if strflags: - */ - __pyx_t_3 = PyNumber_InPlaceAdd(((PyObject *)__pyx_v_strflags), ((PyObject *)__pyx_n_s__m)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 813; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (!(likely(PyString_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected str, got %.200s", Py_TYPE(__pyx_t_3)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 813; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_v_strflags)); - __pyx_v_strflags = ((PyObject *)__pyx_t_3); - __pyx_t_3 = 0; - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":815 - * strflags += 'm' - * - * if strflags: # <<<<<<<<<<<<<< - * new_pattern.append('(?' + strflags + ')') - * - */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(((PyObject *)__pyx_v_strflags)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 815; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":816 - * - * if strflags: - * new_pattern.append('(?' 
+ strflags + ')') # <<<<<<<<<<<<<< - * - * while 1: - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 816; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_3 = PyNumber_Add(((PyObject *)__pyx_kp_s_14), ((PyObject *)__pyx_v_strflags)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 816; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_2 = PyNumber_Add(((PyObject *)__pyx_t_3), ((PyObject *)__pyx_kp_s_15)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 816; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_t_2)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 816; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - goto __pyx_L8; - } - __pyx_L8:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":818 - * new_pattern.append('(?' 
+ strflags + ')') - * - * while 1: # <<<<<<<<<<<<<< - * this = source.get() - * if this is None: - */ - while (1) { - if (!1) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":819 - * - * while 1: - * this = source.get() # <<<<<<<<<<<<<< - * if this is None: - * break - */ - __pyx_t_2 = PyObject_GetAttr(__pyx_v_source, __pyx_n_s__get); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 819; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 819; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_v_this); - __pyx_v_this = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":820 - * while 1: - * this = source.get() - * if this is None: # <<<<<<<<<<<<<< - * break - * if flags & _X: - */ - __pyx_t_5 = (__pyx_v_this == Py_None); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":821 - * this = source.get() - * if this is None: - * break # <<<<<<<<<<<<<< - * if flags & _X: - * if this in WHITESPACE: - */ - goto __pyx_L10_break; - goto __pyx_L11; - } - __pyx_L11:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":822 - * if this is None: - * break - * if flags & _X: # <<<<<<<<<<<<<< - * if this in WHITESPACE: - * continue - */ - __pyx_t_4 = (__pyx_v_flags & __pyx_v_3re2__X); - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":823 - * break - * if flags & _X: - * if this in WHITESPACE: # <<<<<<<<<<<<<< - * continue - * if this == "#": - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__WHITESPACE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 823; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = ((PySequence_Contains(__pyx_t_3, __pyx_v_this))); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 823; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":824 - * if flags & _X: - * if this in WHITESPACE: - * continue # <<<<<<<<<<<<<< - * if this == "#": - * while 1: - */ - goto __pyx_L9_continue; - goto __pyx_L13; - } - __pyx_L13:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":825 - * if this in WHITESPACE: - * continue - * if this == "#": # <<<<<<<<<<<<<< - * while 1: - * this = source.get() - */ - __pyx_t_3 = PyObject_RichCompare(__pyx_v_this, ((PyObject *)__pyx_kp_s_16), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 825; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 825; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":826 - * continue - * if this == "#": - * while 1: # <<<<<<<<<<<<<< - * this = source.get() - * if this in (None, "\n"): - */ - while (1) { - if (!1) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":827 - * if this == "#": - * while 1: - * this = source.get() # <<<<<<<<<<<<<< - * if this in (None, "\n"): - * break - */ - __pyx_t_3 = PyObject_GetAttr(__pyx_v_source, __pyx_n_s__get); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_v_this); - __pyx_v_this = __pyx_t_2; - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":828 - * while 1: - * this = source.get() - * if 
this in (None, "\n"): # <<<<<<<<<<<<<< - * break - * continue - */ - __Pyx_INCREF(__pyx_v_this); - __pyx_t_2 = __pyx_v_this; - __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, Py_None, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __pyx_t_5; - if (!__pyx_t_6) { - __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_kp_s_10), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_7 = __pyx_t_5; - __pyx_t_5 = __pyx_t_7; - } else { - __pyx_t_5 = __pyx_t_6; - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_6 = __pyx_t_5; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":829 - * this = source.get() - * if this in (None, "\n"): - * break # <<<<<<<<<<<<<< - * continue - * - */ - goto __pyx_L16_break; - goto __pyx_L17; - } - __pyx_L17:; - } - __pyx_L16_break:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":830 - * if this in (None, "\n"): - * break - * continue # <<<<<<<<<<<<<< - * - * if this[0] not in '[\\': - */ - goto __pyx_L9_continue; - goto __pyx_L14; - } - __pyx_L14:; - goto __pyx_L12; - } - __pyx_L12:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":832 - * continue - * - * if this[0] not in '[\\': # <<<<<<<<<<<<<< - * new_pattern.append(this) - * continue - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 0, sizeof(long), 
PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = (__Pyx_NegateNonNeg(PySequence_Contains(((PyObject *)__pyx_kp_s_17), __pyx_t_2))); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":833 - * - * if this[0] not in '[\\': - * new_pattern.append(this) # <<<<<<<<<<<<<< - * continue - * - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), __pyx_v_this); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":834 - * if this[0] not in '[\\': - * new_pattern.append(this) - * continue # <<<<<<<<<<<<<< - * - * elif this == '[': - */ - goto __pyx_L9_continue; - goto __pyx_L18; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":836 - * continue - * - * elif this == '[': # <<<<<<<<<<<<<< - * new_pattern.append(this) - * while 1: - */ - __pyx_t_2 = PyObject_RichCompare(__pyx_v_this, ((PyObject *)__pyx_kp_s_18), Py_EQ); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":837 - * - * elif this == '[': - * new_pattern.append(this) # <<<<<<<<<<<<<< - * while 1: 
- * this = source.get() - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), __pyx_v_this); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":838 - * elif this == '[': - * new_pattern.append(this) - * while 1: # <<<<<<<<<<<<<< - * this = source.get() - * if this is None: - */ - while (1) { - if (!1) break; - - /* "/Users/maxiak/pyre2/src/re2.pyx":839 - * new_pattern.append(this) - * while 1: - * this = source.get() # <<<<<<<<<<<<<< - * if this is None: - * raise RegexError, "unexpected end of regular expression" - */ - __pyx_t_2 = PyObject_GetAttr(__pyx_v_source, __pyx_n_s__get); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_v_this); - __pyx_v_this = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":840 - * while 1: - * this = source.get() - * if this is None: # <<<<<<<<<<<<<< - * raise RegexError, "unexpected end of regular expression" - * elif this == ']': - */ - __pyx_t_6 = (__pyx_v_this == Py_None); - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":841 - * this = source.get() - * if this is None: - * raise RegexError, "unexpected end of regular expression" # <<<<<<<<<<<<<< - * elif this == ']': - * new_pattern.append(this) - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, 
__pyx_n_s__RegexError); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_Raise(__pyx_t_3, ((PyObject *)__pyx_kp_s_19), 0); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L21; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":842 - * if this is None: - * raise RegexError, "unexpected end of regular expression" - * elif this == ']': # <<<<<<<<<<<<<< - * new_pattern.append(this) - * break - */ - __pyx_t_3 = PyObject_RichCompare(__pyx_v_this, ((PyObject *)__pyx_kp_s_20), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":843 - * raise RegexError, "unexpected end of regular expression" - * elif this == ']': - * new_pattern.append(this) # <<<<<<<<<<<<<< - * break - * elif this[0] == '\\': - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), __pyx_v_this); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":844 - * elif this == ']': - * new_pattern.append(this) - * break # <<<<<<<<<<<<<< - * elif this[0] == '\\': - * if flags & _U: - */ - goto __pyx_L20_break; - goto __pyx_L21; - } - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":845 - * new_pattern.append(this) - * break - * elif this[0] == '\\': # <<<<<<<<<<<<<< - * if flags & _U: - * if this[1] == 'd': - */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_this, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_kp_s_3), Py_EQ); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":846 - * break - * elif this[0] == '\\': - * if flags & _U: # <<<<<<<<<<<<<< - * if this[1] == 'd': - * new_pattern.append(r'\p{Nd}') - */ - __pyx_t_4 = (__pyx_v_flags & __pyx_v_3re2__U); - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":847 - * elif this[0] == '\\': - * if flags & _U: - * if this[1] == 'd': # <<<<<<<<<<<<<< - * new_pattern.append(r'\p{Nd}') - * elif this[1] == 'w': - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__d), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":848 - * if flags & _U: - * if this[1] == 'd': - * new_pattern.append(r'\p{Nd}') # <<<<<<<<<<<<<< - * elif this[1] == 'w': - * new_pattern.append(r'_\p{L}\p{Nd}') - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_kp_s_21)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L23; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":849 - * if this[1] == 'd': - * new_pattern.append(r'\p{Nd}') - * elif this[1] == 'w': # <<<<<<<<<<<<<< - * new_pattern.append(r'_\p{L}\p{Nd}') - * elif this[1] == 's': - */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__w), Py_EQ); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":850 - * new_pattern.append(r'\p{Nd}') - * elif this[1] == 'w': - * new_pattern.append(r'_\p{L}\p{Nd}') # <<<<<<<<<<<<<< - * elif this[1] == 's': - * new_pattern.append(r'\s\p{Z}') - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - 
PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 850; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_kp_s_22)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 850; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L23; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":851 - * elif this[1] == 'w': - * new_pattern.append(r'_\p{L}\p{Nd}') - * elif this[1] == 's': # <<<<<<<<<<<<<< - * new_pattern.append(r'\s\p{Z}') - * elif this[1] == 'D': - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 851; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__s), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 851; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 851; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":852 - * new_pattern.append(r'_\p{L}\p{Nd}') - * elif this[1] == 's': - * new_pattern.append(r'\s\p{Z}') # <<<<<<<<<<<<<< - * elif this[1] == 'D': - * new_pattern.append(r'\P{Nd}') - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 852; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_kp_s_23)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 852; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L23; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":853 - * elif this[1] == 's': - * new_pattern.append(r'\s\p{Z}') - * elif this[1] == 'D': # <<<<<<<<<<<<<< - * new_pattern.append(r'\P{Nd}') - * elif this[1] == 'W': - */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__D), Py_EQ); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":854 - * new_pattern.append(r'\s\p{Z}') - * elif this[1] == 'D': - * new_pattern.append(r'\P{Nd}') # <<<<<<<<<<<<<< - * elif this[1] == 'W': - * # Since \w and \s are made out of several character groups, - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 854; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_kp_s_24)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 854; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L23; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":855 - * elif this[1] == 'D': - * new_pattern.append(r'\P{Nd}') - * elif this[1] == 'W': # <<<<<<<<<<<<<< - * # Since \w and \s are made out of several character groups, - * # I don't see a way 
to convert their complements into a group - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__W), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":860 - * # without rewriting the whole expression, which seems too complicated. - * - * raise CharClassProblemException() # <<<<<<<<<<<<<< - * elif this[1] == 'S': - * raise CharClassProblemException() - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s_25); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 860; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 860; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_Raise(__pyx_t_2, 0, 0); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 860; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L23; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":861 - * - * raise CharClassProblemException() - * elif this[1] == 'S': # <<<<<<<<<<<<<< - * raise CharClassProblemException() - * else: - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__S), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":862 - * raise CharClassProblemException() - * elif this[1] == 'S': - * raise CharClassProblemException() # <<<<<<<<<<<<<< - * else: - * new_pattern.append(this) - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s_25); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 862; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 862; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_Raise(__pyx_t_2, 0, 0); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 862; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L23; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":864 - * raise CharClassProblemException() - * else: - * new_pattern.append(this) # <<<<<<<<<<<<<< - * else: - * new_pattern.append(this) - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 864; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), 
__pyx_v_this); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 864; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_L23:; - goto __pyx_L22; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":866 - * new_pattern.append(this) - * else: - * new_pattern.append(this) # <<<<<<<<<<<<<< - * else: - * new_pattern.append(this) - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 866; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), __pyx_v_this); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 866; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_L22:; - goto __pyx_L21; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":868 - * new_pattern.append(this) - * else: - * new_pattern.append(this) # <<<<<<<<<<<<<< - * elif this[0] == '\\': - * if this[1] in '89': - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), __pyx_v_this); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_L21:; - } - __pyx_L20_break:; - goto __pyx_L18; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":869 - * else: - * new_pattern.append(this) - * elif this[0] == '\\': # <<<<<<<<<<<<<< - * if this[1] in '89': - * raise BackreferencesException() - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 869; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = 
PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_kp_s_3), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 869; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 869; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":870 - * new_pattern.append(this) - * elif this[0] == '\\': - * if this[1] in '89': # <<<<<<<<<<<<<< - * raise BackreferencesException() - * elif this[1] in '1234567': - */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 870; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = ((PySequence_Contains(((PyObject *)__pyx_kp_s__89), __pyx_t_3))); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 870; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":871 - * elif this[0] == '\\': - * if this[1] in '89': - * raise BackreferencesException() # <<<<<<<<<<<<<< - * elif this[1] in '1234567': - * if source.next and source.next in '1234567': - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s_26); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 871; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 871; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_Raise(__pyx_t_2, 0, 0); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 871; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L24; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":872 - * if this[1] in '89': - * raise BackreferencesException() - * elif this[1] in '1234567': # <<<<<<<<<<<<<< - * if source.next and source.next in '1234567': - * this += source.get() - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 872; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = ((PySequence_Contains(((PyObject *)__pyx_kp_s__1234567), __pyx_t_2))); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 872; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_6) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":873 - * raise BackreferencesException() - * elif this[1] in '1234567': - * if source.next and source.next in '1234567': # <<<<<<<<<<<<<< - * this += source.get() - * if source.next and source.next in '1234567': - */ - __pyx_t_2 = PyObject_GetAttr(__pyx_v_source, __pyx_n_s__next); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_6) { - __pyx_t_2 = PyObject_GetAttr(__pyx_v_source, __pyx_n_s__next); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = ((PySequence_Contains(((PyObject *)__pyx_kp_s__1234567), __pyx_t_2))); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); 
__pyx_t_2 = 0; - __pyx_t_7 = __pyx_t_5; - } else { - __pyx_t_7 = __pyx_t_6; - } - if (__pyx_t_7) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":874 - * elif this[1] in '1234567': - * if source.next and source.next in '1234567': - * this += source.get() # <<<<<<<<<<<<<< - * if source.next and source.next in '1234567': - * # all clear, this is an octal escape - */ - __pyx_t_2 = PyObject_GetAttr(__pyx_v_source, __pyx_n_s__get); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyNumber_InPlaceAdd(__pyx_v_this, __pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_v_this); - __pyx_v_this = __pyx_t_2; - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":875 - * if source.next and source.next in '1234567': - * this += source.get() - * if source.next and source.next in '1234567': # <<<<<<<<<<<<<< - * # all clear, this is an octal escape - * new_pattern.append(this) - */ - __pyx_t_2 = PyObject_GetAttr(__pyx_v_source, __pyx_n_s__next); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 875; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 875; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_7) { - __pyx_t_2 = PyObject_GetAttr(__pyx_v_source, __pyx_n_s__next); if (unlikely(!__pyx_t_2)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 875; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = ((PySequence_Contains(((PyObject *)__pyx_kp_s__1234567), __pyx_t_2))); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 875; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_5 = __pyx_t_6; - } else { - __pyx_t_5 = __pyx_t_7; - } - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":877 - * if source.next and source.next in '1234567': - * # all clear, this is an octal escape - * new_pattern.append(this) # <<<<<<<<<<<<<< - * else: - * raise BackreferencesException() - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 877; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), __pyx_v_this); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 877; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L26; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":879 - * new_pattern.append(this) - * else: - * raise BackreferencesException() # <<<<<<<<<<<<<< - * else: - * raise BackreferencesException() - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s_26); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_Raise(__pyx_t_3, 0, 0); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } 
- __pyx_L26:; - goto __pyx_L25; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":881 - * raise BackreferencesException() - * else: - * raise BackreferencesException() # <<<<<<<<<<<<<< - * elif flags & _U: - * if this[1] == 'd': - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s_26); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 881; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 881; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_Raise(__pyx_t_2, 0, 0); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 881; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_L25:; - goto __pyx_L24; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":882 - * else: - * raise BackreferencesException() - * elif flags & _U: # <<<<<<<<<<<<<< - * if this[1] == 'd': - * new_pattern.append(r'\p{Nd}') - */ - __pyx_t_4 = (__pyx_v_flags & __pyx_v_3re2__U); - if (__pyx_t_4) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":883 - * raise BackreferencesException() - * elif flags & _U: - * if this[1] == 'd': # <<<<<<<<<<<<<< - * new_pattern.append(r'\p{Nd}') - * elif this[1] == 'w': - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 883; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__d), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 883; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 883; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":884 - * elif flags & _U: - * if this[1] == 'd': - * new_pattern.append(r'\p{Nd}') # <<<<<<<<<<<<<< - * elif this[1] == 'w': - * new_pattern.append(r'[_\p{L}\p{Nd}]') - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_kp_s_21)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L27; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":885 - * if this[1] == 'd': - * new_pattern.append(r'\p{Nd}') - * elif this[1] == 'w': # <<<<<<<<<<<<<< - * new_pattern.append(r'[_\p{L}\p{Nd}]') - * elif this[1] == 's': - */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 885; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__w), Py_EQ); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 885; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 885; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":886 - * new_pattern.append(r'\p{Nd}') - * elif this[1] == 'w': - * new_pattern.append(r'[_\p{L}\p{Nd}]') # <<<<<<<<<<<<<< - * elif this[1] == 's': - * new_pattern.append(r'[\s\p{Z}]') - 
*/ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_kp_s_27)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L27; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":887 - * elif this[1] == 'w': - * new_pattern.append(r'[_\p{L}\p{Nd}]') - * elif this[1] == 's': # <<<<<<<<<<<<<< - * new_pattern.append(r'[\s\p{Z}]') - * elif this[1] == 'D': - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 887; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__s), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 887; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 887; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":888 - * new_pattern.append(r'[_\p{L}\p{Nd}]') - * elif this[1] == 's': - * new_pattern.append(r'[\s\p{Z}]') # <<<<<<<<<<<<<< - * elif this[1] == 'D': - * new_pattern.append(r'[^\p{Nd}]') - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 888; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject 
*)__pyx_kp_s_28)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 888; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L27; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":889 - * elif this[1] == 's': - * new_pattern.append(r'[\s\p{Z}]') - * elif this[1] == 'D': # <<<<<<<<<<<<<< - * new_pattern.append(r'[^\p{Nd}]') - * elif this[1] == 'W': - */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 889; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, ((PyObject *)__pyx_n_s__D), Py_EQ); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 889; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 889; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":890 - * new_pattern.append(r'[\s\p{Z}]') - * elif this[1] == 'D': - * new_pattern.append(r'[^\p{Nd}]') # <<<<<<<<<<<<<< - * elif this[1] == 'W': - * new_pattern.append(r'[^_\p{L}\p{Nd}]') - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_kp_s_29)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L27; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":891 - * elif this[1] == 'D': - * new_pattern.append(r'[^\p{Nd}]') - * elif this[1] == 'W': # <<<<<<<<<<<<<< - * 
new_pattern.append(r'[^_\p{L}\p{Nd}]') - * elif this[1] == 'S': - */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 891; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_2, ((PyObject *)__pyx_n_s__W), Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 891; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 891; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":892 - * new_pattern.append(r'[^\p{Nd}]') - * elif this[1] == 'W': - * new_pattern.append(r'[^_\p{L}\p{Nd}]') # <<<<<<<<<<<<<< - * elif this[1] == 'S': - * new_pattern.append(r'[^\s\p{Z}]') - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_kp_s_30)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L27; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":893 - * elif this[1] == 'W': - * new_pattern.append(r'[^_\p{L}\p{Nd}]') - * elif this[1] == 'S': # <<<<<<<<<<<<<< - * new_pattern.append(r'[^\s\p{Z}]') - * else: - */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_this, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 893; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_RichCompare(__pyx_t_3, 
((PyObject *)__pyx_n_s__S), Py_EQ); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 893; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 893; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":894 - * new_pattern.append(r'[^_\p{L}\p{Nd}]') - * elif this[1] == 'S': - * new_pattern.append(r'[^\s\p{Z}]') # <<<<<<<<<<<<<< - * else: - * new_pattern.append(this) - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 894; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), ((PyObject *)__pyx_kp_s_31)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 894; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L27; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":896 - * new_pattern.append(r'[^\s\p{Z}]') - * else: - * new_pattern.append(this) # <<<<<<<<<<<<<< - * else: - * new_pattern.append(this) - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 896; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), __pyx_v_this); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 896; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_L27:; - goto __pyx_L24; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":898 - * new_pattern.append(this) - * else: - * new_pattern.append(this) # <<<<<<<<<<<<<< - * - * 
return ''.join(new_pattern) - */ - if (unlikely(__pyx_v_new_pattern == Py_None)) { - PyErr_SetString(PyExc_AttributeError, "'NoneType' object has no attribute 'append'"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_4 = PyList_Append(((PyObject *)__pyx_v_new_pattern), __pyx_v_this); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_L24:; - goto __pyx_L18; - } - __pyx_L18:; - __pyx_L9_continue:; - } - __pyx_L10_break:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":900 - * new_pattern.append(this) - * - * return ''.join(new_pattern) # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_7), __pyx_n_s__join); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 900; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 900; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(((PyObject *)__pyx_v_new_pattern)); - PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_v_new_pattern)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_new_pattern)); - __pyx_t_1 = PyObject_Call(__pyx_t_2, __pyx_t_3, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 900; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.prepare_pattern"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_source); - __Pyx_DECREF(__pyx_v_new_pattern); - 
__Pyx_XDECREF(__pyx_v_strflags); - __Pyx_DECREF(__pyx_v_this); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":904 - * - * - * def _compile(pattern, int flags=0, int max_mem=8388608): # <<<<<<<<<<<<<< - * """ - * Compile a regular expression pattern, returning a pattern object. - */ - -static PyObject *__pyx_pf_3re2__compile(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2__compile[] = "\n Compile a regular expression pattern, returning a pattern object.\n "; -static PyObject *__pyx_pf_3re2__compile(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - int __pyx_v_flags; - int __pyx_v_max_mem; - char *__pyx_v_string; - Py_ssize_t __pyx_v_length; - re2::StringPiece *__pyx_v_s; - RE2::Options __pyx_v_opts; - int __pyx_v_error_code; - int __pyx_v_encoded; - PyObject *__pyx_v_original_pattern = 0; - PyObject *__pyx_v_error_msg; - re2::RE2 *__pyx_v_re_pattern; - struct __pyx_obj_3re2_Pattern *__pyx_v_pypattern = 0; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_t_3; - PyObject *__pyx_t_4 = NULL; - int __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - int __pyx_t_9; - int __pyx_t_10; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__flags,&__pyx_n_s__max_mem,0}; - __Pyx_RefNannySetupContext("_compile"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = 
PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__flags); - if (value) { values[1] = value; kw_args--; } - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__max_mem); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "_compile") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - if (values[1]) { - __pyx_v_flags = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_flags = ((int)0); - } - if (values[2]) { - __pyx_v_max_mem = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_max_mem == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_max_mem = ((int)8388608); - } - } else { - __pyx_v_flags = ((int)0); - __pyx_v_max_mem = ((int)8388608); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: __pyx_v_max_mem = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_max_mem == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: __pyx_v_flags = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 1: __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto 
__pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("_compile", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2._compile"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __Pyx_INCREF(__pyx_v_pattern); - __pyx_v_error_msg = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":913 - * cdef _re2.Options opts - * cdef int error_code - * cdef int encoded = 0 # <<<<<<<<<<<<<< - * - * if isinstance(pattern, (Pattern, SREPattern)): - */ - __pyx_v_encoded = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":915 - * cdef int encoded = 0 - * - * if isinstance(pattern, (Pattern, SREPattern)): # <<<<<<<<<<<<<< - * if flags: - * raise ValueError('Cannot process flags argument with a compiled pattern') - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__SREPattern); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 915; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 915; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)((PyObject*)__pyx_ptype_3re2_Pattern))); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)((PyObject*)__pyx_ptype_3re2_Pattern))); - __Pyx_GIVEREF(((PyObject *)((PyObject*)__pyx_ptype_3re2_Pattern))); - PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_3 = PyObject_IsInstance(__pyx_v_pattern, __pyx_t_2); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 915; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":916 - * - * if isinstance(pattern, (Pattern, SREPattern)): - * if 
flags: # <<<<<<<<<<<<<< - * raise ValueError('Cannot process flags argument with a compiled pattern') - * return pattern - */ - if (__pyx_v_flags) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":917 - * if isinstance(pattern, (Pattern, SREPattern)): - * if flags: - * raise ValueError('Cannot process flags argument with a compiled pattern') # <<<<<<<<<<<<<< - * return pattern - * - */ - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 917; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_32)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_32)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_32)); - __pyx_t_1 = PyObject_Call(__pyx_builtin_ValueError, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 917; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_Raise(__pyx_t_1, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 917; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L7; - } - __pyx_L7:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":918 - * if flags: - * raise ValueError('Cannot process flags argument with a compiled pattern') - * return pattern # <<<<<<<<<<<<<< - * - * cdef object original_pattern = pattern - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_pattern); - __pyx_r = __pyx_v_pattern; - goto __pyx_L0; - goto __pyx_L6; - } - __pyx_L6:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":920 - * return pattern - * - * cdef object original_pattern = pattern # <<<<<<<<<<<<<< - * try: - * pattern = prepare_pattern(original_pattern, flags) - */ - __Pyx_INCREF(__pyx_v_pattern); - __pyx_v_original_pattern = __pyx_v_pattern; - - /* "/Users/maxiak/pyre2/src/re2.pyx":921 - * - * cdef object original_pattern = pattern - * try: # <<<<<<<<<<<<<< - * pattern = prepare_pattern(original_pattern, 
flags) - * except BackreferencesException: - */ - { - PyObject *__pyx_save_exc_type, *__pyx_save_exc_value, *__pyx_save_exc_tb; - __Pyx_ExceptionSave(&__pyx_save_exc_type, &__pyx_save_exc_value, &__pyx_save_exc_tb); - __Pyx_XGOTREF(__pyx_save_exc_type); - __Pyx_XGOTREF(__pyx_save_exc_value); - __Pyx_XGOTREF(__pyx_save_exc_tb); - /*try:*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":922 - * cdef object original_pattern = pattern - * try: - * pattern = prepare_pattern(original_pattern, flags) # <<<<<<<<<<<<<< - * except BackreferencesException: - * error_msg = "Backreferences not supported" - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__prepare_pattern); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 922; __pyx_clineno = __LINE__; goto __pyx_L8_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 922; __pyx_clineno = __LINE__; goto __pyx_L8_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 922; __pyx_clineno = __LINE__; goto __pyx_L8_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(__pyx_v_original_pattern); - PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_v_original_pattern); - __Pyx_GIVEREF(__pyx_v_original_pattern); - PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_1, __pyx_t_4, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 922; __pyx_clineno = __LINE__; goto __pyx_L8_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_v_pattern); - __pyx_v_pattern = __pyx_t_2; - __pyx_t_2 = 0; - } - __Pyx_XDECREF(__pyx_save_exc_type); __pyx_save_exc_type = 0; - __Pyx_XDECREF(__pyx_save_exc_value); __pyx_save_exc_value = 0; - __Pyx_XDECREF(__pyx_save_exc_tb); __pyx_save_exc_tb = 0; - goto 
__pyx_L15_try_end; - __pyx_L8_error:; - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":923 - * try: - * pattern = prepare_pattern(original_pattern, flags) - * except BackreferencesException: # <<<<<<<<<<<<<< - * error_msg = "Backreferences not supported" - * if current_notification == FALLBACK_EXCEPTION: - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s_26); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 923; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = PyErr_ExceptionMatches(__pyx_t_2); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_5) { - __Pyx_AddTraceback("re2._compile"); - if (__Pyx_GetException(&__pyx_t_2, &__pyx_t_4, &__pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 923; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_GOTREF(__pyx_t_4); - __Pyx_GOTREF(__pyx_t_1); - - /* "/Users/maxiak/pyre2/src/re2.pyx":924 - * pattern = prepare_pattern(original_pattern, flags) - * except BackreferencesException: - * error_msg = "Backreferences not supported" # <<<<<<<<<<<<<< - * if current_notification == FALLBACK_EXCEPTION: - * # Raise an exception regardless of the type of error. - */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_33)); - __Pyx_DECREF(__pyx_v_error_msg); - __pyx_v_error_msg = ((PyObject *)__pyx_kp_s_33); - - /* "/Users/maxiak/pyre2/src/re2.pyx":925 - * except BackreferencesException: - * error_msg = "Backreferences not supported" - * if current_notification == FALLBACK_EXCEPTION: # <<<<<<<<<<<<<< - * # Raise an exception regardless of the type of error. 
- * raise RegexError(error_msg) - */ - __pyx_t_6 = __Pyx_GetName(__pyx_m, __pyx_n_s__FALLBACK_EXCEPTION); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_5 = __Pyx_PyInt_AsInt(__pyx_t_6); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_3 = (__pyx_v_3re2_current_notification == __pyx_t_5); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":927 - * if current_notification == FALLBACK_EXCEPTION: - * # Raise an exception regardless of the type of error. - * raise RegexError(error_msg) # <<<<<<<<<<<<<< - * elif current_notification == FALLBACK_WARNING: - * warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - */ - __pyx_t_6 = __Pyx_GetName(__pyx_m, __pyx_n_s__RegexError); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_INCREF(__pyx_v_error_msg); - PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_v_error_msg); - __Pyx_GIVEREF(__pyx_v_error_msg); - __pyx_t_8 = PyObject_Call(__pyx_t_6, __pyx_t_7, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_Raise(__pyx_t_8, 0, 0); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - goto __pyx_L18; - } - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":928 - * # Raise an exception regardless of the type of error. - * raise RegexError(error_msg) - * elif current_notification == FALLBACK_WARNING: # <<<<<<<<<<<<<< - * warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - * return re.compile(original_pattern, flags) - */ - __pyx_t_8 = __Pyx_GetName(__pyx_m, __pyx_n_s__FALLBACK_WARNING); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 928; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_5 = __Pyx_PyInt_AsInt(__pyx_t_8); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 928; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_3 = (__pyx_v_3re2_current_notification == __pyx_t_5); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":929 - * raise RegexError(error_msg) - * elif current_notification == FALLBACK_WARNING: - * warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) # <<<<<<<<<<<<<< - * return re.compile(original_pattern, flags) - * except CharClassProblemException: - */ - __pyx_t_8 = __Pyx_GetName(__pyx_m, __pyx_n_s__warnings); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_7 = PyObject_GetAttr(__pyx_t_8, __pyx_n_s__warn); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_34), __pyx_v_error_msg); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_8)); - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_8)); - __Pyx_GIVEREF(((PyObject *)__pyx_t_8)); - __pyx_t_8 = 0; - __pyx_t_8 = PyObject_Call(__pyx_t_7, __pyx_t_6, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - goto __pyx_L18; - } - __pyx_L18:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":930 - * elif current_notification == FALLBACK_WARNING: - * warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) - * return re.compile(original_pattern, flags) # <<<<<<<<<<<<<< - * except CharClassProblemException: - * error_msg = "\W and \S not supported inside character classes" - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_8 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_6 = PyObject_GetAttr(__pyx_t_8, __pyx_n_s__compile); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_INCREF(__pyx_v_original_pattern); - PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_v_original_pattern); - __Pyx_GIVEREF(__pyx_v_original_pattern); - PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_8); - __Pyx_GIVEREF(__pyx_t_8); - __pyx_t_8 = 0; - __pyx_t_8 = PyObject_Call(__pyx_t_6, __pyx_t_7, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_r = __pyx_t_8; - __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - goto __pyx_L11_except_return; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L9_exception_handled; - } - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":931 - * warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - * return re.compile(original_pattern, flags) - * except CharClassProblemException: # <<<<<<<<<<<<<< - * error_msg = "\W and \S not supported inside character classes" - * if current_notification == FALLBACK_EXCEPTION: - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s_25); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 931; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyErr_ExceptionMatches(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (__pyx_t_5) { - __Pyx_AddTraceback("re2._compile"); - if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_4, &__pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 931; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_GOTREF(__pyx_t_4); - __Pyx_GOTREF(__pyx_t_2); - - /* "/Users/maxiak/pyre2/src/re2.pyx":932 - * return re.compile(original_pattern, flags) - * except CharClassProblemException: - * error_msg = "\W and \S not supported inside character classes" # <<<<<<<<<<<<<< - * if current_notification == FALLBACK_EXCEPTION: - * # Raise an exception regardless of the type of error. - */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_35)); - __Pyx_DECREF(__pyx_v_error_msg); - __pyx_v_error_msg = ((PyObject *)__pyx_kp_s_35); - - /* "/Users/maxiak/pyre2/src/re2.pyx":933 - * except CharClassProblemException: - * error_msg = "\W and \S not supported inside character classes" - * if current_notification == FALLBACK_EXCEPTION: # <<<<<<<<<<<<<< - * # Raise an exception regardless of the type of error. 
- * raise RegexError(error_msg) - */ - __pyx_t_8 = __Pyx_GetName(__pyx_m, __pyx_n_s__FALLBACK_EXCEPTION); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_5 = __Pyx_PyInt_AsInt(__pyx_t_8); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_3 = (__pyx_v_3re2_current_notification == __pyx_t_5); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":935 - * if current_notification == FALLBACK_EXCEPTION: - * # Raise an exception regardless of the type of error. - * raise RegexError(error_msg) # <<<<<<<<<<<<<< - * elif current_notification == FALLBACK_WARNING: - * warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - */ - __pyx_t_8 = __Pyx_GetName(__pyx_m, __pyx_n_s__RegexError); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_INCREF(__pyx_v_error_msg); - PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_v_error_msg); - __Pyx_GIVEREF(__pyx_v_error_msg); - __pyx_t_6 = PyObject_Call(__pyx_t_8, __pyx_t_7, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_Raise(__pyx_t_6, 0, 0); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - goto __pyx_L21; - } - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":936 - * # Raise an exception regardless of the type of error. - * raise RegexError(error_msg) - * elif current_notification == FALLBACK_WARNING: # <<<<<<<<<<<<<< - * warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - * return re.compile(original_pattern, flags) - */ - __pyx_t_6 = __Pyx_GetName(__pyx_m, __pyx_n_s__FALLBACK_WARNING); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 936; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_5 = __Pyx_PyInt_AsInt(__pyx_t_6); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 936; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_3 = (__pyx_v_3re2_current_notification == __pyx_t_5); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":937 - * raise RegexError(error_msg) - * elif current_notification == FALLBACK_WARNING: - * warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) # <<<<<<<<<<<<<< - * return re.compile(original_pattern, flags) - * - */ - __pyx_t_6 = __Pyx_GetName(__pyx_m, __pyx_n_s__warnings); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 937; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PyObject_GetAttr(__pyx_t_6, __pyx_n_s__warn); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 937; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_34), __pyx_v_error_msg); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 937; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_6)); - __pyx_t_8 = PyTuple_New(1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 937; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - PyTuple_SET_ITEM(__pyx_t_8, 0, ((PyObject *)__pyx_t_6)); - __Pyx_GIVEREF(((PyObject *)__pyx_t_6)); - __pyx_t_6 = 0; - __pyx_t_6 = PyObject_Call(__pyx_t_7, __pyx_t_8, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 937; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - goto __pyx_L21; - } - __pyx_L21:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":938 - * elif current_notification == FALLBACK_WARNING: - * warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - * return re.compile(original_pattern, flags) # <<<<<<<<<<<<<< - * - * # Set the options given the flags above. 
- */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_6 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 938; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_8 = PyObject_GetAttr(__pyx_t_6, __pyx_n_s__compile); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 938; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 938; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 938; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_INCREF(__pyx_v_original_pattern); - PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_v_original_pattern); - __Pyx_GIVEREF(__pyx_v_original_pattern); - PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_6); - __Pyx_GIVEREF(__pyx_t_6); - __pyx_t_6 = 0; - __pyx_t_6 = PyObject_Call(__pyx_t_8, __pyx_t_7, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 938; __pyx_clineno = __LINE__; goto __pyx_L10_except_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_r = __pyx_t_6; - __pyx_t_6 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - goto __pyx_L11_except_return; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - goto __pyx_L9_exception_handled; - } - __pyx_L10_except_error:; - __Pyx_XGIVEREF(__pyx_save_exc_type); - __Pyx_XGIVEREF(__pyx_save_exc_value); - __Pyx_XGIVEREF(__pyx_save_exc_tb); - __Pyx_ExceptionReset(__pyx_save_exc_type, 
__pyx_save_exc_value, __pyx_save_exc_tb); - goto __pyx_L1_error; - __pyx_L11_except_return:; - __Pyx_XGIVEREF(__pyx_save_exc_type); - __Pyx_XGIVEREF(__pyx_save_exc_value); - __Pyx_XGIVEREF(__pyx_save_exc_tb); - __Pyx_ExceptionReset(__pyx_save_exc_type, __pyx_save_exc_value, __pyx_save_exc_tb); - goto __pyx_L0; - __pyx_L9_exception_handled:; - __Pyx_XGIVEREF(__pyx_save_exc_type); - __Pyx_XGIVEREF(__pyx_save_exc_value); - __Pyx_XGIVEREF(__pyx_save_exc_tb); - __Pyx_ExceptionReset(__pyx_save_exc_type, __pyx_save_exc_value, __pyx_save_exc_tb); - __pyx_L15_try_end:; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":941 - * - * # Set the options given the flags above. - * if flags & _I: # <<<<<<<<<<<<<< - * opts.set_case_sensitive(0); - * - */ - __pyx_t_5 = (__pyx_v_flags & __pyx_v_3re2__I); - if (__pyx_t_5) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":942 - * # Set the options given the flags above. - * if flags & _I: - * opts.set_case_sensitive(0); # <<<<<<<<<<<<<< - * - * opts.set_max_mem(max_mem) - */ - __pyx_v_opts.set_case_sensitive(0); - goto __pyx_L22; - } - __pyx_L22:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":944 - * opts.set_case_sensitive(0); - * - * opts.set_max_mem(max_mem) # <<<<<<<<<<<<<< - * opts.set_log_errors(0) - * opts.set_encoding(_re2.EncodingUTF8) - */ - __pyx_v_opts.set_max_mem(__pyx_v_max_mem); - - /* "/Users/maxiak/pyre2/src/re2.pyx":945 - * - * opts.set_max_mem(max_mem) - * opts.set_log_errors(0) # <<<<<<<<<<<<<< - * opts.set_encoding(_re2.EncodingUTF8) - * - */ - __pyx_v_opts.set_log_errors(0); - - /* "/Users/maxiak/pyre2/src/re2.pyx":946 - * opts.set_max_mem(max_mem) - * opts.set_log_errors(0) - * opts.set_encoding(_re2.EncodingUTF8) # <<<<<<<<<<<<<< - * - * # We use this function to get the proper length of the string. - */ - __pyx_v_opts.set_encoding(RE2::Options::EncodingUTF8); - - /* "/Users/maxiak/pyre2/src/re2.pyx":950 - * # We use this function to get the proper length of the string. 
- * - * pattern = unicode_to_bytestring(pattern, &encoded) # <<<<<<<<<<<<<< - * if pystring_to_bytestring(pattern, &string, &length) == -1: - * raise TypeError("first argument must be a string or compiled pattern") - */ - __pyx_t_2 = __pyx_f_3re2_unicode_to_bytestring(__pyx_v_pattern, (&__pyx_v_encoded)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 950; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_v_pattern); - __pyx_v_pattern = __pyx_t_2; - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":951 - * - * pattern = unicode_to_bytestring(pattern, &encoded) - * if pystring_to_bytestring(pattern, &string, &length) == -1: # <<<<<<<<<<<<<< - * raise TypeError("first argument must be a string or compiled pattern") - * - */ - __pyx_t_3 = (__pyx_f_3re2_pystring_to_bytestring(__pyx_v_pattern, (&__pyx_v_string), (&__pyx_v_length)) == -1); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":952 - * pattern = unicode_to_bytestring(pattern, &encoded) - * if pystring_to_bytestring(pattern, &string, &length) == -1: - * raise TypeError("first argument must be a string or compiled pattern") # <<<<<<<<<<<<<< - * - * s = new _re2.StringPiece(string, length) - */ - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 952; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_36)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_36)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_36)); - __pyx_t_4 = PyObject_Call(__pyx_builtin_TypeError, __pyx_t_2, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 952; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_Raise(__pyx_t_4, 0, 0); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 952; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} - goto __pyx_L23; - } - __pyx_L23:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":954 - * raise TypeError("first argument must be a string or compiled pattern") - * - * s = new _re2.StringPiece(string, length) # <<<<<<<<<<<<<< - * - * cdef _re2.RE2 *re_pattern - */ - __pyx_v_s = new re2::StringPiece(__pyx_v_string, __pyx_v_length); - - /* "/Users/maxiak/pyre2/src/re2.pyx":957 - * - * cdef _re2.RE2 *re_pattern - * with nogil: # <<<<<<<<<<<<<< - * re_pattern = new _re2.RE2(s[0], opts) - * - */ - { PyThreadState *_save; - Py_UNBLOCK_THREADS - /*try:*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":958 - * cdef _re2.RE2 *re_pattern - * with nogil: - * re_pattern = new _re2.RE2(s[0], opts) # <<<<<<<<<<<<<< - * - * if not re_pattern.ok(): - */ - __pyx_v_re_pattern = new re2::RE2((__pyx_v_s[0]), __pyx_v_opts); - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":957 - * - * cdef _re2.RE2 *re_pattern - * with nogil: # <<<<<<<<<<<<<< - * re_pattern = new _re2.RE2(s[0], opts) - * - */ - /*finally:*/ { - Py_BLOCK_THREADS - } - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":960 - * re_pattern = new _re2.RE2(s[0], opts) - * - * if not re_pattern.ok(): # <<<<<<<<<<<<<< - * # Something went wrong with the compilation. - * del s - */ - __pyx_t_3 = (!__pyx_v_re_pattern->ok()); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":962 - * if not re_pattern.ok(): - * # Something went wrong with the compilation. - * del s # <<<<<<<<<<<<<< - * error_msg = cpp_to_pystring(re_pattern.error()) - * error_code = re_pattern.error_code() - */ - delete __pyx_v_s; - - /* "/Users/maxiak/pyre2/src/re2.pyx":963 - * # Something went wrong with the compilation. 
- * del s - * error_msg = cpp_to_pystring(re_pattern.error()) # <<<<<<<<<<<<<< - * error_code = re_pattern.error_code() - * del re_pattern - */ - __pyx_t_4 = __pyx_f_3re2_cpp_to_pystring(__pyx_v_re_pattern->error()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 963; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_v_error_msg); - __pyx_v_error_msg = __pyx_t_4; - __pyx_t_4 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":964 - * del s - * error_msg = cpp_to_pystring(re_pattern.error()) - * error_code = re_pattern.error_code() # <<<<<<<<<<<<<< - * del re_pattern - * if current_notification == FALLBACK_EXCEPTION: - */ - __pyx_v_error_code = __pyx_v_re_pattern->error_code(); - - /* "/Users/maxiak/pyre2/src/re2.pyx":965 - * error_msg = cpp_to_pystring(re_pattern.error()) - * error_code = re_pattern.error_code() - * del re_pattern # <<<<<<<<<<<<<< - * if current_notification == FALLBACK_EXCEPTION: - * # Raise an exception regardless of the type of error. - */ - delete __pyx_v_re_pattern; - - /* "/Users/maxiak/pyre2/src/re2.pyx":966 - * error_code = re_pattern.error_code() - * del re_pattern - * if current_notification == FALLBACK_EXCEPTION: # <<<<<<<<<<<<<< - * # Raise an exception regardless of the type of error. 
- * raise RegexError(error_msg) - */ - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__FALLBACK_EXCEPTION); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 966; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyInt_AsInt(__pyx_t_4); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 966; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_3 = (__pyx_v_3re2_current_notification == __pyx_t_5); - if (__pyx_t_3) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":968 - * if current_notification == FALLBACK_EXCEPTION: - * # Raise an exception regardless of the type of error. - * raise RegexError(error_msg) # <<<<<<<<<<<<<< - * elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, - * _re2.ErrorBadEscape): - */ - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__RegexError); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 968; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 968; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_error_msg); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_error_msg); - __Pyx_GIVEREF(__pyx_v_error_msg); - __pyx_t_1 = PyObject_Call(__pyx_t_4, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 968; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_Raise(__pyx_t_1, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 968; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L28; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":969 - * # Raise an exception regardless of the type of error. 
- * raise RegexError(error_msg) - * elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, # <<<<<<<<<<<<<< - * _re2.ErrorBadEscape): - * # Raise an error because these will not be fixed by using the - */ - __pyx_t_5 = __pyx_v_error_code; - __pyx_t_3 = (__pyx_t_5 != RE2::ErrorBadPerlOp); - if (__pyx_t_3) { - __pyx_t_9 = (__pyx_t_5 != RE2::ErrorRepeatSize); - __pyx_t_10 = __pyx_t_9; - } else { - __pyx_t_10 = __pyx_t_3; - } - if (__pyx_t_10) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":970 - * raise RegexError(error_msg) - * elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, - * _re2.ErrorBadEscape): # <<<<<<<<<<<<<< - * # Raise an error because these will not be fixed by using the - * # ``re`` module. - */ - __pyx_t_3 = (__pyx_t_5 != RE2::ErrorBadEscape); - __pyx_t_9 = __pyx_t_3; - } else { - __pyx_t_9 = __pyx_t_10; - } - __pyx_t_10 = __pyx_t_9; - if (__pyx_t_10) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":973 - * # Raise an error because these will not be fixed by using the - * # ``re`` module. - * raise RegexError(error_msg) # <<<<<<<<<<<<<< - * elif current_notification == FALLBACK_WARNING: - * warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__RegexError); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 973; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 973; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_error_msg); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_error_msg); - __Pyx_GIVEREF(__pyx_v_error_msg); - __pyx_t_4 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 973; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_Raise(__pyx_t_4, 0, 0); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 973; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L28; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":974 - * # ``re`` module. - * raise RegexError(error_msg) - * elif current_notification == FALLBACK_WARNING: # <<<<<<<<<<<<<< - * warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) - * return re.compile(original_pattern, flags) - */ - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__FALLBACK_WARNING); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 974; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyInt_AsInt(__pyx_t_4); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 974; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_10 = (__pyx_v_3re2_current_notification == __pyx_t_5); - if (__pyx_t_10) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":975 - * raise RegexError(error_msg) - * elif current_notification == FALLBACK_WARNING: - * warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) # <<<<<<<<<<<<<< - * return re.compile(original_pattern, flags) - * - */ - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__warnings); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__warn); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_34), __pyx_v_error_msg); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_t_4)); - __Pyx_GIVEREF(((PyObject *)__pyx_t_4)); - __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(__pyx_t_2, __pyx_t_1, NULL); if (unlikely(!__pyx_t_4)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - goto __pyx_L28; - } - __pyx_L28:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":976 - * elif current_notification == FALLBACK_WARNING: - * warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - * return re.compile(original_pattern, flags) # <<<<<<<<<<<<<< - * - * cdef Pattern pypattern = Pattern() - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__compile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_original_pattern); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_original_pattern); - __Pyx_GIVEREF(__pyx_v_original_pattern); - PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - 
goto __pyx_L0; - goto __pyx_L27; - } - __pyx_L27:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":978 - * return re.compile(original_pattern, flags) - * - * cdef Pattern pypattern = Pattern() # <<<<<<<<<<<<<< - * pypattern.pattern = original_pattern - * pypattern.re_pattern = re_pattern - */ - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3re2_Pattern)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 978; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_v_pypattern = ((struct __pyx_obj_3re2_Pattern *)__pyx_t_4); - __pyx_t_4 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":979 - * - * cdef Pattern pypattern = Pattern() - * pypattern.pattern = original_pattern # <<<<<<<<<<<<<< - * pypattern.re_pattern = re_pattern - * pypattern.ngroups = re_pattern.NumberOfCapturingGroups() - */ - __Pyx_INCREF(__pyx_v_original_pattern); - __Pyx_GIVEREF(__pyx_v_original_pattern); - __Pyx_GOTREF(__pyx_v_pypattern->pattern); - __Pyx_DECREF(__pyx_v_pypattern->pattern); - __pyx_v_pypattern->pattern = __pyx_v_original_pattern; - - /* "/Users/maxiak/pyre2/src/re2.pyx":980 - * cdef Pattern pypattern = Pattern() - * pypattern.pattern = original_pattern - * pypattern.re_pattern = re_pattern # <<<<<<<<<<<<<< - * pypattern.ngroups = re_pattern.NumberOfCapturingGroups() - * pypattern.encoded = encoded - */ - __pyx_v_pypattern->re_pattern = __pyx_v_re_pattern; - - /* "/Users/maxiak/pyre2/src/re2.pyx":981 - * pypattern.pattern = original_pattern - * pypattern.re_pattern = re_pattern - * pypattern.ngroups = re_pattern.NumberOfCapturingGroups() # <<<<<<<<<<<<<< - * pypattern.encoded = encoded - * pypattern._flags = flags - */ - __pyx_v_pypattern->ngroups = __pyx_v_re_pattern->NumberOfCapturingGroups(); - - /* "/Users/maxiak/pyre2/src/re2.pyx":982 - * pypattern.re_pattern = re_pattern - * pypattern.ngroups = re_pattern.NumberOfCapturingGroups() - * pypattern.encoded = encoded # <<<<<<<<<<<<<< - * 
pypattern._flags = flags - * del s - */ - __pyx_v_pypattern->encoded = ((int)__pyx_v_encoded); - - /* "/Users/maxiak/pyre2/src/re2.pyx":983 - * pypattern.ngroups = re_pattern.NumberOfCapturingGroups() - * pypattern.encoded = encoded - * pypattern._flags = flags # <<<<<<<<<<<<<< - * del s - * return pypattern - */ - __pyx_v_pypattern->_flags = __pyx_v_flags; - - /* "/Users/maxiak/pyre2/src/re2.pyx":984 - * pypattern.encoded = encoded - * pypattern._flags = flags - * del s # <<<<<<<<<<<<<< - * return pypattern - * - */ - delete __pyx_v_s; - - /* "/Users/maxiak/pyre2/src/re2.pyx":985 - * pypattern._flags = flags - * del s - * return pypattern # <<<<<<<<<<<<<< - * - * def search(pattern, string, int flags=0): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)__pyx_v_pypattern)); - __pyx_r = ((PyObject *)__pyx_v_pypattern); - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("re2._compile"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_original_pattern); - __Pyx_DECREF(__pyx_v_error_msg); - __Pyx_XDECREF((PyObject *)__pyx_v_pypattern); - __Pyx_DECREF(__pyx_v_pattern); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":987 - * return pypattern - * - * def search(pattern, string, int flags=0): # <<<<<<<<<<<<<< - * """ - * Scan through string looking for a match to the pattern, returning - */ - -static PyObject *__pyx_pf_3re2_search(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_search[] = "\n Scan through string looking for a match to the pattern, returning\n a match object or none if no match was found.\n "; -static PyObject *__pyx_pf_3re2_search(PyObject *__pyx_self, PyObject *__pyx_args, 
PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_flags; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__string,&__pyx_n_s__flags,0}; - __Pyx_RefNannySetupContext("search"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("search", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__flags); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "search") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - __pyx_v_string = values[1]; - if (values[2]) { - __pyx_v_flags = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_flags = ((int)0); - } - } else { - 
__pyx_v_flags = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: - __pyx_v_flags = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("search", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.search"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":992 - * a match object or none if no match was found. - * """ - * return compile(pattern, flags).search(string) # <<<<<<<<<<<<<< - * - * def match(pattern, string, int flags=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__compile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 992; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 992; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 992; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_1, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 992; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__search); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 992; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 992; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - __pyx_t_1 = PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 992; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.search"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":994 - * return compile(pattern, flags).search(string) - * - * def match(pattern, string, int flags=0): # <<<<<<<<<<<<<< - * """ - * Try to apply the pattern at the start of the string, returning - */ - -static PyObject *__pyx_pf_3re2_match(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_match[] = "\n Try to apply the pattern at the start of the string, returning\n a match object, or None if no match was found.\n "; -static PyObject *__pyx_pf_3re2_match(PyObject *__pyx_self, 
PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_flags; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__string,&__pyx_n_s__flags,0}; - __Pyx_RefNannySetupContext("match"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("match", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__flags); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "match") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - __pyx_v_string = values[1]; - if (values[2]) { - __pyx_v_flags = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_flags = ((int)0); - } 
- } else { - __pyx_v_flags = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: - __pyx_v_flags = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("match", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.match"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":999 - * a match object, or None if no match was found. - * """ - * return compile(pattern, flags).match(string) # <<<<<<<<<<<<<< - * - * def finditer(pattern, string, int flags=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__compile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_1, __pyx_t_3, NULL); if 
(unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__match); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - __pyx_t_1 = PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.match"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":1001 - * return compile(pattern, flags).match(string) - * - * def finditer(pattern, string, int flags=0): # <<<<<<<<<<<<<< - * """ - * Return an list of all non-overlapping matches in the - */ - -static PyObject *__pyx_pf_3re2_finditer(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_finditer[] = "\n Return an list of all non-overlapping matches in the\n string. 
For each match, the iterator returns a match object.\n\n Empty matches are included in the result.\n "; -static PyObject *__pyx_pf_3re2_finditer(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_flags; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__string,&__pyx_n_s__flags,0}; - __Pyx_RefNannySetupContext("finditer"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("finditer", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1001; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__flags); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "finditer") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1001; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - __pyx_v_string = values[1]; - if (values[2]) { - __pyx_v_flags = __Pyx_PyInt_AsInt(values[2]); if 
(unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1001; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_flags = ((int)0); - } - } else { - __pyx_v_flags = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: - __pyx_v_flags = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1001; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("finditer", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1001; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.finditer"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1008 - * Empty matches are included in the result. 
- * """ - * return compile(pattern, flags).finditer(string) # <<<<<<<<<<<<<< - * - * def findall(pattern, string, int flags=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__compile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_1, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__finditer); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - __pyx_t_1 = PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - 
__Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.finditer"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":1010 - * return compile(pattern, flags).finditer(string) - * - * def findall(pattern, string, int flags=0): # <<<<<<<<<<<<<< - * """ - * Return an list of all non-overlapping matches in the - */ - -static PyObject *__pyx_pf_3re2_findall(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_findall[] = "\n Return an list of all non-overlapping matches in the\n string. For each match, the iterator returns a match object.\n\n Empty matches are included in the result.\n "; -static PyObject *__pyx_pf_3re2_findall(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_flags; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__string,&__pyx_n_s__flags,0}; - __Pyx_RefNannySetupContext("findall"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = 
PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("findall", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1010; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__flags); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "findall") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1010; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - __pyx_v_string = values[1]; - if (values[2]) { - __pyx_v_flags = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1010; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_flags = ((int)0); - } - } else { - __pyx_v_flags = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: - __pyx_v_flags = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1010; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("findall", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1010; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.findall"); - __Pyx_RefNannyFinishContext(); - return NULL; - 
__pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1017 - * Empty matches are included in the result. - * """ - * return compile(pattern, flags).findall(string) # <<<<<<<<<<<<<< - * - * def split(pattern, string, int maxsplit=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__compile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1017; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromLong(__pyx_v_flags); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1017; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1017; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_1, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1017; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__findall); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1017; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1017; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - __pyx_t_1 = PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1017; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.findall"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":1019 - * return compile(pattern, flags).findall(string) - * - * def split(pattern, string, int maxsplit=0): # <<<<<<<<<<<<<< - * """ - * Split the source string by the occurrences of the pattern, - */ - -static PyObject *__pyx_pf_3re2_split(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_split[] = "\n Split the source string by the occurrences of the pattern,\n returning a list containing the resulting substrings.\n "; -static PyObject *__pyx_pf_3re2_split(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_maxsplit; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__string,&__pyx_n_s__maxsplit,0}; - __Pyx_RefNannySetupContext("split"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[3] = {0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch 
(PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("split", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1019; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__maxsplit); - if (value) { values[2] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "split") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1019; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - __pyx_v_string = values[1]; - if (values[2]) { - __pyx_v_maxsplit = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_maxsplit == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1019; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_maxsplit = ((int)0); - } - } else { - __pyx_v_maxsplit = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 3: - __pyx_v_maxsplit = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_maxsplit == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1019; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 2: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("split", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1019; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - 
__Pyx_AddTraceback("re2.split"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1024 - * returning a list containing the resulting substrings. - * """ - * return compile(pattern).split(string, maxsplit) # <<<<<<<<<<<<<< - * - * def sub(pattern, repl, string, int count=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__compile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - __pyx_t_3 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__split); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromLong(__pyx_v_maxsplit); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_3); - 
__Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_t_2, __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.split"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":1026 - * return compile(pattern).split(string, maxsplit) - * - * def sub(pattern, repl, string, int count=0): # <<<<<<<<<<<<<< - * """ - * Return the string obtained by replacing the leftmost - */ - -static PyObject *__pyx_pf_3re2_sub(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_sub[] = "\n Return the string obtained by replacing the leftmost\n non-overlapping occurrences of the pattern in string by the\n replacement repl. repl can be either a string or a callable;\n if a string, backslash escapes in it are processed. 
If it is\n a callable, it's passed the match object and must return\n a replacement string to be used.\n "; -static PyObject *__pyx_pf_3re2_sub(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - PyObject *__pyx_v_repl = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_count; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__repl,&__pyx_n_s__string,&__pyx_n_s__count,0}; - __Pyx_RefNannySetupContext("sub"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[4] = {0,0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__repl); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("sub", 0, 3, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1026; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[2])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("sub", 0, 3, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1026; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 3: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__count); - if (value) { values[3] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if 
(unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "sub") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1026; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - __pyx_v_repl = values[1]; - __pyx_v_string = values[2]; - if (values[3]) { - __pyx_v_count = __Pyx_PyInt_AsInt(values[3]); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1026; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_count = ((int)0); - } - } else { - __pyx_v_count = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 4: - __pyx_v_count = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1026; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 3: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 2); - __pyx_v_repl = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("sub", 0, 3, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1026; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.sub"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1035 - * a replacement string to be used. 
- * """ - * return compile(pattern).sub(repl, string, count) # <<<<<<<<<<<<<< - * - * def subn(pattern, repl, string, int count=0): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__compile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - __pyx_t_3 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__sub); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromLong(__pyx_v_count); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_repl); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_repl); - __Pyx_GIVEREF(__pyx_v_repl); - __Pyx_INCREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_t_2, __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.sub"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":1037 - * return compile(pattern).sub(repl, string, count) - * - * def subn(pattern, repl, string, int count=0): # <<<<<<<<<<<<<< - * """ - * Return a 2-tuple containing (new_string, number). - */ - -static PyObject *__pyx_pf_3re2_subn(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_3re2_subn[] = "\n Return a 2-tuple containing (new_string, number).\n new_string is the string obtained by replacing the leftmost\n non-overlapping occurrences of the pattern in the source\n string by the replacement repl. number is the number of\n substitutions that were made. 
repl can be either a string or a\n callable; if a string, backslash escapes in it are processed.\n If it is a callable, it's passed the match object and must\n return a replacement string to be used.\n "; -static PyObject *__pyx_pf_3re2_subn(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_pattern = 0; - PyObject *__pyx_v_repl = 0; - PyObject *__pyx_v_string = 0; - int __pyx_v_count; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__pattern,&__pyx_n_s__repl,&__pyx_n_s__string,&__pyx_n_s__count,0}; - __Pyx_RefNannySetupContext("subn"); - __pyx_self = __pyx_self; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); - PyObject* values[4] = {0,0,0,0}; - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pattern); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__repl); - if (likely(values[1])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("subn", 0, 3, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__string); - if (likely(values[2])) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("subn", 0, 3, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 3: - if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__count); - if (value) 
{ values[3] = value; kw_args--; } - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "subn") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - __pyx_v_pattern = values[0]; - __pyx_v_repl = values[1]; - __pyx_v_string = values[2]; - if (values[3]) { - __pyx_v_count = __Pyx_PyInt_AsInt(values[3]); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } else { - __pyx_v_count = ((int)0); - } - } else { - __pyx_v_count = ((int)0); - switch (PyTuple_GET_SIZE(__pyx_args)) { - case 4: - __pyx_v_count = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_count == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - case 3: - __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 2); - __pyx_v_repl = PyTuple_GET_ITEM(__pyx_args, 1); - __pyx_v_pattern = PyTuple_GET_ITEM(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("subn", 0, 3, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("re2.subn"); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1048 - * return a replacement string to be used. 
- * """ - * return compile(pattern).subn(repl, string, count) # <<<<<<<<<<<<<< - * - * _alphanum = {} - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__compile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - __pyx_t_3 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__subn); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromLong(__pyx_v_count); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_repl); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_repl); - __Pyx_GIVEREF(__pyx_v_repl); - __Pyx_INCREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_v_string); - __Pyx_GIVEREF(__pyx_v_string); - PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_t_2, __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("re2.subn"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "/Users/maxiak/pyre2/src/re2.pyx":1055 - * del c - * - * def escape(pattern): # <<<<<<<<<<<<<< - * "Escape all non-alphanumeric characters in pattern." - * s = list(pattern) - */ - -static PyObject *__pyx_pf_3re2_escape(PyObject *__pyx_self, PyObject *__pyx_v_pattern); /*proto*/ -static char __pyx_doc_3re2_escape[] = "Escape all non-alphanumeric characters in pattern."; -static PyObject *__pyx_pf_3re2_escape(PyObject *__pyx_self, PyObject *__pyx_v_pattern) { - PyObject *__pyx_v_s; - PyObject *__pyx_v_alphanum; - Py_ssize_t __pyx_v_i; - PyObject *__pyx_v_c; - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - Py_ssize_t __pyx_t_3; - Py_ssize_t __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - int __pyx_t_7; - PyObject *__pyx_t_8 = NULL; - __Pyx_RefNannySetupContext("escape"); - __pyx_self = __pyx_self; - __pyx_v_s = ((PyObject *)Py_None); __Pyx_INCREF(Py_None); - __pyx_v_alphanum = Py_None; __Pyx_INCREF(Py_None); - __pyx_v_c = Py_None; __Pyx_INCREF(Py_None); - - /* "/Users/maxiak/pyre2/src/re2.pyx":1057 - * def escape(pattern): - * "Escape all non-alphanumeric characters in pattern." 
- * s = list(pattern) # <<<<<<<<<<<<<< - * alphanum = _alphanum - * for i in range(len(pattern)): - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1057; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_pattern); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_pattern); - __Pyx_GIVEREF(__pyx_v_pattern); - __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)&PyList_Type)), __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1057; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(((PyObject *)__pyx_v_s)); - __pyx_v_s = ((PyObject *)__pyx_t_2); - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1058 - * "Escape all non-alphanumeric characters in pattern." - * s = list(pattern) - * alphanum = _alphanum # <<<<<<<<<<<<<< - * for i in range(len(pattern)): - * c = pattern[i] - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s___alphanum); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1058; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_v_alphanum); - __pyx_v_alphanum = __pyx_t_2; - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1059 - * s = list(pattern) - * alphanum = _alphanum - * for i in range(len(pattern)): # <<<<<<<<<<<<<< - * c = pattern[i] - * if ord(c) < 0x80 and c not in alphanum: - */ - __pyx_t_3 = PyObject_Length(__pyx_v_pattern); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1059; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_i = __pyx_t_4; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1060 - * alphanum = _alphanum - * for i in range(len(pattern)): - * c = pattern[i] # <<<<<<<<<<<<<< - * if ord(c) < 0x80 and c not in alphanum: - * if c == "\000": - */ - __pyx_t_2 
= __Pyx_GetItemInt(__pyx_v_pattern, __pyx_v_i, sizeof(Py_ssize_t), PyInt_FromSsize_t); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1060; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_v_c); - __pyx_v_c = __pyx_t_2; - __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1061 - * for i in range(len(pattern)): - * c = pattern[i] - * if ord(c) < 0x80 and c not in alphanum: # <<<<<<<<<<<<<< - * if c == "\000": - * s[i] = "\\000" - */ - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1061; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_v_c); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_c); - __Pyx_GIVEREF(__pyx_v_c); - __pyx_t_1 = PyObject_Call(__pyx_builtin_ord, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1061; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0x80, Py_LT); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1061; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1061; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_5) { - __pyx_t_6 = (__Pyx_NegateNonNeg(PySequence_Contains(__pyx_v_alphanum, __pyx_v_c))); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1061; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_7 = __pyx_t_6; - } else { - __pyx_t_7 = __pyx_t_5; - } - if (__pyx_t_7) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":1062 - * c = pattern[i] - * if ord(c) < 0x80 and c not in alphanum: - * if c == "\000": # <<<<<<<<<<<<<< - * s[i] = 
"\\000" - * else: - */ - __pyx_t_2 = PyObject_RichCompare(__pyx_v_c, ((PyObject *)__pyx_kp_s_4), Py_EQ); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1062; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1062; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (__pyx_t_7) { - - /* "/Users/maxiak/pyre2/src/re2.pyx":1063 - * if ord(c) < 0x80 and c not in alphanum: - * if c == "\000": - * s[i] = "\\000" # <<<<<<<<<<<<<< - * else: - * s[i] = "\\" + c - */ - if (__Pyx_SetItemInt(((PyObject *)__pyx_v_s), __pyx_v_i, ((PyObject *)__pyx_kp_s_38), sizeof(Py_ssize_t), PyInt_FromSsize_t) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1063; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - goto __pyx_L8; - } - /*else*/ { - - /* "/Users/maxiak/pyre2/src/re2.pyx":1065 - * s[i] = "\\000" - * else: - * s[i] = "\\" + c # <<<<<<<<<<<<<< - * return pattern[:0].join(s) - * - */ - __pyx_t_2 = PyNumber_Add(((PyObject *)__pyx_kp_s_3), __pyx_v_c); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - if (__Pyx_SetItemInt(((PyObject *)__pyx_v_s), __pyx_v_i, __pyx_t_2, sizeof(Py_ssize_t), PyInt_FromSsize_t) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - } - __pyx_L8:; - goto __pyx_L7; - } - __pyx_L7:; - } - - /* "/Users/maxiak/pyre2/src/re2.pyx":1066 - * else: - * s[i] = "\\" + c - * return pattern[:0].join(s) # <<<<<<<<<<<<<< - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = PySequence_GetSlice(__pyx_v_pattern, 0, 0); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1066; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - 
__pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1066; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1066; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_v_s)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_v_s)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_s)); - __pyx_t_8 = PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1066; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_8; - __pyx_t_8 = 0; - goto __pyx_L0; - - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("re2.escape"); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(__pyx_v_s); - __Pyx_DECREF(__pyx_v_alphanum); - __Pyx_DECREF(__pyx_v_c); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -static struct __pyx_vtabstruct_3re2_Match __pyx_vtable_3re2_Match; - -static PyObject *__pyx_tp_new_3re2_Match(PyTypeObject *t, PyObject *a, PyObject *k) { - struct __pyx_obj_3re2_Match *p; - PyObject *o = (*t->tp_alloc)(t, 0); - if (!o) return 0; - p = ((struct __pyx_obj_3re2_Match *)o); - p->__pyx_vtab = __pyx_vtabptr_3re2_Match; - p->match_string = Py_None; Py_INCREF(Py_None); - p->_pattern_object = Py_None; Py_INCREF(Py_None); - p->_groups = ((PyObject *)Py_None); Py_INCREF(Py_None); - p->_spans = ((PyObject *)Py_None); Py_INCREF(Py_None); - p->_named_groups = ((PyObject *)Py_None); Py_INCREF(Py_None); - p->_named_indexes = ((PyObject *)Py_None); 
Py_INCREF(Py_None); - return o; -} - -static void __pyx_tp_dealloc_3re2_Match(PyObject *o) { - struct __pyx_obj_3re2_Match *p = (struct __pyx_obj_3re2_Match *)o; - { - PyObject *etype, *eval, *etb; - PyErr_Fetch(&etype, &eval, &etb); - ++Py_REFCNT(o); - __pyx_pf_3re2_5Match___dealloc__(o); - if (PyErr_Occurred()) PyErr_WriteUnraisable(o); - --Py_REFCNT(o); - PyErr_Restore(etype, eval, etb); - } - Py_XDECREF(p->match_string); - Py_XDECREF(p->_pattern_object); - Py_XDECREF(((PyObject *)p->_groups)); - Py_XDECREF(((PyObject *)p->_spans)); - Py_XDECREF(((PyObject *)p->_named_groups)); - Py_XDECREF(((PyObject *)p->_named_indexes)); - (*Py_TYPE(o)->tp_free)(o); -} - -static int __pyx_tp_traverse_3re2_Match(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_3re2_Match *p = (struct __pyx_obj_3re2_Match *)o; - if (p->match_string) { - e = (*v)(p->match_string, a); if (e) return e; - } - if (p->_pattern_object) { - e = (*v)(p->_pattern_object, a); if (e) return e; - } - if (p->_groups) { - e = (*v)(p->_groups, a); if (e) return e; - } - if (p->_spans) { - e = (*v)(p->_spans, a); if (e) return e; - } - if (p->_named_groups) { - e = (*v)(p->_named_groups, a); if (e) return e; - } - if (p->_named_indexes) { - e = (*v)(p->_named_indexes, a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_3re2_Match(PyObject *o) { - struct __pyx_obj_3re2_Match *p = (struct __pyx_obj_3re2_Match *)o; - PyObject* tmp; - tmp = ((PyObject*)p->match_string); - p->match_string = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->_pattern_object); - p->_pattern_object = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->_groups); - p->_groups = ((PyObject *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->_spans); - p->_spans = ((PyObject *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->_named_groups); - p->_named_groups = ((PyObject *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - 
tmp = ((PyObject*)p->_named_indexes); - p->_named_indexes = ((PyObject *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - return 0; -} - -static PyObject *__pyx_getprop_3re2_5Match_re(PyObject *o, void *x) { - return __pyx_pf_3re2_5Match_2re___get__(o); -} - -static PyObject *__pyx_getprop_3re2_5Match_pos(PyObject *o, void *x) { - return __pyx_pf_3re2_5Match_3pos___get__(o); -} - -static PyObject *__pyx_getprop_3re2_5Match_endpos(PyObject *o, void *x) { - return __pyx_pf_3re2_5Match_6endpos___get__(o); -} - -static PyObject *__pyx_getprop_3re2_5Match_string(PyObject *o, void *x) { - return __pyx_pf_3re2_5Match_6string___get__(o); -} - -static PyObject *__pyx_getprop_3re2_5Match_regs(PyObject *o, void *x) { - return __pyx_pf_3re2_5Match_4regs___get__(o); -} - -static PyObject *__pyx_getprop_3re2_5Match_lastindex(PyObject *o, void *x) { - return __pyx_pf_3re2_5Match_9lastindex___get__(o); -} - -static PyObject *__pyx_getprop_3re2_5Match_lastgroup(PyObject *o, void *x) { - return __pyx_pf_3re2_5Match_9lastgroup___get__(o); -} - -static PyMethodDef __pyx_methods_3re2_Match[] = { - {__Pyx_NAMESTR("groups"), (PyCFunction)__pyx_pf_3re2_5Match_groups, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("group"), (PyCFunction)__pyx_pf_3re2_5Match_group, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("_convert_spans"), (PyCFunction)__pyx_pf_3re2_5Match__convert_spans, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("expand"), (PyCFunction)__pyx_pf_3re2_5Match_expand, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("groupdict"), (PyCFunction)__pyx_pf_3re2_5Match_groupdict, METH_NOARGS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("end"), (PyCFunction)__pyx_pf_3re2_5Match_end, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("start"), (PyCFunction)__pyx_pf_3re2_5Match_start, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("span"), (PyCFunction)__pyx_pf_3re2_5Match_span, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {0, 0, 0, 0} -}; - 
-static struct PyGetSetDef __pyx_getsets_3re2_Match[] = { - {(char *)"re", __pyx_getprop_3re2_5Match_re, 0, 0, 0}, - {(char *)"pos", __pyx_getprop_3re2_5Match_pos, 0, 0, 0}, - {(char *)"endpos", __pyx_getprop_3re2_5Match_endpos, 0, 0, 0}, - {(char *)"string", __pyx_getprop_3re2_5Match_string, 0, 0, 0}, - {(char *)"regs", __pyx_getprop_3re2_5Match_regs, 0, 0, 0}, - {(char *)"lastindex", __pyx_getprop_3re2_5Match_lastindex, 0, 0, 0}, - {(char *)"lastgroup", __pyx_getprop_3re2_5Match_lastgroup, 0, 0, 0}, - {0, 0, 0, 0, 0} -}; - -static PyNumberMethods __pyx_tp_as_number_Match = { - 0, /*nb_add*/ - 0, /*nb_subtract*/ - 0, /*nb_multiply*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_divide*/ - #endif - 0, /*nb_remainder*/ - 0, /*nb_divmod*/ - 0, /*nb_power*/ - 0, /*nb_negative*/ - 0, /*nb_positive*/ - 0, /*nb_absolute*/ - 0, /*nb_nonzero*/ - 0, /*nb_invert*/ - 0, /*nb_lshift*/ - 0, /*nb_rshift*/ - 0, /*nb_and*/ - 0, /*nb_xor*/ - 0, /*nb_or*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_coerce*/ - #endif - 0, /*nb_int*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_long*/ - #else - 0, /*reserved*/ - #endif - 0, /*nb_float*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_oct*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*nb_hex*/ - #endif - 0, /*nb_inplace_add*/ - 0, /*nb_inplace_subtract*/ - 0, /*nb_inplace_multiply*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_inplace_divide*/ - #endif - 0, /*nb_inplace_remainder*/ - 0, /*nb_inplace_power*/ - 0, /*nb_inplace_lshift*/ - 0, /*nb_inplace_rshift*/ - 0, /*nb_inplace_and*/ - 0, /*nb_inplace_xor*/ - 0, /*nb_inplace_or*/ - 0, /*nb_floor_divide*/ - 0, /*nb_true_divide*/ - 0, /*nb_inplace_floor_divide*/ - 0, /*nb_inplace_true_divide*/ - #if PY_VERSION_HEX >= 0x02050000 - 0, /*nb_index*/ - #endif -}; - -static PySequenceMethods __pyx_tp_as_sequence_Match = { - 0, /*sq_length*/ - 0, /*sq_concat*/ - 0, /*sq_repeat*/ - 0, /*sq_item*/ - 0, /*sq_slice*/ - 0, /*sq_ass_item*/ - 0, /*sq_ass_slice*/ - 0, /*sq_contains*/ - 0, /*sq_inplace_concat*/ - 0, /*sq_inplace_repeat*/ -}; - 
-static PyMappingMethods __pyx_tp_as_mapping_Match = { - 0, /*mp_length*/ - 0, /*mp_subscript*/ - 0, /*mp_ass_subscript*/ -}; - -static PyBufferProcs __pyx_tp_as_buffer_Match = { - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getreadbuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getwritebuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getsegcount*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getcharbuffer*/ - #endif - #if PY_VERSION_HEX >= 0x02060000 - 0, /*bf_getbuffer*/ - #endif - #if PY_VERSION_HEX >= 0x02060000 - 0, /*bf_releasebuffer*/ - #endif -}; - -PyTypeObject __pyx_type_3re2_Match = { - PyVarObject_HEAD_INIT(0, 0) - __Pyx_NAMESTR("re2.Match"), /*tp_name*/ - sizeof(struct __pyx_obj_3re2_Match), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_3re2_Match, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #else - 0, /*reserved*/ - #endif - 0, /*tp_repr*/ - &__pyx_tp_as_number_Match, /*tp_as_number*/ - &__pyx_tp_as_sequence_Match, /*tp_as_sequence*/ - &__pyx_tp_as_mapping_Match, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - &__pyx_tp_as_buffer_Match, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_3re2_Match, /*tp_traverse*/ - __pyx_tp_clear_3re2_Match, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_3re2_Match, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_3re2_Match, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - 0, /*tp_dictoffset*/ - __pyx_pf_3re2_5Match___init__, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_3re2_Match, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 
0, /*tp_del*/ - #if PY_VERSION_HEX >= 0x02060000 - 0, /*tp_version_tag*/ - #endif -}; -static struct __pyx_vtabstruct_3re2_Pattern __pyx_vtable_3re2_Pattern; - -static PyObject *__pyx_tp_new_3re2_Pattern(PyTypeObject *t, PyObject *a, PyObject *k) { - struct __pyx_obj_3re2_Pattern *p; - PyObject *o = (*t->tp_alloc)(t, 0); - if (!o) return 0; - p = ((struct __pyx_obj_3re2_Pattern *)o); - p->__pyx_vtab = __pyx_vtabptr_3re2_Pattern; - p->pattern = Py_None; Py_INCREF(Py_None); - p->__weakref__ = 0; - return o; -} - -static void __pyx_tp_dealloc_3re2_Pattern(PyObject *o) { - struct __pyx_obj_3re2_Pattern *p = (struct __pyx_obj_3re2_Pattern *)o; - { - PyObject *etype, *eval, *etb; - PyErr_Fetch(&etype, &eval, &etb); - ++Py_REFCNT(o); - __pyx_pf_3re2_7Pattern___dealloc__(o); - if (PyErr_Occurred()) PyErr_WriteUnraisable(o); - --Py_REFCNT(o); - PyErr_Restore(etype, eval, etb); - } - if (p->__weakref__) PyObject_ClearWeakRefs(o); - Py_XDECREF(p->pattern); - (*Py_TYPE(o)->tp_free)(o); -} - -static int __pyx_tp_traverse_3re2_Pattern(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_3re2_Pattern *p = (struct __pyx_obj_3re2_Pattern *)o; - if (p->pattern) { - e = (*v)(p->pattern, a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_3re2_Pattern(PyObject *o) { - struct __pyx_obj_3re2_Pattern *p = (struct __pyx_obj_3re2_Pattern *)o; - PyObject* tmp; - tmp = ((PyObject*)p->pattern); - p->pattern = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - return 0; -} - -static PyObject *__pyx_getprop_3re2_7Pattern_flags(PyObject *o, void *x) { - return __pyx_pf_3re2_7Pattern_5flags___get__(o); -} - -static PyObject *__pyx_getprop_3re2_7Pattern_groups(PyObject *o, void *x) { - return __pyx_pf_3re2_7Pattern_6groups___get__(o); -} - -static PyObject *__pyx_getprop_3re2_7Pattern_pattern(PyObject *o, void *x) { - return __pyx_pf_3re2_7Pattern_7pattern___get__(o); -} - -static int __pyx_setprop_3re2_7Pattern_pattern(PyObject *o, PyObject *v, void *x) { - if (v) { - 
return __pyx_pf_3re2_7Pattern_7pattern___set__(o, v); - } - else { - return __pyx_pf_3re2_7Pattern_7pattern___del__(o); - } -} - -static PyMethodDef __pyx_methods_3re2_Pattern[] = { - {__Pyx_NAMESTR("search"), (PyCFunction)__pyx_pf_3re2_7Pattern_search, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_7Pattern_search)}, - {__Pyx_NAMESTR("match"), (PyCFunction)__pyx_pf_3re2_7Pattern_match, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_7Pattern_match)}, - {__Pyx_NAMESTR("finditer"), (PyCFunction)__pyx_pf_3re2_7Pattern_finditer, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_7Pattern_finditer)}, - {__Pyx_NAMESTR("findall"), (PyCFunction)__pyx_pf_3re2_7Pattern_findall, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_7Pattern_findall)}, - {__Pyx_NAMESTR("split"), (PyCFunction)__pyx_pf_3re2_7Pattern_split, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_7Pattern_split)}, - {__Pyx_NAMESTR("sub"), (PyCFunction)__pyx_pf_3re2_7Pattern_sub, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_7Pattern_sub)}, - {__Pyx_NAMESTR("subn"), (PyCFunction)__pyx_pf_3re2_7Pattern_subn, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_7Pattern_subn)}, - {__Pyx_NAMESTR("_subn_callback"), (PyCFunction)__pyx_pf_3re2_7Pattern__subn_callback, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_7Pattern__subn_callback)}, - {0, 0, 0, 0} -}; - -static struct PyGetSetDef __pyx_getsets_3re2_Pattern[] = { - {(char *)"flags", __pyx_getprop_3re2_7Pattern_flags, 0, 0, 0}, - {(char *)"groups", __pyx_getprop_3re2_7Pattern_groups, 0, 0, 0}, - {(char *)"pattern", __pyx_getprop_3re2_7Pattern_pattern, __pyx_setprop_3re2_7Pattern_pattern, 0, 0}, - {0, 0, 0, 0, 0} -}; - -static PyNumberMethods __pyx_tp_as_number_Pattern = { - 0, /*nb_add*/ - 0, /*nb_subtract*/ - 0, /*nb_multiply*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_divide*/ - #endif - 0, /*nb_remainder*/ - 0, /*nb_divmod*/ - 0, /*nb_power*/ - 0, /*nb_negative*/ - 0, /*nb_positive*/ - 0, /*nb_absolute*/ 
- 0, /*nb_nonzero*/ - 0, /*nb_invert*/ - 0, /*nb_lshift*/ - 0, /*nb_rshift*/ - 0, /*nb_and*/ - 0, /*nb_xor*/ - 0, /*nb_or*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_coerce*/ - #endif - 0, /*nb_int*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_long*/ - #else - 0, /*reserved*/ - #endif - 0, /*nb_float*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_oct*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*nb_hex*/ - #endif - 0, /*nb_inplace_add*/ - 0, /*nb_inplace_subtract*/ - 0, /*nb_inplace_multiply*/ - #if PY_MAJOR_VERSION < 3 - 0, /*nb_inplace_divide*/ - #endif - 0, /*nb_inplace_remainder*/ - 0, /*nb_inplace_power*/ - 0, /*nb_inplace_lshift*/ - 0, /*nb_inplace_rshift*/ - 0, /*nb_inplace_and*/ - 0, /*nb_inplace_xor*/ - 0, /*nb_inplace_or*/ - 0, /*nb_floor_divide*/ - 0, /*nb_true_divide*/ - 0, /*nb_inplace_floor_divide*/ - 0, /*nb_inplace_true_divide*/ - #if PY_VERSION_HEX >= 0x02050000 - 0, /*nb_index*/ - #endif -}; - -static PySequenceMethods __pyx_tp_as_sequence_Pattern = { - 0, /*sq_length*/ - 0, /*sq_concat*/ - 0, /*sq_repeat*/ - 0, /*sq_item*/ - 0, /*sq_slice*/ - 0, /*sq_ass_item*/ - 0, /*sq_ass_slice*/ - 0, /*sq_contains*/ - 0, /*sq_inplace_concat*/ - 0, /*sq_inplace_repeat*/ -}; - -static PyMappingMethods __pyx_tp_as_mapping_Pattern = { - 0, /*mp_length*/ - 0, /*mp_subscript*/ - 0, /*mp_ass_subscript*/ -}; - -static PyBufferProcs __pyx_tp_as_buffer_Pattern = { - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getreadbuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getwritebuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getsegcount*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getcharbuffer*/ - #endif - #if PY_VERSION_HEX >= 0x02060000 - 0, /*bf_getbuffer*/ - #endif - #if PY_VERSION_HEX >= 0x02060000 - 0, /*bf_releasebuffer*/ - #endif -}; - -PyTypeObject __pyx_type_3re2_Pattern = { - PyVarObject_HEAD_INIT(0, 0) - __Pyx_NAMESTR("re2.Pattern"), /*tp_name*/ - sizeof(struct __pyx_obj_3re2_Pattern), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_3re2_Pattern, /*tp_dealloc*/ - 0, 
/*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #else - 0, /*reserved*/ - #endif - 0, /*tp_repr*/ - &__pyx_tp_as_number_Pattern, /*tp_as_number*/ - &__pyx_tp_as_sequence_Pattern, /*tp_as_sequence*/ - &__pyx_tp_as_mapping_Pattern, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - &__pyx_tp_as_buffer_Pattern, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_3re2_Pattern, /*tp_traverse*/ - __pyx_tp_clear_3re2_Pattern, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_3re2_Pattern, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_3re2_Pattern, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - 0, /*tp_dictoffset*/ - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_3re2_Pattern, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - #if PY_VERSION_HEX >= 0x02060000 - 0, /*tp_version_tag*/ - #endif -}; - -static PyMethodDef __pyx_methods[] = { - {__Pyx_NAMESTR("set_fallback_notification"), (PyCFunction)__pyx_pf_3re2_set_fallback_notification, METH_O, __Pyx_DOCSTR(__pyx_doc_3re2_set_fallback_notification)}, - {__Pyx_NAMESTR("compile"), (PyCFunction)__pyx_pf_3re2_compile, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("prepare_pattern"), (PyCFunction)__pyx_pf_3re2_prepare_pattern, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("_compile"), (PyCFunction)__pyx_pf_3re2__compile, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2__compile)}, - {__Pyx_NAMESTR("search"), (PyCFunction)__pyx_pf_3re2_search, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_search)}, - 
{__Pyx_NAMESTR("match"), (PyCFunction)__pyx_pf_3re2_match, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_match)}, - {__Pyx_NAMESTR("finditer"), (PyCFunction)__pyx_pf_3re2_finditer, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_finditer)}, - {__Pyx_NAMESTR("findall"), (PyCFunction)__pyx_pf_3re2_findall, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_findall)}, - {__Pyx_NAMESTR("split"), (PyCFunction)__pyx_pf_3re2_split, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_split)}, - {__Pyx_NAMESTR("sub"), (PyCFunction)__pyx_pf_3re2_sub, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_sub)}, - {__Pyx_NAMESTR("subn"), (PyCFunction)__pyx_pf_3re2_subn, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3re2_subn)}, - {__Pyx_NAMESTR("escape"), (PyCFunction)__pyx_pf_3re2_escape, METH_O, __Pyx_DOCSTR(__pyx_doc_3re2_escape)}, - {0, 0, 0, 0} -}; - -#if PY_MAJOR_VERSION >= 3 -static struct PyModuleDef __pyx_moduledef = { - PyModuleDef_HEAD_INIT, - __Pyx_NAMESTR("re2"), - 0, /* m_doc */ - -1, /* m_size */ - __pyx_methods /* m_methods */, - NULL, /* m_reload */ - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL /* m_free */ -}; -#endif - -static __Pyx_StringTabEntry __pyx_string_tab[] = { - {&__pyx_kp_s_1, __pyx_k_1, sizeof(__pyx_k_1), 0, 0, 1, 0}, - {&__pyx_kp_s_10, __pyx_k_10, sizeof(__pyx_k_10), 0, 0, 1, 0}, - {&__pyx_kp_s_11, __pyx_k_11, sizeof(__pyx_k_11), 0, 0, 1, 0}, - {&__pyx_kp_s_12, __pyx_k_12, sizeof(__pyx_k_12), 0, 0, 1, 0}, - {&__pyx_kp_s_13, __pyx_k_13, sizeof(__pyx_k_13), 0, 0, 1, 0}, - {&__pyx_kp_s_14, __pyx_k_14, sizeof(__pyx_k_14), 0, 0, 1, 0}, - {&__pyx_kp_s_15, __pyx_k_15, sizeof(__pyx_k_15), 0, 0, 1, 0}, - {&__pyx_kp_s_16, __pyx_k_16, sizeof(__pyx_k_16), 0, 0, 1, 0}, - {&__pyx_kp_s_17, __pyx_k_17, sizeof(__pyx_k_17), 0, 0, 1, 0}, - {&__pyx_kp_s_18, __pyx_k_18, sizeof(__pyx_k_18), 0, 0, 1, 0}, - {&__pyx_kp_s_19, __pyx_k_19, sizeof(__pyx_k_19), 0, 0, 1, 0}, - {&__pyx_kp_s_2, __pyx_k_2, sizeof(__pyx_k_2), 0, 0, 1, 0}, 
- {&__pyx_kp_s_20, __pyx_k_20, sizeof(__pyx_k_20), 0, 0, 1, 0}, - {&__pyx_kp_s_21, __pyx_k_21, sizeof(__pyx_k_21), 0, 0, 1, 0}, - {&__pyx_kp_s_22, __pyx_k_22, sizeof(__pyx_k_22), 0, 0, 1, 0}, - {&__pyx_kp_s_23, __pyx_k_23, sizeof(__pyx_k_23), 0, 0, 1, 0}, - {&__pyx_kp_s_24, __pyx_k_24, sizeof(__pyx_k_24), 0, 0, 1, 0}, - {&__pyx_n_s_25, __pyx_k_25, sizeof(__pyx_k_25), 0, 0, 1, 1}, - {&__pyx_n_s_26, __pyx_k_26, sizeof(__pyx_k_26), 0, 0, 1, 1}, - {&__pyx_kp_s_27, __pyx_k_27, sizeof(__pyx_k_27), 0, 0, 1, 0}, - {&__pyx_kp_s_28, __pyx_k_28, sizeof(__pyx_k_28), 0, 0, 1, 0}, - {&__pyx_kp_s_29, __pyx_k_29, sizeof(__pyx_k_29), 0, 0, 1, 0}, - {&__pyx_kp_s_3, __pyx_k_3, sizeof(__pyx_k_3), 0, 0, 1, 0}, - {&__pyx_kp_s_30, __pyx_k_30, sizeof(__pyx_k_30), 0, 0, 1, 0}, - {&__pyx_kp_s_31, __pyx_k_31, sizeof(__pyx_k_31), 0, 0, 1, 0}, - {&__pyx_kp_s_32, __pyx_k_32, sizeof(__pyx_k_32), 0, 0, 1, 0}, - {&__pyx_kp_s_33, __pyx_k_33, sizeof(__pyx_k_33), 0, 0, 1, 0}, - {&__pyx_kp_s_34, __pyx_k_34, sizeof(__pyx_k_34), 0, 0, 1, 0}, - {&__pyx_kp_s_35, __pyx_k_35, sizeof(__pyx_k_35), 0, 0, 1, 0}, - {&__pyx_kp_s_36, __pyx_k_36, sizeof(__pyx_k_36), 0, 0, 1, 0}, - {&__pyx_n_s_37, __pyx_k_37, sizeof(__pyx_k_37), 0, 0, 1, 1}, - {&__pyx_kp_s_38, __pyx_k_38, sizeof(__pyx_k_38), 0, 0, 1, 0}, - {&__pyx_kp_s_39, __pyx_k_39, sizeof(__pyx_k_39), 0, 0, 1, 0}, - {&__pyx_kp_s_4, __pyx_k_4, sizeof(__pyx_k_4), 0, 0, 1, 0}, - {&__pyx_kp_s_40, __pyx_k_40, sizeof(__pyx_k_40), 0, 0, 1, 0}, - {&__pyx_n_s_41, __pyx_k_41, sizeof(__pyx_k_41), 0, 0, 1, 1}, - {&__pyx_kp_u_42, __pyx_k_42, sizeof(__pyx_k_42), 0, 1, 0, 0}, - {&__pyx_n_s_43, __pyx_k_43, sizeof(__pyx_k_43), 0, 0, 1, 1}, - {&__pyx_kp_u_44, __pyx_k_44, sizeof(__pyx_k_44), 0, 1, 0, 0}, - {&__pyx_kp_u_45, __pyx_k_45, sizeof(__pyx_k_45), 0, 1, 0, 0}, - {&__pyx_kp_u_46, __pyx_k_46, sizeof(__pyx_k_46), 0, 1, 0, 0}, - {&__pyx_kp_u_47, __pyx_k_47, sizeof(__pyx_k_47), 0, 1, 0, 0}, - {&__pyx_kp_u_48, __pyx_k_48, sizeof(__pyx_k_48), 0, 1, 0, 0}, - {&__pyx_kp_u_49, 
__pyx_k_49, sizeof(__pyx_k_49), 0, 1, 0, 0}, - {&__pyx_kp_s_5, __pyx_k_5, sizeof(__pyx_k_5), 0, 0, 1, 0}, - {&__pyx_kp_u_50, __pyx_k_50, sizeof(__pyx_k_50), 0, 1, 0, 0}, - {&__pyx_kp_u_51, __pyx_k_51, sizeof(__pyx_k_51), 0, 1, 0, 0}, - {&__pyx_kp_u_52, __pyx_k_52, sizeof(__pyx_k_52), 0, 1, 0, 0}, - {&__pyx_kp_u_53, __pyx_k_53, sizeof(__pyx_k_53), 0, 1, 0, 0}, - {&__pyx_kp_u_54, __pyx_k_54, sizeof(__pyx_k_54), 0, 1, 0, 0}, - {&__pyx_kp_u_55, __pyx_k_55, sizeof(__pyx_k_55), 0, 1, 0, 0}, - {&__pyx_kp_u_56, __pyx_k_56, sizeof(__pyx_k_56), 0, 1, 0, 0}, - {&__pyx_kp_u_57, __pyx_k_57, sizeof(__pyx_k_57), 0, 1, 0, 0}, - {&__pyx_kp_u_58, __pyx_k_58, sizeof(__pyx_k_58), 0, 1, 0, 0}, - {&__pyx_kp_u_59, __pyx_k_59, sizeof(__pyx_k_59), 0, 1, 0, 0}, - {&__pyx_kp_s_6, __pyx_k_6, sizeof(__pyx_k_6), 0, 0, 1, 0}, - {&__pyx_kp_u_60, __pyx_k_60, sizeof(__pyx_k_60), 0, 1, 0, 0}, - {&__pyx_kp_s_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 0, 1, 0}, - {&__pyx_kp_u_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 1, 0, 0}, - {&__pyx_kp_s_8, __pyx_k_8, sizeof(__pyx_k_8), 0, 0, 1, 0}, - {&__pyx_n_s_9, __pyx_k_9, sizeof(__pyx_k_9), 0, 0, 1, 1}, - {&__pyx_kp_s__0, __pyx_k__0, sizeof(__pyx_k__0), 0, 0, 1, 0}, - {&__pyx_kp_s__1234567, __pyx_k__1234567, sizeof(__pyx_k__1234567), 0, 0, 1, 0}, - {&__pyx_kp_s__89, __pyx_k__89, sizeof(__pyx_k__89), 0, 0, 1, 0}, - {&__pyx_n_s__D, __pyx_k__D, sizeof(__pyx_k__D), 0, 0, 1, 1}, - {&__pyx_n_s__DOTALL, __pyx_k__DOTALL, sizeof(__pyx_k__DOTALL), 0, 0, 1, 1}, - {&__pyx_n_s__Exception, __pyx_k__Exception, sizeof(__pyx_k__Exception), 0, 0, 1, 1}, - {&__pyx_n_s__FALLBACK_EXCEPTION, __pyx_k__FALLBACK_EXCEPTION, sizeof(__pyx_k__FALLBACK_EXCEPTION), 0, 0, 1, 1}, - {&__pyx_n_s__FALLBACK_QUIETLY, __pyx_k__FALLBACK_QUIETLY, sizeof(__pyx_k__FALLBACK_QUIETLY), 0, 0, 1, 1}, - {&__pyx_n_s__FALLBACK_WARNING, __pyx_k__FALLBACK_WARNING, sizeof(__pyx_k__FALLBACK_WARNING), 0, 0, 1, 1}, - {&__pyx_n_s__I, __pyx_k__I, sizeof(__pyx_k__I), 0, 0, 1, 1}, - {&__pyx_n_s__IGNORECASE, __pyx_k__IGNORECASE, 
sizeof(__pyx_k__IGNORECASE), 0, 0, 1, 1}, - {&__pyx_n_s__IndexError, __pyx_k__IndexError, sizeof(__pyx_k__IndexError), 0, 0, 1, 1}, - {&__pyx_n_s__L, __pyx_k__L, sizeof(__pyx_k__L), 0, 0, 1, 1}, - {&__pyx_n_s__LOCALE, __pyx_k__LOCALE, sizeof(__pyx_k__LOCALE), 0, 0, 1, 1}, - {&__pyx_n_s__M, __pyx_k__M, sizeof(__pyx_k__M), 0, 0, 1, 1}, - {&__pyx_n_s__MULTILINE, __pyx_k__MULTILINE, sizeof(__pyx_k__MULTILINE), 0, 0, 1, 1}, - {&__pyx_n_s__Match, __pyx_k__Match, sizeof(__pyx_k__Match), 0, 0, 1, 1}, - {&__pyx_n_s__NotImplementedError, __pyx_k__NotImplementedError, sizeof(__pyx_k__NotImplementedError), 0, 0, 1, 1}, - {&__pyx_n_s__Pattern, __pyx_k__Pattern, sizeof(__pyx_k__Pattern), 0, 0, 1, 1}, - {&__pyx_n_s__RegexError, __pyx_k__RegexError, sizeof(__pyx_k__RegexError), 0, 0, 1, 1}, - {&__pyx_n_s__S, __pyx_k__S, sizeof(__pyx_k__S), 0, 0, 1, 1}, - {&__pyx_n_s__SREPattern, __pyx_k__SREPattern, sizeof(__pyx_k__SREPattern), 0, 0, 1, 1}, - {&__pyx_n_s__Tokenizer, __pyx_k__Tokenizer, sizeof(__pyx_k__Tokenizer), 0, 0, 1, 1}, - {&__pyx_n_s__TypeError, __pyx_k__TypeError, sizeof(__pyx_k__TypeError), 0, 0, 1, 1}, - {&__pyx_n_s__U, __pyx_k__U, sizeof(__pyx_k__U), 0, 0, 1, 1}, - {&__pyx_n_s__UNICODE, __pyx_k__UNICODE, sizeof(__pyx_k__UNICODE), 0, 0, 1, 1}, - {&__pyx_n_s__VERBOSE, __pyx_k__VERBOSE, sizeof(__pyx_k__VERBOSE), 0, 0, 1, 1}, - {&__pyx_n_s__VERSION, __pyx_k__VERSION, sizeof(__pyx_k__VERSION), 0, 0, 1, 1}, - {&__pyx_n_s__VERSION_HEX, __pyx_k__VERSION_HEX, sizeof(__pyx_k__VERSION_HEX), 0, 0, 1, 1}, - {&__pyx_n_s__ValueError, __pyx_k__ValueError, sizeof(__pyx_k__ValueError), 0, 0, 1, 1}, - {&__pyx_n_s__W, __pyx_k__W, sizeof(__pyx_k__W), 0, 0, 1, 1}, - {&__pyx_n_s__WHITESPACE, __pyx_k__WHITESPACE, sizeof(__pyx_k__WHITESPACE), 0, 0, 1, 1}, - {&__pyx_n_s__X, __pyx_k__X, sizeof(__pyx_k__X), 0, 0, 1, 1}, - {&__pyx_n_s___MAXCACHE, __pyx_k___MAXCACHE, sizeof(__pyx_k___MAXCACHE), 0, 0, 1, 1}, - {&__pyx_n_s____init__, __pyx_k____init__, sizeof(__pyx_k____init__), 0, 0, 1, 1}, - 
{&__pyx_n_s____iter__, __pyx_k____iter__, sizeof(__pyx_k____iter__), 0, 0, 1, 1}, - {&__pyx_n_s____main__, __pyx_k____main__, sizeof(__pyx_k____main__), 0, 0, 1, 1}, - {&__pyx_n_s____next, __pyx_k____next, sizeof(__pyx_k____next), 0, 0, 1, 1}, - {&__pyx_n_s____test__, __pyx_k____test__, sizeof(__pyx_k____test__), 0, 0, 1, 1}, - {&__pyx_n_s___alphanum, __pyx_k___alphanum, sizeof(__pyx_k___alphanum), 0, 0, 1, 1}, - {&__pyx_n_s___cache, __pyx_k___cache, sizeof(__pyx_k___cache), 0, 0, 1, 1}, - {&__pyx_n_s___cache_repl, __pyx_k___cache_repl, sizeof(__pyx_k___cache_repl), 0, 0, 1, 1}, - {&__pyx_n_s___compile, __pyx_k___compile, sizeof(__pyx_k___compile), 0, 0, 1, 1}, - {&__pyx_n_s___convert_positions, __pyx_k___convert_positions, sizeof(__pyx_k___convert_positions), 0, 0, 1, 1}, - {&__pyx_n_s___convert_spans, __pyx_k___convert_spans, sizeof(__pyx_k___convert_spans), 0, 0, 1, 1}, - {&__pyx_n_s___endpos, __pyx_k___endpos, sizeof(__pyx_k___endpos), 0, 0, 1, 1}, - {&__pyx_n_s___finditer, __pyx_k___finditer, sizeof(__pyx_k___finditer), 0, 0, 1, 1}, - {&__pyx_n_s___flags, __pyx_k___flags, sizeof(__pyx_k___flags), 0, 0, 1, 1}, - {&__pyx_n_s___groups, __pyx_k___groups, sizeof(__pyx_k___groups), 0, 0, 1, 1}, - {&__pyx_n_s___lastindex, __pyx_k___lastindex, sizeof(__pyx_k___lastindex), 0, 0, 1, 1}, - {&__pyx_n_s___make_spans, __pyx_k___make_spans, sizeof(__pyx_k___make_spans), 0, 0, 1, 1}, - {&__pyx_n_s___named_groups, __pyx_k___named_groups, sizeof(__pyx_k___named_groups), 0, 0, 1, 1}, - {&__pyx_n_s___named_indexes, __pyx_k___named_indexes, sizeof(__pyx_k___named_indexes), 0, 0, 1, 1}, - {&__pyx_n_s___pattern_object, __pyx_k___pattern_object, sizeof(__pyx_k___pattern_object), 0, 0, 1, 1}, - {&__pyx_n_s___pos, __pyx_k___pos, sizeof(__pyx_k___pos), 0, 0, 1, 1}, - {&__pyx_n_s___search, __pyx_k___search, sizeof(__pyx_k___search), 0, 0, 1, 1}, - {&__pyx_n_s___spans, __pyx_k___spans, sizeof(__pyx_k___spans), 0, 0, 1, 1}, - {&__pyx_n_s___subn_callback, __pyx_k___subn_callback, 
sizeof(__pyx_k___subn_callback), 0, 0, 1, 1}, - {&__pyx_n_s__basestring, __pyx_k__basestring, sizeof(__pyx_k__basestring), 0, 0, 1, 1}, - {&__pyx_n_s__begin, __pyx_k__begin, sizeof(__pyx_k__begin), 0, 0, 1, 1}, - {&__pyx_n_s__c, __pyx_k__c, sizeof(__pyx_k__c), 0, 0, 1, 1}, - {&__pyx_n_s__c_str, __pyx_k__c_str, sizeof(__pyx_k__c_str), 0, 0, 1, 1}, - {&__pyx_n_s__callable, __pyx_k__callable, sizeof(__pyx_k__callable), 0, 0, 1, 1}, - {&__pyx_n_s__callback, __pyx_k__callback, sizeof(__pyx_k__callback), 0, 0, 1, 1}, - {&__pyx_n_s__clear, __pyx_k__clear, sizeof(__pyx_k__clear), 0, 0, 1, 1}, - {&__pyx_n_s__compile, __pyx_k__compile, sizeof(__pyx_k__compile), 0, 0, 1, 1}, - {&__pyx_n_s__count, __pyx_k__count, sizeof(__pyx_k__count), 0, 0, 1, 1}, - {&__pyx_n_s__d, __pyx_k__d, sizeof(__pyx_k__d), 0, 0, 1, 1}, - {&__pyx_n_s__data, __pyx_k__data, sizeof(__pyx_k__data), 0, 0, 1, 1}, - {&__pyx_n_s__default, __pyx_k__default, sizeof(__pyx_k__default), 0, 0, 1, 1}, - {&__pyx_n_s__encoded, __pyx_k__encoded, sizeof(__pyx_k__encoded), 0, 0, 1, 1}, - {&__pyx_n_s__end, __pyx_k__end, sizeof(__pyx_k__end), 0, 0, 1, 1}, - {&__pyx_n_s__endpos, __pyx_k__endpos, sizeof(__pyx_k__endpos), 0, 0, 1, 1}, - {&__pyx_n_s__enumerate, __pyx_k__enumerate, sizeof(__pyx_k__enumerate), 0, 0, 1, 1}, - {&__pyx_n_s__error, __pyx_k__error, sizeof(__pyx_k__error), 0, 0, 1, 1}, - {&__pyx_n_s__error_code, __pyx_k__error_code, sizeof(__pyx_k__error_code), 0, 0, 1, 1}, - {&__pyx_n_s__escape, __pyx_k__escape, sizeof(__pyx_k__escape), 0, 0, 1, 1}, - {&__pyx_n_s__findall, __pyx_k__findall, sizeof(__pyx_k__findall), 0, 0, 1, 1}, - {&__pyx_n_s__finditer, __pyx_k__finditer, sizeof(__pyx_k__finditer), 0, 0, 1, 1}, - {&__pyx_n_s__first, __pyx_k__first, sizeof(__pyx_k__first), 0, 0, 1, 1}, - {&__pyx_n_s__flags, __pyx_k__flags, sizeof(__pyx_k__flags), 0, 0, 1, 1}, - {&__pyx_n_s__flush, __pyx_k__flush, sizeof(__pyx_k__flush), 0, 0, 1, 1}, - {&__pyx_n_s__get, __pyx_k__get, sizeof(__pyx_k__get), 0, 0, 1, 1}, - 
{&__pyx_n_s__group, __pyx_k__group, sizeof(__pyx_k__group), 0, 0, 1, 1}, - {&__pyx_n_s__groupdict, __pyx_k__groupdict, sizeof(__pyx_k__groupdict), 0, 0, 1, 1}, - {&__pyx_n_s__groups, __pyx_k__groups, sizeof(__pyx_k__groups), 0, 0, 1, 1}, - {&__pyx_n_s__index, __pyx_k__index, sizeof(__pyx_k__index), 0, 0, 1, 1}, - {&__pyx_n_s__init_groups, __pyx_k__init_groups, sizeof(__pyx_k__init_groups), 0, 0, 1, 1}, - {&__pyx_n_s__isdigit, __pyx_k__isdigit, sizeof(__pyx_k__isdigit), 0, 0, 1, 1}, - {&__pyx_n_s__join, __pyx_k__join, sizeof(__pyx_k__join), 0, 0, 1, 1}, - {&__pyx_n_s__length, __pyx_k__length, sizeof(__pyx_k__length), 0, 0, 1, 1}, - {&__pyx_n_s__m, __pyx_k__m, sizeof(__pyx_k__m), 0, 0, 1, 1}, - {&__pyx_n_s__match, __pyx_k__match, sizeof(__pyx_k__match), 0, 0, 1, 1}, - {&__pyx_n_s__match_string, __pyx_k__match_string, sizeof(__pyx_k__match_string), 0, 0, 1, 1}, - {&__pyx_n_s__matches, __pyx_k__matches, sizeof(__pyx_k__matches), 0, 0, 1, 1}, - {&__pyx_n_s__max_mem, __pyx_k__max_mem, sizeof(__pyx_k__max_mem), 0, 0, 1, 1}, - {&__pyx_n_s__maxsplit, __pyx_k__maxsplit, sizeof(__pyx_k__maxsplit), 0, 0, 1, 1}, - {&__pyx_n_s__named_groups, __pyx_k__named_groups, sizeof(__pyx_k__named_groups), 0, 0, 1, 1}, - {&__pyx_n_s__next, __pyx_k__next, sizeof(__pyx_k__next), 0, 0, 1, 1}, - {&__pyx_n_s__ngroups, __pyx_k__ngroups, sizeof(__pyx_k__ngroups), 0, 0, 1, 1}, - {&__pyx_n_s__nmatches, __pyx_k__nmatches, sizeof(__pyx_k__nmatches), 0, 0, 1, 1}, - {&__pyx_n_s__num_groups, __pyx_k__num_groups, sizeof(__pyx_k__num_groups), 0, 0, 1, 1}, - {&__pyx_n_s__ok, __pyx_k__ok, sizeof(__pyx_k__ok), 0, 0, 1, 1}, - {&__pyx_n_s__ord, __pyx_k__ord, sizeof(__pyx_k__ord), 0, 0, 1, 1}, - {&__pyx_n_s__pattern, __pyx_k__pattern, sizeof(__pyx_k__pattern), 0, 0, 1, 1}, - {&__pyx_n_s__pattern_object, __pyx_k__pattern_object, sizeof(__pyx_k__pattern_object), 0, 0, 1, 1}, - {&__pyx_n_s__pos, __pyx_k__pos, sizeof(__pyx_k__pos), 0, 0, 1, 1}, - {&__pyx_n_s__prepare_pattern, __pyx_k__prepare_pattern, 
sizeof(__pyx_k__prepare_pattern), 0, 0, 1, 1}, - {&__pyx_n_s__push_back, __pyx_k__push_back, sizeof(__pyx_k__push_back), 0, 0, 1, 1}, - {&__pyx_n_s__range, __pyx_k__range, sizeof(__pyx_k__range), 0, 0, 1, 1}, - {&__pyx_n_s__re, __pyx_k__re, sizeof(__pyx_k__re), 0, 0, 1, 1}, - {&__pyx_n_s__re_pattern, __pyx_k__re_pattern, sizeof(__pyx_k__re_pattern), 0, 0, 1, 1}, - {&__pyx_n_s__repl, __pyx_k__repl, sizeof(__pyx_k__repl), 0, 0, 1, 1}, - {&__pyx_n_s__s, __pyx_k__s, sizeof(__pyx_k__s), 0, 0, 1, 1}, - {&__pyx_n_s__search, __pyx_k__search, sizeof(__pyx_k__search), 0, 0, 1, 1}, - {&__pyx_n_s__second, __pyx_k__second, sizeof(__pyx_k__second), 0, 0, 1, 1}, - {&__pyx_n_s__self, __pyx_k__self, sizeof(__pyx_k__self), 0, 0, 1, 1}, - {&__pyx_n_s__set_case_sensitive, __pyx_k__set_case_sensitive, sizeof(__pyx_k__set_case_sensitive), 0, 0, 1, 1}, - {&__pyx_n_s__set_encoding, __pyx_k__set_encoding, sizeof(__pyx_k__set_encoding), 0, 0, 1, 1}, - {&__pyx_n_s__set_log_errors, __pyx_k__set_log_errors, sizeof(__pyx_k__set_log_errors), 0, 0, 1, 1}, - {&__pyx_n_s__set_max_mem, __pyx_k__set_max_mem, sizeof(__pyx_k__set_max_mem), 0, 0, 1, 1}, - {&__pyx_n_s__sorted, __pyx_k__sorted, sizeof(__pyx_k__sorted), 0, 0, 1, 1}, - {&__pyx_n_s__span, __pyx_k__span, sizeof(__pyx_k__span), 0, 0, 1, 1}, - {&__pyx_n_s__split, __pyx_k__split, sizeof(__pyx_k__split), 0, 0, 1, 1}, - {&__pyx_n_s__stdout, __pyx_k__stdout, sizeof(__pyx_k__stdout), 0, 0, 1, 1}, - {&__pyx_n_s__string, __pyx_k__string, sizeof(__pyx_k__string), 0, 0, 1, 1}, - {&__pyx_n_s__sub, __pyx_k__sub, sizeof(__pyx_k__sub), 0, 0, 1, 1}, - {&__pyx_n_s__subn, __pyx_k__subn, sizeof(__pyx_k__subn), 0, 0, 1, 1}, - {&__pyx_n_s__sys, __pyx_k__sys, sizeof(__pyx_k__sys), 0, 0, 1, 1}, - {&__pyx_n_s__tostring, __pyx_k__tostring, sizeof(__pyx_k__tostring), 0, 0, 1, 1}, - {&__pyx_n_s__w, __pyx_k__w, sizeof(__pyx_k__w), 0, 0, 1, 1}, - {&__pyx_n_s__warn, __pyx_k__warn, sizeof(__pyx_k__warn), 0, 0, 1, 1}, - {&__pyx_n_s__warnings, __pyx_k__warnings, 
sizeof(__pyx_k__warnings), 0, 0, 1, 1}, - {&__pyx_n_s__zip, __pyx_k__zip, sizeof(__pyx_k__zip), 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 0} -}; -static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_Exception = __Pyx_GetName(__pyx_b, __pyx_n_s__Exception); if (!__pyx_builtin_Exception) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 774; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_ValueError = __Pyx_GetName(__pyx_b, __pyx_n_s__ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_range = __Pyx_GetName(__pyx_b, __pyx_n_s__range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_basestring = __Pyx_GetName(__pyx_b, __pyx_n_s__basestring); if (!__pyx_builtin_basestring) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_IndexError = __Pyx_GetName(__pyx_b, __pyx_n_s__IndexError); if (!__pyx_builtin_IndexError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 200; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_sorted = __Pyx_GetName(__pyx_b, __pyx_n_s__sorted); if (!__pyx_builtin_sorted) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_zip = __Pyx_GetName(__pyx_b, __pyx_n_s__zip); if (!__pyx_builtin_zip) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetName(__pyx_b, __pyx_n_s__enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_TypeError = __Pyx_GetName(__pyx_b, __pyx_n_s__TypeError); if (!__pyx_builtin_TypeError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 411; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_callable = 
__Pyx_GetName(__pyx_b, __pyx_n_s__callable); if (!__pyx_builtin_callable) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_NotImplementedError = __Pyx_GetName(__pyx_b, __pyx_n_s__NotImplementedError); if (!__pyx_builtin_NotImplementedError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 686; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_ord = __Pyx_GetName(__pyx_b, __pyx_n_s__ord); if (!__pyx_builtin_ord) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1061; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - return 0; - __pyx_L1_error:; - return -1; -} - -static int __Pyx_InitGlobals(void) { - #if PY_VERSION_HEX < 0x02040000 - if (unlikely(__Pyx_Py23SetsImport() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #endif - if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __pyx_int_neg_1 = PyInt_FromLong(-1); if (unlikely(!__pyx_int_neg_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __pyx_int_23 = PyInt_FromLong(23); if (unlikely(!__pyx_int_23)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __pyx_int_100 = PyInt_FromLong(100); if (unlikely(!__pyx_int_100)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __pyx_int_0x80 = 
PyInt_FromLong(0x80); if (unlikely(!__pyx_int_0x80)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __pyx_int_0x000217 = PyInt_FromLong(0x000217); if (unlikely(!__pyx_int_0x000217)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - return 0; - __pyx_L1_error:; - return -1; -} - -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC initre2(void); /*proto*/ -PyMODINIT_FUNC initre2(void) -#else -PyMODINIT_FUNC PyInit_re2(void); /*proto*/ -PyMODINIT_FUNC PyInit_re2(void) -#endif -{ - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - PyObject *__pyx_t_5 = NULL; - Py_ssize_t __pyx_t_6; - #if CYTHON_REFNANNY - void* __pyx_refnanny = NULL; - __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); - if (!__Pyx_RefNanny) { - PyErr_Clear(); - __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); - if (!__Pyx_RefNanny) - Py_FatalError("failed to import 'refnanny' module"); - } - __pyx_refnanny = __Pyx_RefNanny->SetupContext("PyMODINIT_FUNC PyInit_re2(void)", __LINE__, __FILE__); - #endif - __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #ifdef __pyx_binding_PyCFunctionType_USED - if (__pyx_binding_PyCFunctionType_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #endif - /*--- Library function declarations ---*/ - /*--- Threads initialization code ---*/ - #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS - #ifdef WITH_THREAD /* Python build with threading support? 
*/ - PyEval_InitThreads(); - #endif - #endif - /*--- Module creation code ---*/ - #if PY_MAJOR_VERSION < 3 - __pyx_m = Py_InitModule4(__Pyx_NAMESTR("re2"), __pyx_methods, 0, 0, PYTHON_API_VERSION); - #else - __pyx_m = PyModule_Create(&__pyx_moduledef); - #endif - if (!__pyx_m) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - #if PY_MAJOR_VERSION < 3 - Py_INCREF(__pyx_m); - #endif - __pyx_b = PyImport_AddModule(__Pyx_NAMESTR(__Pyx_BUILTIN_MODULE_NAME)); - if (!__pyx_b) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - if (__Pyx_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - /*--- Initialize various global constants etc. ---*/ - if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__pyx_module_is_main_re2) { - if (__Pyx_SetAttrString(__pyx_m, "__name__", __pyx_n_s____main__) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - } - /*--- Builtin init code ---*/ - if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /*--- Global init code ---*/ - /*--- Function export code ---*/ - /*--- Type init code ---*/ - __pyx_vtabptr_3re2_Match = &__pyx_vtable_3re2_Match; - #if PY_MAJOR_VERSION >= 3 - __pyx_vtable_3re2_Match.init_groups = (PyObject *(*)(struct __pyx_obj_3re2_Match *))__pyx_f_3re2_5Match_init_groups; - __pyx_vtable_3re2_Match._convert_positions = (PyObject *(*)(struct __pyx_obj_3re2_Match *, PyObject *))__pyx_f_3re2_5Match__convert_positions; - __pyx_vtable_3re2_Match._make_spans = (PyObject *(*)(struct __pyx_obj_3re2_Match *))__pyx_f_3re2_5Match__make_spans; - #else - *(void(**)(void))&__pyx_vtable_3re2_Match.init_groups = 
(void(*)(void))__pyx_f_3re2_5Match_init_groups; - *(void(**)(void))&__pyx_vtable_3re2_Match._convert_positions = (void(*)(void))__pyx_f_3re2_5Match__convert_positions; - *(void(**)(void))&__pyx_vtable_3re2_Match._make_spans = (void(*)(void))__pyx_f_3re2_5Match__make_spans; - #endif - if (PyType_Ready(&__pyx_type_3re2_Match) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetVtable(__pyx_type_3re2_Match.tp_dict, __pyx_vtabptr_3re2_Match) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetAttrString(__pyx_m, "Match", (PyObject *)&__pyx_type_3re2_Match) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_ptype_3re2_Match = &__pyx_type_3re2_Match; - __pyx_vtabptr_3re2_Pattern = &__pyx_vtable_3re2_Pattern; - #if PY_MAJOR_VERSION >= 3 - __pyx_vtable_3re2_Pattern._search = (PyObject *(*)(struct __pyx_obj_3re2_Pattern *, PyObject *, int, int, RE2::Anchor))__pyx_f_3re2_7Pattern__search; - __pyx_vtable_3re2_Pattern._print_pattern = (PyObject *(*)(struct __pyx_obj_3re2_Pattern *))__pyx_f_3re2_7Pattern__print_pattern; - __pyx_vtable_3re2_Pattern._finditer = (PyObject *(*)(struct __pyx_obj_3re2_Pattern *, PyObject *, struct __pyx_opt_args_3re2_7Pattern__finditer *__pyx_optional_args))__pyx_f_3re2_7Pattern__finditer; - #else - *(void(**)(void))&__pyx_vtable_3re2_Pattern._search = (void(*)(void))__pyx_f_3re2_7Pattern__search; - *(void(**)(void))&__pyx_vtable_3re2_Pattern._print_pattern = (void(*)(void))__pyx_f_3re2_7Pattern__print_pattern; - *(void(**)(void))&__pyx_vtable_3re2_Pattern._finditer = (void(*)(void))__pyx_f_3re2_7Pattern__finditer; - #endif - if (PyType_Ready(&__pyx_type_3re2_Pattern) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetVtable(__pyx_type_3re2_Pattern.tp_dict, __pyx_vtabptr_3re2_Pattern) < 0) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetAttrString(__pyx_m, "Pattern", (PyObject *)&__pyx_type_3re2_Pattern) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__pyx_type_3re2_Pattern.tp_weaklistoffset == 0) __pyx_type_3re2_Pattern.tp_weaklistoffset = offsetof(struct __pyx_obj_3re2_Pattern, __weakref__); - __pyx_ptype_3re2_Pattern = &__pyx_type_3re2_Pattern; - /*--- Type import code ---*/ - /*--- Function import code ---*/ - /*--- Execution code ---*/ - - /* "/Users/maxiak/pyre2/src/re2.pyx":3 - * # cython: infer_types(False) - * # Import re flags to be compatible. - * import sys # <<<<<<<<<<<<<< - * import re - * - */ - __pyx_t_1 = __Pyx_Import(((PyObject *)__pyx_n_s__sys), 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__sys, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":4 - * # Import re flags to be compatible. 
- * import sys - * import re # <<<<<<<<<<<<<< - * - * I = re.I - */ - __pyx_t_1 = __Pyx_Import(((PyObject *)__pyx_n_s__re), 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__re, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":6 - * import re - * - * I = re.I # <<<<<<<<<<<<<< - * IGNORECASE = re.IGNORECASE - * M = re.M - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__I); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__I, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":7 - * - * I = re.I - * IGNORECASE = re.IGNORECASE # <<<<<<<<<<<<<< - * M = re.M - * MULTILINE = re.MULTILINE - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__IGNORECASE); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__IGNORECASE, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
7; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":8 - * I = re.I - * IGNORECASE = re.IGNORECASE - * M = re.M # <<<<<<<<<<<<<< - * MULTILINE = re.MULTILINE - * S = re.S - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__M); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__M, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":9 - * IGNORECASE = re.IGNORECASE - * M = re.M - * MULTILINE = re.MULTILINE # <<<<<<<<<<<<<< - * S = re.S - * DOTALL = re.DOTALL - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__MULTILINE); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__MULTILINE, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":10 - * M = re.M - * MULTILINE = re.MULTILINE - * S = re.S # <<<<<<<<<<<<<< - * DOTALL = re.DOTALL - * U = re.U - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_1)) {__pyx_filename 
= __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__S); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__S, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":11 - * MULTILINE = re.MULTILINE - * S = re.S - * DOTALL = re.DOTALL # <<<<<<<<<<<<<< - * U = re.U - * UNICODE = re.UNICODE - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__DOTALL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__DOTALL, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":12 - * S = re.S - * DOTALL = re.DOTALL - * U = re.U # <<<<<<<<<<<<<< - * UNICODE = re.UNICODE - * X = re.X - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__U); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if 
(PyObject_SetAttr(__pyx_m, __pyx_n_s__U, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":13 - * DOTALL = re.DOTALL - * U = re.U - * UNICODE = re.UNICODE # <<<<<<<<<<<<<< - * X = re.X - * VERBOSE = re.VERBOSE - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__UNICODE); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__UNICODE, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":14 - * U = re.U - * UNICODE = re.UNICODE - * X = re.X # <<<<<<<<<<<<<< - * VERBOSE = re.VERBOSE - * L = re.L - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__X); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__X, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":15 - * UNICODE = re.UNICODE - * X = re.X - * VERBOSE = re.VERBOSE # <<<<<<<<<<<<<< - * L = re.L - * LOCALE = re.LOCALE - */ 
- __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__VERBOSE); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__VERBOSE, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":16 - * X = re.X - * VERBOSE = re.VERBOSE - * L = re.L # <<<<<<<<<<<<<< - * LOCALE = re.LOCALE - * - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__L); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__L, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":17 - * VERBOSE = re.VERBOSE - * L = re.L - * LOCALE = re.LOCALE # <<<<<<<<<<<<<< - * - * FALLBACK_QUIETLY = 0 - */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__LOCALE); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__LOCALE, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":19 - * LOCALE = re.LOCALE - * - * FALLBACK_QUIETLY = 0 # <<<<<<<<<<<<<< - * FALLBACK_WARNING = 1 - * FALLBACK_EXCEPTION = 2 - */ - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__FALLBACK_QUIETLY, __pyx_int_0) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":20 - * - * FALLBACK_QUIETLY = 0 - * FALLBACK_WARNING = 1 # <<<<<<<<<<<<<< - * FALLBACK_EXCEPTION = 2 - * - */ - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__FALLBACK_WARNING, __pyx_int_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":21 - * FALLBACK_QUIETLY = 0 - * FALLBACK_WARNING = 1 - * FALLBACK_EXCEPTION = 2 # <<<<<<<<<<<<<< - * - * VERSION = (0, 2, 23) - */ - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__FALLBACK_EXCEPTION, __pyx_int_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":23 - * FALLBACK_EXCEPTION = 2 - * - * VERSION = (0, 2, 23) # <<<<<<<<<<<<<< - * VERSION_HEX = 0x000217 - * - */ - __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_int_0); - __Pyx_GIVEREF(__pyx_int_0); - __Pyx_INCREF(__pyx_int_2); - PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_int_2); - __Pyx_GIVEREF(__pyx_int_2); - __Pyx_INCREF(__pyx_int_23); - PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_int_23); - __Pyx_GIVEREF(__pyx_int_23); - if 
(PyObject_SetAttr(__pyx_m, __pyx_n_s__VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":24 - * - * VERSION = (0, 2, 23) - * VERSION_HEX = 0x000217 # <<<<<<<<<<<<<< - * - * # Type of compiled re object from Python stdlib - */ - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__VERSION_HEX, __pyx_int_0x000217) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":27 - * - * # Type of compiled re object from Python stdlib - * SREPattern = type(re.compile('')) # <<<<<<<<<<<<<< - * - * cdef int current_notification = FALLBACK_QUIETLY - */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__compile); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_7)); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_7)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_7)); - __pyx_t_3 = PyObject_Call(__pyx_t_2, __pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__SREPattern, ((PyObject *)Py_TYPE(__pyx_t_3))) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":29 - * SREPattern = type(re.compile('')) - * - * cdef int current_notification = FALLBACK_QUIETLY # <<<<<<<<<<<<<< - * - * def set_fallback_notification(level): - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__FALLBACK_QUIETLY); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_3re2_current_notification = __pyx_t_4; - - /* "/Users/maxiak/pyre2/src/re2.pyx":45 - * - * - * class RegexError(re.error): # <<<<<<<<<<<<<< - * """ - * Some error has occured in compilation of the regex. - */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__re); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__error); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - __pyx_t_2 = 0; - if (PyDict_SetItemString(((PyObject *)__pyx_t_3), "__doc__", ((PyObject *)__pyx_kp_s_39)) < 0) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = __Pyx_CreateClass(__pyx_t_1, ((PyObject *)__pyx_t_3), __pyx_n_s__RegexError, "re2"); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__RegexError, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":51 - * pass - * - * error = RegexError # <<<<<<<<<<<<<< - * - * cdef int _I = I, _M = M, _S = S, _U = U, _X = X, _L = L - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__RegexError); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__error, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":53 - * error = RegexError - * - * cdef int _I = I, _M = M, _S = S, _U = U, _X = X, _L = L # <<<<<<<<<<<<<< - * - * cimport _re2 - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__I); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_3re2__I = __pyx_t_4; - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__M); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_3re2__M = __pyx_t_4; - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__S); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_3re2__S = __pyx_t_4; - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__U); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_3re2__U = __pyx_t_4; - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__X); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_3re2__X = __pyx_t_4; - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__L); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - 
__pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_3re2__L = __pyx_t_4; - - /* "/Users/maxiak/pyre2/src/re2.pyx":58 - * cimport cpython.unicode - * from cython.operator cimport preincrement as inc, dereference as deref - * import warnings # <<<<<<<<<<<<<< - * - * cdef object cpp_to_pystring(_re2.cpp_string input): - */ - __pyx_t_3 = __Pyx_Import(((PyObject *)__pyx_n_s__warnings), 0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__warnings, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":757 - * del sp - * - * _cache = {} # <<<<<<<<<<<<<< - * _cache_repl = {} - * - */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 757; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - if (PyObject_SetAttr(__pyx_m, __pyx_n_s___cache, ((PyObject *)__pyx_t_3)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 757; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":758 - * - * _cache = {} - * _cache_repl = {} # <<<<<<<<<<<<<< - * - * _MAXCACHE = 100 - */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 758; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - if (PyObject_SetAttr(__pyx_m, __pyx_n_s___cache_repl, ((PyObject *)__pyx_t_3)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 758; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":760 - * _cache_repl = {} - * - * _MAXCACHE = 100 # <<<<<<<<<<<<<< - * - * def compile(pattern, int flags=0, int max_mem=8388608): - */ - if (PyObject_SetAttr(__pyx_m, __pyx_n_s___MAXCACHE, __pyx_int_100) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 760; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":774 - * return p - * - * class BackreferencesException(Exception): # <<<<<<<<<<<<<< - * pass - * - */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 774; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 774; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_builtin_Exception); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_builtin_Exception); - __Pyx_GIVEREF(__pyx_builtin_Exception); - __pyx_t_1 = __Pyx_CreateClass(__pyx_t_2, ((PyObject *)__pyx_t_3), __pyx_n_s_26, "re2"); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 774; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_26, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 774; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":777 - * pass - * - * class CharClassProblemException(Exception): # <<<<<<<<<<<<<< - * pass - * - */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 777; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_1 = PyTuple_New(1); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 777; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_builtin_Exception); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_builtin_Exception); - __Pyx_GIVEREF(__pyx_builtin_Exception); - __pyx_t_2 = __Pyx_CreateClass(__pyx_t_1, ((PyObject *)__pyx_t_3), __pyx_n_s_25, "re2"); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 777; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_25, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 777; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":780 - * pass - * - * WHITESPACE = set(" \t\n\r\v\f") # <<<<<<<<<<<<<< - * - * class Tokenizer: - */ - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 780; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_40)); - PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_kp_s_40)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_40)); - __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)&PySet_Type)), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 780; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__WHITESPACE, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 780; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":782 - * WHITESPACE = set(" \t\n\r\v\f") - * - * class Tokenizer: # <<<<<<<<<<<<<< - * def __init__(self, string): - * self.string = string - */ - __pyx_t_2 = 
PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 782; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - __pyx_t_3 = __Pyx_CreateClass(((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_2), __pyx_n_s__Tokenizer, "re2"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 782; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - - /* "/Users/maxiak/pyre2/src/re2.pyx":783 - * - * class Tokenizer: - * def __init__(self, string): # <<<<<<<<<<<<<< - * self.string = string - * self.index = 0 - */ - __pyx_t_1 = PyCFunction_New(&__pyx_mdef_3re2_9Tokenizer___init__, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 783; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyMethod_New(__pyx_t_1, 0, __pyx_t_3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 783; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_t_3, __pyx_n_s____init__, __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 783; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":787 - * self.index = 0 - * self.__next() - * def __next(self): # <<<<<<<<<<<<<< - * if self.index >= len(self.string): - * self.next = None - */ - __pyx_t_5 = PyCFunction_New(&__pyx_mdef_3re2_9Tokenizer___next, NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 787; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_1 = PyMethod_New(__pyx_t_5, 0, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 787; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyObject_SetAttr(__pyx_t_3, 
__pyx_n_s____next, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 787; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":800 - * self.index = self.index + len(ch) - * self.next = ch - * def get(self): # <<<<<<<<<<<<<< - * this = self.next - * self.__next() - */ - __pyx_t_1 = PyCFunction_New(&__pyx_mdef_3re2_9Tokenizer_get, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 800; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyMethod_New(__pyx_t_1, 0, __pyx_t_3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 800; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyObject_SetAttr(__pyx_t_3, __pyx_n_s__get, __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 800; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__Tokenizer, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 782; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1050 - * return compile(pattern).subn(repl, string, count) - * - * _alphanum = {} # <<<<<<<<<<<<<< - * for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890': - * _alphanum[c] = 1 - */ - __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1050; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - if (PyObject_SetAttr(__pyx_m, __pyx_n_s___alphanum, ((PyObject *)__pyx_t_2)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1050; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - - /* 
"/Users/maxiak/pyre2/src/re2.pyx":1051 - * - * _alphanum = {} - * for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890': # <<<<<<<<<<<<<< - * _alphanum[c] = 1 - * del c - */ - __pyx_t_6 = -1; __pyx_t_2 = PyObject_GetIter(((PyObject *)__pyx_n_s_41)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1051; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - for (;;) { - { - __pyx_t_3 = PyIter_Next(__pyx_t_2); - if (!__pyx_t_3) { - if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1051; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - break; - } - __Pyx_GOTREF(__pyx_t_3); - } - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__c, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1051; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1052 - * _alphanum = {} - * for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890': - * _alphanum[c] = 1 # <<<<<<<<<<<<<< - * del c - * - */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s___alphanum); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1052; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__c); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1052; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - if (PyObject_SetItem(__pyx_t_3, __pyx_t_5, __pyx_int_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1052; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "/Users/maxiak/pyre2/src/re2.pyx":1053 - * for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890': - * _alphanum[c] = 1 - * del c # <<<<<<<<<<<<<< - * - * def escape(pattern): - */ - 
if (__Pyx_DelAttrString(__pyx_m, "c") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1053; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "/Users/maxiak/pyre2/src/re2.pyx":1 - * # cython: infer_types(False) # <<<<<<<<<<<<<< - * # Import re flags to be compatible. - * import sys - */ - __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - __pyx_t_5 = PyObject_GetAttr(__pyx_m, __pyx_n_s_43); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_42), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__Pattern); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__search); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_44), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__Pattern); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__match); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_45), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__Pattern); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__finditer); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_46), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, 
__pyx_n_s__Pattern); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__findall); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_47), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__Pattern); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__split); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_48), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__Pattern); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - 
__Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__sub); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_49), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__Pattern); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__subn); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_50), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__Pattern); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s___subn_callback); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_51), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s___compile); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_GetAttrString(__pyx_t_3, "__doc__"); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_52), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_GetAttr(__pyx_m, __pyx_n_s__search); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_53), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, 
__pyx_n_s__match); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_GetAttrString(__pyx_t_3, "__doc__"); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_54), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_GetAttr(__pyx_m, __pyx_n_s__finditer); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_55), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__findall); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_GetAttrString(__pyx_t_3, "__doc__"); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_56), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = 
PyObject_GetAttr(__pyx_m, __pyx_n_s__split); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_57), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__sub); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_GetAttrString(__pyx_t_3, "__doc__"); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_58), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_GetAttr(__pyx_m, __pyx_n_s__subn); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_5, "__doc__"); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_59), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - 
__pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__escape); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_GetAttrString(__pyx_t_3, "__doc__"); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_kp_u_60), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyObject_SetAttr(__pyx_m, __pyx_n_s____test__, ((PyObject *)__pyx_t_2)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_5); - if (__pyx_m) { - __Pyx_AddTraceback("init re2"); - Py_DECREF(__pyx_m); __pyx_m = 0; - } else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ImportError, "init re2"); - } - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - #if PY_MAJOR_VERSION < 3 - return; - #else - return __pyx_m; - #endif -} - -/* Runtime support code */ - -static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) { - PyObject *result; - result = PyObject_GetAttr(dict, name); - if (!result) - PyErr_SetObject(PyExc_NameError, name); - return result; -} - -static void __Pyx_RaiseDoubleKeywordsError( - const char* func_name, - PyObject* kw_name) -{ - PyErr_Format(PyExc_TypeError, - #if PY_MAJOR_VERSION >= 3 - "%s() got multiple values for keyword argument '%U'", func_name, kw_name); - #else - "%s() got multiple values for keyword argument '%s'", func_name, - PyString_AS_STRING(kw_name)); - #endif -} - -static void __Pyx_RaiseArgtupleInvalid( - const char* 
func_name, - int exact, - Py_ssize_t num_min, - Py_ssize_t num_max, - Py_ssize_t num_found) -{ - Py_ssize_t num_expected; - const char *number, *more_or_less; - - if (num_found < num_min) { - num_expected = num_min; - more_or_less = "at least"; - } else { - num_expected = num_max; - more_or_less = "at most"; - } - if (exact) { - more_or_less = "exactly"; - } - number = (num_expected == 1) ? "" : "s"; - PyErr_Format(PyExc_TypeError, - #if PY_VERSION_HEX < 0x02050000 - "%s() takes %s %d positional argument%s (%d given)", - #else - "%s() takes %s %zd positional argument%s (%zd given)", - #endif - func_name, more_or_less, num_expected, number, num_found); -} - -static int __Pyx_ParseOptionalKeywords( - PyObject *kwds, - PyObject **argnames[], - PyObject *kwds2, - PyObject *values[], - Py_ssize_t num_pos_args, - const char* function_name) -{ - PyObject *key = 0, *value = 0; - Py_ssize_t pos = 0; - PyObject*** name; - PyObject*** first_kw_arg = argnames + num_pos_args; - - while (PyDict_Next(kwds, &pos, &key, &value)) { - name = first_kw_arg; - while (*name && (**name != key)) name++; - if (*name) { - values[name-argnames] = value; - } else { - #if PY_MAJOR_VERSION < 3 - if (unlikely(!PyString_CheckExact(key)) && unlikely(!PyString_Check(key))) { - #else - if (unlikely(!PyUnicode_CheckExact(key)) && unlikely(!PyUnicode_Check(key))) { - #endif - goto invalid_keyword_type; - } else { - for (name = first_kw_arg; *name; name++) { - #if PY_MAJOR_VERSION >= 3 - if (PyUnicode_GET_SIZE(**name) == PyUnicode_GET_SIZE(key) && - PyUnicode_Compare(**name, key) == 0) break; - #else - if (PyString_GET_SIZE(**name) == PyString_GET_SIZE(key) && - _PyString_Eq(**name, key)) break; - #endif - } - if (*name) { - values[name-argnames] = value; - } else { - /* unexpected keyword found */ - for (name=argnames; name != first_kw_arg; name++) { - if (**name == key) goto arg_passed_twice; - #if PY_MAJOR_VERSION >= 3 - if (PyUnicode_GET_SIZE(**name) == PyUnicode_GET_SIZE(key) && - 
PyUnicode_Compare(**name, key) == 0) goto arg_passed_twice; - #else - if (PyString_GET_SIZE(**name) == PyString_GET_SIZE(key) && - _PyString_Eq(**name, key)) goto arg_passed_twice; - #endif - } - if (kwds2) { - if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; - } else { - goto invalid_keyword; - } - } - } - } - } - return 0; -arg_passed_twice: - __Pyx_RaiseDoubleKeywordsError(function_name, **name); - goto bad; -invalid_keyword_type: - PyErr_Format(PyExc_TypeError, - "%s() keywords must be strings", function_name); - goto bad; -invalid_keyword: - PyErr_Format(PyExc_TypeError, - #if PY_MAJOR_VERSION < 3 - "%s() got an unexpected keyword argument '%s'", - function_name, PyString_AsString(key)); - #else - "%s() got an unexpected keyword argument '%U'", - function_name, key); - #endif -bad: - return -1; -} - -static CYTHON_INLINE int __Pyx_CheckKeywordStrings( - PyObject *kwdict, - const char* function_name, - int kw_allowed) -{ - PyObject* key = 0; - Py_ssize_t pos = 0; - while (PyDict_Next(kwdict, &pos, &key, 0)) { - #if PY_MAJOR_VERSION < 3 - if (unlikely(!PyString_CheckExact(key)) && unlikely(!PyString_Check(key))) - #else - if (unlikely(!PyUnicode_CheckExact(key)) && unlikely(!PyUnicode_Check(key))) - #endif - goto invalid_keyword_type; - } - if ((!kw_allowed) && unlikely(key)) - goto invalid_keyword; - return 1; -invalid_keyword_type: - PyErr_Format(PyExc_TypeError, - "%s() keywords must be strings", function_name); - return 0; -invalid_keyword: - PyErr_Format(PyExc_TypeError, - #if PY_MAJOR_VERSION < 3 - "%s() got an unexpected keyword argument '%s'", - function_name, PyString_AsString(key)); - #else - "%s() got an unexpected keyword argument '%U'", - function_name, key); - #endif - return 0; -} - - -static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { - PyErr_Format(PyExc_ValueError, - #if PY_VERSION_HEX < 0x02050000 - "need more than %d value%s to unpack", (int)index, - #else - "need more than %zd value%s to unpack", index, - 
#endif - (index == 1) ? "" : "s"); -} - -static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { - PyErr_Format(PyExc_ValueError, - #if PY_VERSION_HEX < 0x02050000 - "too many values to unpack (expected %d)", (int)expected); - #else - "too many values to unpack (expected %zd)", expected); - #endif -} - -static PyObject *__Pyx_UnpackItem(PyObject *iter, Py_ssize_t index) { - PyObject *item; - if (!(item = PyIter_Next(iter))) { - if (!PyErr_Occurred()) { - __Pyx_RaiseNeedMoreValuesError(index); - } - } - return item; -} - -static int __Pyx_EndUnpack(PyObject *iter, Py_ssize_t expected) { - PyObject *item; - if ((item = PyIter_Next(iter))) { - Py_DECREF(item); - __Pyx_RaiseTooManyValuesError(expected); - return -1; - } - else if (!PyErr_Occurred()) - return 0; - else - return -1; -} - -static CYTHON_INLINE void __Pyx_RaiseNoneIndexingError(void) { - PyErr_SetString(PyExc_TypeError, "'NoneType' object is unsubscriptable"); -} - -static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { - PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); -} - -static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb) { - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyThreadState *tstate = PyThreadState_GET(); - - tmp_type = tstate->curexc_type; - tmp_value = tstate->curexc_value; - tmp_tb = tstate->curexc_traceback; - tstate->curexc_type = type; - tstate->curexc_value = value; - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); -} - -static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb) { - PyThreadState *tstate = PyThreadState_GET(); - *type = tstate->curexc_type; - *value = tstate->curexc_value; - *tb = tstate->curexc_traceback; - - tstate->curexc_type = 0; - tstate->curexc_value = 0; - tstate->curexc_traceback = 0; -} - - -static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) { - PyObject 
*local_type, *local_value, *local_tb; - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyThreadState *tstate = PyThreadState_GET(); - local_type = tstate->curexc_type; - local_value = tstate->curexc_value; - local_tb = tstate->curexc_traceback; - tstate->curexc_type = 0; - tstate->curexc_value = 0; - tstate->curexc_traceback = 0; - PyErr_NormalizeException(&local_type, &local_value, &local_tb); - if (unlikely(tstate->curexc_type)) - goto bad; - #if PY_MAJOR_VERSION >= 3 - if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0)) - goto bad; - #endif - *type = local_type; - *value = local_value; - *tb = local_tb; - Py_INCREF(local_type); - Py_INCREF(local_value); - Py_INCREF(local_tb); - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = local_type; - tstate->exc_value = local_value; - tstate->exc_traceback = local_tb; - /* Make sure tstate is in a consistent state when we XDECREF - these objects (XDECREF may run arbitrary code). 
*/ - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); - return 0; -bad: - *type = 0; - *value = 0; - *tb = 0; - Py_XDECREF(local_type); - Py_XDECREF(local_value); - Py_XDECREF(local_tb); - return -1; -} - - - -static CYTHON_INLINE void __Pyx_ExceptionSave(PyObject **type, PyObject **value, PyObject **tb) { - PyThreadState *tstate = PyThreadState_GET(); - *type = tstate->exc_type; - *value = tstate->exc_value; - *tb = tstate->exc_traceback; - Py_XINCREF(*type); - Py_XINCREF(*value); - Py_XINCREF(*tb); -} - -static void __Pyx_ExceptionReset(PyObject *type, PyObject *value, PyObject *tb) { - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyThreadState *tstate = PyThreadState_GET(); - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = type; - tstate->exc_value = value; - tstate->exc_traceback = tb; - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); -} - -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list) { - PyObject *py_import = 0; - PyObject *empty_list = 0; - PyObject *module = 0; - PyObject *global_dict = 0; - PyObject *empty_dict = 0; - PyObject *list; - py_import = __Pyx_GetAttrString(__pyx_b, "__import__"); - if (!py_import) - goto bad; - if (from_list) - list = from_list; - else { - empty_list = PyList_New(0); - if (!empty_list) - goto bad; - list = empty_list; - } - global_dict = PyModule_GetDict(__pyx_m); - if (!global_dict) - goto bad; - empty_dict = PyDict_New(); - if (!empty_dict) - goto bad; - module = PyObject_CallFunctionObjArgs(py_import, - name, global_dict, empty_dict, list, NULL); -bad: - Py_XDECREF(empty_list); - Py_XDECREF(py_import); - Py_XDECREF(empty_dict); - return module; -} - -static PyObject *__Pyx_CreateClass( - PyObject *bases, PyObject *dict, PyObject *name, const char *modname) -{ - PyObject *py_modname; - PyObject *result = 0; - - #if PY_MAJOR_VERSION < 3 - py_modname = PyString_FromString(modname); - #else - py_modname 
= PyUnicode_FromString(modname); - #endif - if (!py_modname) - goto bad; - if (PyDict_SetItemString(dict, "__module__", py_modname) < 0) - goto bad; - #if PY_MAJOR_VERSION < 3 - result = PyClass_New(bases, dict, name); - #else - result = PyObject_CallFunctionObjArgs((PyObject *)&PyType_Type, name, bases, dict, NULL); - #endif -bad: - Py_XDECREF(py_modname); - return result; -} - -#if PY_MAJOR_VERSION < 3 -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb) { - Py_XINCREF(type); - Py_XINCREF(value); - Py_XINCREF(tb); - /* First, check the traceback argument, replacing None with NULL. */ - if (tb == Py_None) { - Py_DECREF(tb); - tb = 0; - } - else if (tb != NULL && !PyTraceBack_Check(tb)) { - PyErr_SetString(PyExc_TypeError, - "raise: arg 3 must be a traceback or None"); - goto raise_error; - } - /* Next, replace a missing value with None */ - if (value == NULL) { - value = Py_None; - Py_INCREF(value); - } - #if PY_VERSION_HEX < 0x02050000 - if (!PyClass_Check(type)) - #else - if (!PyType_Check(type)) - #endif - { - /* Raising an instance. The value should be a dummy. 
*/ - if (value != Py_None) { - PyErr_SetString(PyExc_TypeError, - "instance exception may not have a separate value"); - goto raise_error; - } - /* Normalize to raise , */ - Py_DECREF(value); - value = type; - #if PY_VERSION_HEX < 0x02050000 - if (PyInstance_Check(type)) { - type = (PyObject*) ((PyInstanceObject*)type)->in_class; - Py_INCREF(type); - } - else { - type = 0; - PyErr_SetString(PyExc_TypeError, - "raise: exception must be an old-style class or instance"); - goto raise_error; - } - #else - type = (PyObject*) Py_TYPE(type); - Py_INCREF(type); - if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { - PyErr_SetString(PyExc_TypeError, - "raise: exception class must be a subclass of BaseException"); - goto raise_error; - } - #endif - } - - __Pyx_ErrRestore(type, value, tb); - return; -raise_error: - Py_XDECREF(value); - Py_XDECREF(type); - Py_XDECREF(tb); - return; -} - -#else /* Python 3+ */ - -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb) { - if (tb == Py_None) { - tb = 0; - } else if (tb && !PyTraceBack_Check(tb)) { - PyErr_SetString(PyExc_TypeError, - "raise: arg 3 must be a traceback or None"); - goto bad; - } - if (value == Py_None) - value = 0; - - if (PyExceptionInstance_Check(type)) { - if (value) { - PyErr_SetString(PyExc_TypeError, - "instance exception may not have a separate value"); - goto bad; - } - value = type; - type = (PyObject*) Py_TYPE(value); - } else if (!PyExceptionClass_Check(type)) { - PyErr_SetString(PyExc_TypeError, - "raise: exception class must be a subclass of BaseException"); - goto bad; - } - - PyErr_SetObject(type, value); - - if (tb) { - PyThreadState *tstate = PyThreadState_GET(); - PyObject* tmp_tb = tstate->curexc_traceback; - if (tb != tmp_tb) { - Py_INCREF(tb); - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_tb); - } - } - -bad: - return; -} -#endif - -#if PY_MAJOR_VERSION < 3 -static PyObject *__Pyx_GetStdout(void) { - PyObject *f = PySys_GetObject((char 
*)"stdout"); - if (!f) { - PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout"); - } - return f; -} - -static int __Pyx_Print(PyObject* f, PyObject *arg_tuple, int newline) { - PyObject* v; - int i; - - if (!f) { - if (!(f = __Pyx_GetStdout())) - return -1; - } - for (i=0; i < PyTuple_GET_SIZE(arg_tuple); i++) { - if (PyFile_SoftSpace(f, 1)) { - if (PyFile_WriteString(" ", f) < 0) - return -1; - } - v = PyTuple_GET_ITEM(arg_tuple, i); - if (PyFile_WriteObject(v, f, Py_PRINT_RAW) < 0) - return -1; - if (PyString_Check(v)) { - char *s = PyString_AsString(v); - Py_ssize_t len = PyString_Size(v); - if (len > 0 && - isspace(Py_CHARMASK(s[len-1])) && - s[len-1] != ' ') - PyFile_SoftSpace(f, 0); - } - } - if (newline) { - if (PyFile_WriteString("\n", f) < 0) - return -1; - PyFile_SoftSpace(f, 0); - } - return 0; -} - -#else /* Python 3 has a print function */ - -static int __Pyx_Print(PyObject* stream, PyObject *arg_tuple, int newline) { - PyObject* kwargs = 0; - PyObject* result = 0; - PyObject* end_string; - if (unlikely(!__pyx_print)) { - __pyx_print = __Pyx_GetAttrString(__pyx_b, "print"); - if (!__pyx_print) - return -1; - } - if (stream) { - kwargs = PyDict_New(); - if (unlikely(!kwargs)) - return -1; - if (unlikely(PyDict_SetItemString(kwargs, "file", stream) < 0)) - goto bad; - if (!newline) { - end_string = PyUnicode_FromStringAndSize(" ", 1); - if (unlikely(!end_string)) - goto bad; - if (PyDict_SetItemString(kwargs, "end", end_string) < 0) { - Py_DECREF(end_string); - goto bad; - } - Py_DECREF(end_string); - } - } else if (!newline) { - if (unlikely(!__pyx_print_kwargs)) { - __pyx_print_kwargs = PyDict_New(); - if (unlikely(!__pyx_print_kwargs)) - return -1; - end_string = PyUnicode_FromStringAndSize(" ", 1); - if (unlikely(!end_string)) - return -1; - if (PyDict_SetItemString(__pyx_print_kwargs, "end", end_string) < 0) { - Py_DECREF(end_string); - return -1; - } - Py_DECREF(end_string); - } - kwargs = __pyx_print_kwargs; - } - result = 
PyObject_Call(__pyx_print, arg_tuple, kwargs); - if (unlikely(kwargs) && (kwargs != __pyx_print_kwargs)) - Py_DECREF(kwargs); - if (!result) - return -1; - Py_DECREF(result); - return 0; -bad: - if (kwargs != __pyx_print_kwargs) - Py_XDECREF(kwargs); - return -1; -} - -#endif - -#if PY_MAJOR_VERSION < 3 - -static int __Pyx_PrintOne(PyObject* f, PyObject *o) { - if (!f) { - if (!(f = __Pyx_GetStdout())) - return -1; - } - if (PyFile_SoftSpace(f, 0)) { - if (PyFile_WriteString(" ", f) < 0) - return -1; - } - if (PyFile_WriteObject(o, f, Py_PRINT_RAW) < 0) - return -1; - if (PyFile_WriteString("\n", f) < 0) - return -1; - return 0; - /* the line below is just to avoid compiler - * compiler warnings about unused functions */ - return __Pyx_Print(f, NULL, 0); -} - -#else /* Python 3 has a print function */ - -static int __Pyx_PrintOne(PyObject* stream, PyObject *o) { - int res; - PyObject* arg_tuple = PyTuple_New(1); - if (unlikely(!arg_tuple)) - return -1; - Py_INCREF(o); - PyTuple_SET_ITEM(arg_tuple, 0, o); - res = __Pyx_Print(stream, arg_tuple, 1); - Py_DECREF(arg_tuple); - return res; -} - -#endif - -static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject* x) { - const unsigned char neg_one = (unsigned char)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(unsigned char) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(unsigned char)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? 
- "can't convert negative value to unsigned char" : - "value too large to convert to unsigned char"); - } - return (unsigned char)-1; - } - return (unsigned char)val; - } - return (unsigned char)__Pyx_PyInt_AsUnsignedLong(x); -} - -static CYTHON_INLINE unsigned short __Pyx_PyInt_AsUnsignedShort(PyObject* x) { - const unsigned short neg_one = (unsigned short)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(unsigned short) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(unsigned short)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? - "can't convert negative value to unsigned short" : - "value too large to convert to unsigned short"); - } - return (unsigned short)-1; - } - return (unsigned short)val; - } - return (unsigned short)__Pyx_PyInt_AsUnsignedLong(x); -} - -static CYTHON_INLINE unsigned int __Pyx_PyInt_AsUnsignedInt(PyObject* x) { - const unsigned int neg_one = (unsigned int)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(unsigned int) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(unsigned int)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? 
- "can't convert negative value to unsigned int" : - "value too large to convert to unsigned int"); - } - return (unsigned int)-1; - } - return (unsigned int)val; - } - return (unsigned int)__Pyx_PyInt_AsUnsignedLong(x); -} - -static CYTHON_INLINE char __Pyx_PyInt_AsChar(PyObject* x) { - const char neg_one = (char)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(char) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(char)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? - "can't convert negative value to char" : - "value too large to convert to char"); - } - return (char)-1; - } - return (char)val; - } - return (char)__Pyx_PyInt_AsLong(x); -} - -static CYTHON_INLINE short __Pyx_PyInt_AsShort(PyObject* x) { - const short neg_one = (short)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(short) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(short)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? - "can't convert negative value to short" : - "value too large to convert to short"); - } - return (short)-1; - } - return (short)val; - } - return (short)__Pyx_PyInt_AsLong(x); -} - -static CYTHON_INLINE int __Pyx_PyInt_AsInt(PyObject* x) { - const int neg_one = (int)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(int) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(int)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? 
- "can't convert negative value to int" : - "value too large to convert to int"); - } - return (int)-1; - } - return (int)val; - } - return (int)__Pyx_PyInt_AsLong(x); -} - -static CYTHON_INLINE signed char __Pyx_PyInt_AsSignedChar(PyObject* x) { - const signed char neg_one = (signed char)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(signed char) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(signed char)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? - "can't convert negative value to signed char" : - "value too large to convert to signed char"); - } - return (signed char)-1; - } - return (signed char)val; - } - return (signed char)__Pyx_PyInt_AsSignedLong(x); -} - -static CYTHON_INLINE signed short __Pyx_PyInt_AsSignedShort(PyObject* x) { - const signed short neg_one = (signed short)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(signed short) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(signed short)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? - "can't convert negative value to signed short" : - "value too large to convert to signed short"); - } - return (signed short)-1; - } - return (signed short)val; - } - return (signed short)__Pyx_PyInt_AsSignedLong(x); -} - -static CYTHON_INLINE signed int __Pyx_PyInt_AsSignedInt(PyObject* x) { - const signed int neg_one = (signed int)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(signed int) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(signed int)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? 
- "can't convert negative value to signed int" : - "value too large to convert to signed int"); - } - return (signed int)-1; - } - return (signed int)val; - } - return (signed int)__Pyx_PyInt_AsSignedLong(x); -} - -static CYTHON_INLINE int __Pyx_PyInt_AsLongDouble(PyObject* x) { - const int neg_one = (int)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (sizeof(int) < sizeof(long)) { - long val = __Pyx_PyInt_AsLong(x); - if (unlikely(val != (long)(int)val)) { - if (!unlikely(val == -1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_OverflowError, - (is_unsigned && unlikely(val < 0)) ? - "can't convert negative value to int" : - "value too large to convert to int"); - } - return (int)-1; - } - return (int)val; - } - return (int)__Pyx_PyInt_AsLong(x); -} - -static CYTHON_INLINE unsigned long __Pyx_PyInt_AsUnsignedLong(PyObject* x) { - const unsigned long neg_one = (unsigned long)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; -#if PY_VERSION_HEX < 0x03000000 - if (likely(PyInt_Check(x))) { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to unsigned long"); - return (unsigned long)-1; - } - return (unsigned long)val; - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { - if (unlikely(Py_SIZE(x) < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to unsigned long"); - return (unsigned long)-1; - } - return PyLong_AsUnsignedLong(x); - } else { - return PyLong_AsLong(x); - } - } else { - unsigned long val; - PyObject *tmp = __Pyx_PyNumber_Int(x); - if (!tmp) return (unsigned long)-1; - val = __Pyx_PyInt_AsUnsignedLong(tmp); - Py_DECREF(tmp); - return val; - } -} - -static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_AsUnsignedLongLong(PyObject* x) { - const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; -#if 
PY_VERSION_HEX < 0x03000000 - if (likely(PyInt_Check(x))) { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to unsigned PY_LONG_LONG"); - return (unsigned PY_LONG_LONG)-1; - } - return (unsigned PY_LONG_LONG)val; - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { - if (unlikely(Py_SIZE(x) < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to unsigned PY_LONG_LONG"); - return (unsigned PY_LONG_LONG)-1; - } - return PyLong_AsUnsignedLongLong(x); - } else { - return PyLong_AsLongLong(x); - } - } else { - unsigned PY_LONG_LONG val; - PyObject *tmp = __Pyx_PyNumber_Int(x); - if (!tmp) return (unsigned PY_LONG_LONG)-1; - val = __Pyx_PyInt_AsUnsignedLongLong(tmp); - Py_DECREF(tmp); - return val; - } -} - -static CYTHON_INLINE long __Pyx_PyInt_AsLong(PyObject* x) { - const long neg_one = (long)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; -#if PY_VERSION_HEX < 0x03000000 - if (likely(PyInt_Check(x))) { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to long"); - return (long)-1; - } - return (long)val; - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { - if (unlikely(Py_SIZE(x) < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to long"); - return (long)-1; - } - return PyLong_AsUnsignedLong(x); - } else { - return PyLong_AsLong(x); - } - } else { - long val; - PyObject *tmp = __Pyx_PyNumber_Int(x); - if (!tmp) return (long)-1; - val = __Pyx_PyInt_AsLong(tmp); - Py_DECREF(tmp); - return val; - } -} - -static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_AsLongLong(PyObject* x) { - const PY_LONG_LONG neg_one = (PY_LONG_LONG)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; -#if PY_VERSION_HEX < 0x03000000 - if (likely(PyInt_Check(x))) { - long val = 
PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to PY_LONG_LONG"); - return (PY_LONG_LONG)-1; - } - return (PY_LONG_LONG)val; - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { - if (unlikely(Py_SIZE(x) < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to PY_LONG_LONG"); - return (PY_LONG_LONG)-1; - } - return PyLong_AsUnsignedLongLong(x); - } else { - return PyLong_AsLongLong(x); - } - } else { - PY_LONG_LONG val; - PyObject *tmp = __Pyx_PyNumber_Int(x); - if (!tmp) return (PY_LONG_LONG)-1; - val = __Pyx_PyInt_AsLongLong(tmp); - Py_DECREF(tmp); - return val; - } -} - -static CYTHON_INLINE signed long __Pyx_PyInt_AsSignedLong(PyObject* x) { - const signed long neg_one = (signed long)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; -#if PY_VERSION_HEX < 0x03000000 - if (likely(PyInt_Check(x))) { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to signed long"); - return (signed long)-1; - } - return (signed long)val; - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { - if (unlikely(Py_SIZE(x) < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to signed long"); - return (signed long)-1; - } - return PyLong_AsUnsignedLong(x); - } else { - return PyLong_AsLong(x); - } - } else { - signed long val; - PyObject *tmp = __Pyx_PyNumber_Int(x); - if (!tmp) return (signed long)-1; - val = __Pyx_PyInt_AsSignedLong(tmp); - Py_DECREF(tmp); - return val; - } -} - -static CYTHON_INLINE signed PY_LONG_LONG __Pyx_PyInt_AsSignedLongLong(PyObject* x) { - const signed PY_LONG_LONG neg_one = (signed PY_LONG_LONG)-1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; -#if PY_VERSION_HEX < 0x03000000 - if (likely(PyInt_Check(x))) { - long val = PyInt_AS_LONG(x); - if (is_unsigned && 
unlikely(val < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to signed PY_LONG_LONG"); - return (signed PY_LONG_LONG)-1; - } - return (signed PY_LONG_LONG)val; - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { - if (unlikely(Py_SIZE(x) < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to signed PY_LONG_LONG"); - return (signed PY_LONG_LONG)-1; - } - return PyLong_AsUnsignedLongLong(x); - } else { - return PyLong_AsLongLong(x); - } - } else { - signed PY_LONG_LONG val; - PyObject *tmp = __Pyx_PyNumber_Int(x); - if (!tmp) return (signed PY_LONG_LONG)-1; - val = __Pyx_PyInt_AsSignedLongLong(tmp); - Py_DECREF(tmp); - return val; - } -} - -static int __Pyx_SetVtable(PyObject *dict, void *vtable) { -#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3&&PY_MINOR_VERSION==0) - PyObject *ob = PyCapsule_New(vtable, 0, 0); -#else - PyObject *ob = PyCObject_FromVoidPtr(vtable, 0); -#endif - if (!ob) - goto bad; - if (PyDict_SetItemString(dict, "__pyx_vtable__", ob) < 0) - goto bad; - Py_DECREF(ob); - return 0; -bad: - Py_XDECREF(ob); - return -1; -} - -#include "compile.h" -#include "frameobject.h" -#include "traceback.h" - -static void __Pyx_AddTraceback(const char *funcname) { - PyObject *py_srcfile = 0; - PyObject *py_funcname = 0; - PyObject *py_globals = 0; - PyCodeObject *py_code = 0; - PyFrameObject *py_frame = 0; - - #if PY_MAJOR_VERSION < 3 - py_srcfile = PyString_FromString(__pyx_filename); - #else - py_srcfile = PyUnicode_FromString(__pyx_filename); - #endif - if (!py_srcfile) goto bad; - if (__pyx_clineno) { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, __pyx_clineno); - #else - py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, __pyx_clineno); - #endif - } - else { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromString(funcname); - #else - py_funcname = PyUnicode_FromString(funcname); - 
#endif - } - if (!py_funcname) goto bad; - py_globals = PyModule_GetDict(__pyx_m); - if (!py_globals) goto bad; - py_code = PyCode_New( - 0, /*int argcount,*/ - #if PY_MAJOR_VERSION >= 3 - 0, /*int kwonlyargcount,*/ - #endif - 0, /*int nlocals,*/ - 0, /*int stacksize,*/ - 0, /*int flags,*/ - __pyx_empty_bytes, /*PyObject *code,*/ - __pyx_empty_tuple, /*PyObject *consts,*/ - __pyx_empty_tuple, /*PyObject *names,*/ - __pyx_empty_tuple, /*PyObject *varnames,*/ - __pyx_empty_tuple, /*PyObject *freevars,*/ - __pyx_empty_tuple, /*PyObject *cellvars,*/ - py_srcfile, /*PyObject *filename,*/ - py_funcname, /*PyObject *name,*/ - __pyx_lineno, /*int firstlineno,*/ - __pyx_empty_bytes /*PyObject *lnotab*/ - ); - if (!py_code) goto bad; - py_frame = PyFrame_New( - PyThreadState_GET(), /*PyThreadState *tstate,*/ - py_code, /*PyCodeObject *code,*/ - py_globals, /*PyObject *globals,*/ - 0 /*PyObject *locals*/ - ); - if (!py_frame) goto bad; - py_frame->f_lineno = __pyx_lineno; - PyTraceBack_Here(py_frame); -bad: - Py_XDECREF(py_srcfile); - Py_XDECREF(py_funcname); - Py_XDECREF(py_code); - Py_XDECREF(py_frame); -} - -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { - while (t->p) { - #if PY_MAJOR_VERSION < 3 - if (t->is_unicode) { - *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); - } else if (t->intern) { - *t->p = PyString_InternFromString(t->s); - } else { - *t->p = PyString_FromStringAndSize(t->s, t->n - 1); - } - #else /* Python 3+ has unicode identifiers */ - if (t->is_unicode | t->is_str) { - if (t->intern) { - *t->p = PyUnicode_InternFromString(t->s); - } else if (t->encoding) { - *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); - } else { - *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); - } - } else { - *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); - } - #endif - if (!*t->p) - return -1; - ++t; - } - return 0; -} - -/* Type Conversion Functions */ - -static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { - int is_true = x == Py_True; 
- if (is_true | (x == Py_False) | (x == Py_None)) return is_true; - else return PyObject_IsTrue(x); -} - -static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) { - PyNumberMethods *m; - const char *name = NULL; - PyObject *res = NULL; -#if PY_VERSION_HEX < 0x03000000 - if (PyInt_Check(x) || PyLong_Check(x)) -#else - if (PyLong_Check(x)) -#endif - return Py_INCREF(x), x; - m = Py_TYPE(x)->tp_as_number; -#if PY_VERSION_HEX < 0x03000000 - if (m && m->nb_int) { - name = "int"; - res = PyNumber_Int(x); - } - else if (m && m->nb_long) { - name = "long"; - res = PyNumber_Long(x); - } -#else - if (m && m->nb_int) { - name = "int"; - res = PyNumber_Long(x); - } -#endif - if (res) { -#if PY_VERSION_HEX < 0x03000000 - if (!PyInt_Check(res) && !PyLong_Check(res)) { -#else - if (!PyLong_Check(res)) { -#endif - PyErr_Format(PyExc_TypeError, - "__%s__ returned non-%s (type %.200s)", - name, name, Py_TYPE(res)->tp_name); - Py_DECREF(res); - return NULL; - } - } - else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, - "an integer is required"); - } - return res; -} - -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { - Py_ssize_t ival; - PyObject* x = PyNumber_Index(b); - if (!x) return -1; - ival = PyInt_AsSsize_t(x); - Py_DECREF(x); - return ival; -} - -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { -#if PY_VERSION_HEX < 0x02050000 - if (ival <= LONG_MAX) - return PyInt_FromLong((long)ival); - else { - unsigned char *bytes = (unsigned char *) &ival; - int one = 1; int little = (int)*(unsigned char*)&one; - return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0); - } -#else - return PyInt_FromSize_t(ival); -#endif -} - -static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) { - unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x); - if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) { - return (size_t)-1; - } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) { - 
PyErr_SetString(PyExc_OverflowError, - "value too large to convert to size_t"); - return (size_t)-1; - } - return (size_t)val; -} - - -#endif /* Py_PYTHON_H */ diff --git a/src/re2.pyx b/src/re2.pyx index 71752944..d8bbeaaf 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -1,36 +1,79 @@ # cython: infer_types(False) +"""Regular expressions using Google's RE2 engine. + +Compared to Python's ``re``, the RE2 engine converts regular expressions to +deterministic finite automata, which guarantees linear-time behavior. + +Intended as a drop-in replacement for ``re``. Unicode is supported by encoding +to UTF-8, and bytes strings are treated as UTF-8. For best performance, work +with UTF-8 encoded bytes strings. + +Regular expressions that are not compatible with RE2 are processed with +fallback to ``re``. Examples of features not supported by RE2: + + - lookahead assertions ``(?!...)`` + - backreferences (``\\n`` in search pattern) + - \W and \S not supported inside character classes + +On the other hand, unicode character classes are supported. +Syntax reference: https://github.com/google/re2/wiki/Syntax +""" + # Import re flags to be compatible. 
import sys import re I = re.I -IGNORECASE = re.IGNORECASE M = re.M -MULTILINE = re.MULTILINE S = re.S -DOTALL = re.DOTALL U = re.U -UNICODE = re.UNICODE X = re.X -VERBOSE = re.VERBOSE L = re.L +IGNORECASE = re.IGNORECASE +MULTILINE = re.MULTILINE +DOTALL = re.DOTALL +UNICODE = re.UNICODE +VERBOSE = re.VERBOSE LOCALE = re.LOCALE +cdef int _I = re.I +cdef int _M = re.M +cdef int _S = re.S +cdef int _U = re.U +cdef int _X = re.X +cdef int _L = re.L + FALLBACK_QUIETLY = 0 FALLBACK_WARNING = 1 FALLBACK_EXCEPTION = 2 VERSION = (0, 2, 23) VERSION_HEX = 0x000217 +cdef int current_notification = FALLBACK_QUIETLY # Type of compiled re object from Python stdlib SREPattern = type(re.compile('')) -cdef int current_notification = FALLBACK_QUIETLY + +class RegexError(re.error): + """Some error has occurred in compilation of the regex.""" + pass + +error = RegexError + + +class BackreferencesException(Exception): + """Search pattern contains backreferences.""" + pass + + +class CharClassProblemException(Exception): + """Search pattern contains unsupported character class.""" + pass + def set_fallback_notification(level): - """ - Set the fallback notification to a level; one of: + """Set the fallback notification to a level; one of: FALLBACK_QUIETLY FALLBACK_WARNING FALLBACK_EXCEPTION @@ -42,106 +85,99 @@ def set_fallback_notification(level): current_notification = level -class RegexError(re.error): - """ - Some error has occured in compilation of the regex. - """ - pass - -error = RegexError -cdef int _I = I, _M = M, _S = S, _U = U, _X = X, _L = L cimport _re2 cimport cpython.unicode from cython.operator cimport preincrement as inc, dereference as deref import warnings -cdef object cpp_to_pystring(_re2.cpp_string input): - # This function is a quick converter from a std::string object - # to a python string.
By taking the slice we go to the right size, + +cdef bytes cpp_to_bytes(_re2.cpp_string input): + """Convert from a std::string object to a python string.""" + # By taking the slice we go to the right size, # despite spurious or missing null characters. return input.c_str()[:input.length()] -cdef inline object cpp_to_utf8(_re2.cpp_string input): - # This function converts a std::string object to a utf8 object. - return cpython.unicode.PyUnicode_DecodeUTF8(input.c_str(), input.length(), 'strict') -cdef inline object char_to_utf8(_re2.const_char_ptr input, int length): - # This function converts a C string to a utf8 object. +cdef inline unicode cpp_to_unicode(_re2.cpp_string input): + """Convert a std::string object to a unicode string.""" + return cpython.unicode.PyUnicode_DecodeUTF8( + input.c_str(), input.length(), 'strict') + + +cdef inline unicode char_to_unicode(_re2.const_char_ptr input, int length): + """Convert a C string to a unicode string.""" return cpython.unicode.PyUnicode_DecodeUTF8(input, length, 'strict') -cdef inline object unicode_to_bytestring(object pystring, int * encoded): - # This function will convert a utf8 string to a bytestring object. + +cdef inline unicode_to_bytes(object pystring, int * encoded): + """Convert a unicode string to a utf8 bytes object, if necessary. 
+ + If pystring is a bytes string or a buffer, return unchanged.""" if cpython.unicode.PyUnicode_Check(pystring): - pystring = cpython.unicode.PyUnicode_EncodeUTF8(cpython.unicode.PyUnicode_AS_UNICODE(pystring), - cpython.unicode.PyUnicode_GET_SIZE(pystring), - "strict") + pystring = cpython.unicode.PyUnicode_EncodeUTF8( + cpython.unicode.PyUnicode_AS_UNICODE(pystring), + cpython.unicode.PyUnicode_GET_SIZE(pystring), + "strict") encoded[0] = 1 else: encoded[0] = 0 return pystring -cdef inline int pystring_to_bytestring(object pystring, char ** cstring, Py_ssize_t * length): - # This function will convert a pystring to a bytesstring, placing - # the char * in cstring, and the length in length. + +cdef inline int pystring_to_cstring( + object pystring, char ** cstring, Py_ssize_t * length): + """Get a C string from a bytes/buffer object.""" + # Place the char * in cstring, and the length in length. # First it will try treating it as a str object, but failing that # it will move to utf-8. If utf8 does not work, then it has to be # a non-supported encoding. 
- return _re2.PyObject_AsCharBuffer(pystring, <_re2.const_char_ptr*> cstring, length) + return _re2.PyObject_AsCharBuffer( + pystring, <_re2.const_char_ptr*> cstring, length) + # FIXME: use Python 3 buffer interface when available + cdef extern from *: cdef void emit_ifndef_py_unicode_wide "#if !defined(Py_UNICODE_WIDE) //" () cdef void emit_endif "#endif //" () + cdef class Match: + cdef readonly Pattern re + cdef readonly object string + cdef readonly int pos + cdef readonly int endpos + cdef _re2.StringPiece * matches cdef _re2.const_stringintmap * named_groups - cdef bint encoded - cdef int _lastindex cdef int nmatches - cdef int _pos - cdef int _endpos - cdef object match_string - cdef object _pattern_object + cdef int _lastindex cdef tuple _groups cdef tuple _spans cdef dict _named_groups cdef dict _named_indexes - def __init__(self, object pattern_object, int num_groups): + def __init__(self, Pattern pattern_object, int num_groups): self._lastindex = -1 self._groups = None - self._pos = 0 - self._endpos = -1 + self.pos = 0 + self.endpos = -1 self.matches = _re2.new_StringPiece_array(num_groups + 1) self.nmatches = num_groups - self._pattern_object = pattern_object + self.re = pattern_object def __dealloc__(self): _re2.delete_StringPiece_array(self.matches) - property re: - def __get__(self): - return self._pattern_object - - property pos: - def __get__(self): - return self._pos - - property endpos: - def __get__(self): - return self._endpos - - property string: - def __get__(self): - return self.match_string + def __repr__(self): + return '' % ( + (self.pos, self.endpos), self.string) cdef init_groups(self): cdef list groups = [] cdef int i - cdef bint cur_encoded = self.encoded if self._groups is not None: return @@ -161,55 +197,60 @@ cdef class Match: self._lastindex = i else: # The rules for last group are a bit complicated: - # if two groups end at the same point, the earlier one is considered last - # so we don't switch our selection unless the end 
point has moved + # if two groups end at the same point, the earlier one + # is considered last, so we don't switch our selection + # unless the end point has moved. if cur_end > last_end: last_end = cur_end self._lastindex = i - - if cur_encoded: - groups.append(char_to_utf8(self.matches[i].data(), self.matches[i].length())) - else: - groups.append(self.matches[i].data()[:self.matches[i].length()]) + groups.append( + self.matches[i].data()[:self.matches[i].length()]) self._groups = tuple(groups) def groups(self, default=None): self.init_groups() + if self.encoded: + return tuple([ + g.decode('utf8') if g else default + for g in self._groups[1:]]) if default is not None: return tuple([g or default for g in self._groups[1:]]) return self._groups[1:] def group(self, *args): - try: - string = basestring - except NameError as e: - string = (str, bytes) - if len(args) > 1: - return tuple([self.group(i) for i in args]) - elif len(args) > 0: - groupnum = args[0] - else: + if len(args) == 0: groupnum = 0 + elif len(args) == 1: + groupnum = args[0] + else: # len(args) > 1: + return tuple([self.group(i) for i in args]) + if self.encoded: + return self._group(groupnum).decode('utf8') + return self._group(groupnum) + cdef bytes _group(self, object groupnum): cdef int idx - self.init_groups() - - if isinstance(groupnum, string): - return self.groupdict()[groupnum] - - idx = groupnum - - if idx > self.nmatches - 1: - raise IndexError("no such group") - return self._groups[idx] - - cdef object _convert_positions(self, positions): - cdef char * s = self.match_string + if isinstance(groupnum, int): + idx = groupnum + if idx > self.nmatches - 1: + raise IndexError("no such group %d; available groups: %r" + % (idx, list(range(self.nmatches)))) + return self._groups[idx] + groupdict = self._groupdict() + if groupnum not in groupdict: + raise IndexError("no such group %r; available groups: %r" + % (groupnum, list(groupdict.keys()))) + return groupdict[groupnum] + + cdef list 
_convert_positions(self, positions): + cdef char * s cdef int cpos = 0 cdef int upos = 0 - cdef int size = len(self.match_string) + cdef Py_ssize_t size cdef int c + if pystring_to_cstring(self.string, &s, &size) == -1: + raise TypeError("expected string or buffer") new_positions = [] i = 0 @@ -239,7 +280,8 @@ cdef class Match: else: cpos += 4 inc(upos) - # wide unicode chars get 2 unichars when python is compiled with --enable-unicode=ucs2 + # wide unicode chars get 2 unichars when python is compiled + # with --enable-unicode=ucs2 # TODO: verify this emit_ifndef_py_unicode_wide() inc(upos) @@ -252,11 +294,11 @@ cdef class Match: return new_positions def _convert_spans(self, spans): - positions = [x for x,y in spans] + [y for x,y in spans] + positions = [x for x, _ in spans] + [y for _, y in spans] positions = sorted(set(positions)) posdict = dict(zip(positions, self._convert_positions(positions))) - return [(posdict[x], posdict[y]) for x,y in spans] + return [(posdict[x], posdict[y]) for x, y in spans] cdef _make_spans(self): @@ -264,8 +306,11 @@ cdef class Match: return cdef int start, end - cdef char * s = self.match_string + cdef char * s + cdef Py_ssize_t size cdef _re2.StringPiece * piece + if pystring_to_cstring(self.string, &s, &size) == -1: + raise TypeError("expected string or buffer") spans = [] for i in range(self.nmatches): @@ -284,33 +329,31 @@ cdef class Match: self._spans = tuple(spans) - property regs: - def __get__(self): - if self._spans is None: - self._make_spans() - return self._spans - def expand(self, object template): + """Expand a template with groups.""" # TODO - This can be optimized to work a bit faster in C. 
- # Expand a template with groups - items = template.split('\\') + if isinstance(template, unicode): + template = template.encode('utf8') + items = template.split(b'\\') for i, item in enumerate(items[1:]): - if item[0].isdigit(): + if item[0:1].isdigit(): # Number group - if item[0] == '0': - items[i + 1] = '\x00' + item[1:] + if item[0] == b'0': + items[i + 1] = b'\x00' + item[1:] # ??? else: - items[i + 1] = self.group(int(item[0])) + item[1:] - elif item[:2] == 'g<' and '>' in item: + items[i + 1] = self._group(int(item[0:1])) + item[1:] + elif item[:2] == b'g<' and b'>' in item: # This is a named group - name, rest = item[2:].split('>', 1) - items[i + 1] = self.group(name) + rest + name, rest = item[2:].split(b'>', 1) + items[i + 1] = self._group(name) + rest else: # This isn't a template at all - items[i + 1] = '\\' + item - return ''.join(items) + items[i + 1] = b'\\' + item + if self.encoded: + return b''.join(items).decode('utf8') + return b''.join(items) - def groupdict(self): + cdef dict _groupdict(self): cdef _re2.stringintmapiterator it cdef dict result = {} cdef dict indexes = {} @@ -323,14 +366,22 @@ cdef class Match: self._named_groups = result it = self.named_groups.begin() while it != self.named_groups.end(): - indexes[cpp_to_pystring(deref(it).first)] = deref(it).second - result[cpp_to_pystring(deref(it).first)] = self._groups[deref(it).second] + indexes[cpp_to_bytes(deref(it).first)] = deref(it).second + result[cpp_to_bytes(deref(it).first)] = self._groups[ + deref(it).second] inc(it) self._named_groups = result self._named_indexes = indexes return result + def groupdict(self): + result = self._groupdict() + if self.encoded: + return {a.decode('utf8') if isinstance(a, bytes) else a: + b.decode('utf8') for a, b in result.items()} + return result + def end(self, group=0): return self.span(group)[1] @@ -339,16 +390,25 @@ cdef class Match: def span(self, group=0): self._make_spans() - if type(group) is int: + if isinstance(group, int): if group > 
len(self._spans): - raise IndexError("no such group") + raise IndexError("no such group %d; available groups: %r" + % (group, list(range(len(self._spans))))) return self._spans[group] else: - self.groupdict() + self._groupdict() + if self.encoded: + group = group.encode('utf8') if group not in self._named_indexes: - raise IndexError("no such group") + raise IndexError("no such group %r; available groups: %r" + % (group, list(self._named_indexes))) return self._spans[self._named_indexes[group]] + property regs: + def __get__(self): + if self._spans is None: + self._make_spans() + return self._spans property lastindex: def __get__(self): @@ -369,49 +429,43 @@ cdef class Match: it = self.named_groups.begin() while it != self.named_groups.end(): if deref(it).second == self._lastindex: - return cpp_to_pystring(deref(it).first) + return cpp_to_bytes(deref(it).first) inc(it) return None cdef class Pattern: + cdef readonly int flags + cdef readonly int groups + cdef readonly object pattern + cdef _re2.RE2 * re_pattern - cdef int ngroups cdef bint encoded - cdef int _flags - cdef public object pattern cdef object __weakref__ - property flags: - def __get__(self): - return self._flags - - property groups: - def __get__(self): - return self.ngroups - def __dealloc__(self): del self.re_pattern + def __repr__(self): + return 're2.compile(%r, %r)' % (self.pattern, self.flags) + cdef _search(self, string, int pos, int endpos, _re2.re2_Anchor anchoring): - """ - Scan through string looking for a match, and return a corresponding - Match instance. Return None if no position in the string matches. - """ + """Scan through string looking for a match, and return a corresponding + Match instance. 
Return None if no position in the string matches.""" cdef Py_ssize_t size cdef int result cdef char * cstring cdef int encoded = 0 cdef _re2.StringPiece * sp - cdef Match m = Match(self, self.ngroups + 1) + cdef Match m = Match(self, self.groups + 1) if hasattr(string, 'tostring'): string = string.tostring() - string = unicode_to_bytestring(string, &encoded) + string = unicode_to_bytes(string, &encoded) - if pystring_to_bytestring(string, &cstring, &size) == -1: + if pystring_to_cstring(string, &cstring, &size) == -1: raise TypeError("expected string or buffer") if endpos >= 0 and endpos <= pos: @@ -425,58 +479,54 @@ cdef class Pattern: sp = new _re2.StringPiece(cstring, size) with nogil: - result = self.re_pattern.Match(sp[0], pos, size, anchoring, m.matches, self.ngroups + 1) + result = self.re_pattern.Match( + sp[0], + pos, + size, + anchoring, + m.matches, + self.groups + 1) del sp if result == 0: return None m.encoded = (encoded) m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.ngroups + 1 - m.match_string = string - m._pos = pos + m.nmatches = self.groups + 1 + m.string = string + m.pos = pos if endpos == -1: - m._endpos = len(string) + m.endpos = len(string) else: - m._endpos = endpos + m.endpos = endpos return m - - def search(self, string, int pos=0, int endpos=-1): - """ - Scan through string looking for a match, and return a corresponding - Match instance. Return None if no position in the string matches. - """ + def search(self, object string, int pos=0, int endpos=-1): + """Scan through string looking for a match, and return a corresponding + Match instance. Return None if no position in the string matches.""" return self._search(string, pos, endpos, _re2.UNANCHORED) - - def match(self, string, int pos=0, int endpos=-1): - """ - Matches zero or more characters at the beginning of the string. 
- """ + def match(self, object string, int pos=0, int endpos=-1): + """Matches zero or more characters at the beginning of the string.""" return self._search(string, pos, endpos, _re2.ANCHOR_START) - cdef _print_pattern(self): + def _print_pattern(self): cdef _re2.cpp_string * s s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) - print cpp_to_pystring(s[0]) + "\n" - sys.stdout.flush() + print(cpp_to_bytes(s[0]).decode('utf8')) def finditer(self, object string, int pos=0, int endpos=-1): - """ - Yield all non-overlapping matches of pattern in string as Match - objects. - """ + """Yield all non-overlapping matches of pattern in string as Match + objects.""" cdef Py_ssize_t size cdef int result cdef char * cstring cdef _re2.StringPiece * sp cdef Match m - cdef list resultlist = [] cdef int encoded = 0 - string = unicode_to_bytestring(string, &encoded) - if pystring_to_bytestring(string, &cstring, &size) == -1: + string = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(string, &cstring, &size) == -1: raise TypeError("expected string or buffer") encoded = encoded @@ -486,7 +536,7 @@ cdef class Pattern: sp = new _re2.StringPiece(cstring, size) while True: - m = Match(self, self.ngroups + 1) + m = Match(self, self.groups + 1) with nogil: result = self.re_pattern.Match( sp[0], @@ -494,19 +544,19 @@ cdef class Pattern: size, _re2.UNANCHORED, m.matches, - self.ngroups + 1) + self.groups + 1) if result == 0: break m.encoded = encoded m.named_groups = _re2.addressof( self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.ngroups + 1 - m.match_string = string - m._pos = pos + m.nmatches = self.groups + 1 + m.string = string + m.pos = pos if endpos == -1: - m._endpos = len(string) + m.endpos = len(string) else: - m._endpos = endpos + m.endpos = endpos yield m if pos == size: break @@ -518,10 +568,8 @@ cdef class Pattern: del sp def findall(self, object string, int pos=0, int endpos=-1): - """ - Return all non-overlapping matches of pattern in 
string as a list - of strings. - """ + """Return all non-overlapping matches of pattern in string as a list + of strings.""" cdef Py_ssize_t size cdef int result cdef char * cstring @@ -530,8 +578,8 @@ cdef class Pattern: cdef list resultlist = [] cdef int encoded = 0 - string = unicode_to_bytestring(string, &encoded) - if pystring_to_bytestring(string, &cstring, &size) == -1: + string = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(string, &cstring, &size) == -1: raise TypeError("expected string or buffer") encoded = encoded @@ -541,7 +589,8 @@ cdef class Pattern: sp = new _re2.StringPiece(cstring, size) while True: - m = Match(self, self.ngroups + 1) + # FIXME: can probably avoid creating Match objects + m = Match(self, self.groups + 1) with nogil: result = self.re_pattern.Match( sp[0], @@ -549,23 +598,23 @@ cdef class Pattern: size, _re2.UNANCHORED, m.matches, - self.ngroups + 1) + self.groups + 1) if result == 0: break m.encoded = encoded m.named_groups = _re2.addressof( self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.ngroups + 1 - m.match_string = string - m._pos = pos + m.nmatches = self.groups + 1 + m.string = string + m.pos = pos if endpos == -1: - m._endpos = len(string) + m.endpos = len(string) else: - m._endpos = endpos - if self.ngroups > 1: + m.endpos = endpos + if self.groups > 1: resultlist.append(m.groups("")) else: - resultlist.append(m.group(self.ngroups)) + resultlist.append(m.group(self.groups)) if pos == size: break # offset the pos to move to the next point @@ -577,39 +626,39 @@ cdef class Pattern: return resultlist def split(self, string, int maxsplit=0): - """ - split(string[, maxsplit = 0]) --> list - Split a string by the occurances of the pattern. 
- """ + """split(string[, maxsplit = 0]) --> list + + Split a string by the occurrences of the pattern.""" cdef Py_ssize_t size - cdef int num_groups = 1 cdef int result - cdef int endpos cdef int pos = 0 cdef int lookahead = 0 cdef int num_split = 0 cdef char * cstring cdef _re2.StringPiece * sp cdef _re2.StringPiece * matches - cdef Match m cdef list resultlist = [] cdef int encoded = 0 if maxsplit < 0: maxsplit = 0 - string = unicode_to_bytestring(string, &encoded) - if pystring_to_bytestring(string, &cstring, &size) == -1: + string = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(string, &cstring, &size) == -1: raise TypeError("expected string or buffer") - encoded = encoded - - matches = _re2.new_StringPiece_array(self.ngroups + 1) + matches = _re2.new_StringPiece_array(self.groups + 1) sp = new _re2.StringPiece(cstring, size) while True: with nogil: - result = self.re_pattern.Match(sp[0], (pos + lookahead), size, _re2.UNANCHORED, matches, self.ngroups + 1) + result = self.re_pattern.Match( + sp[0], + (pos + lookahead), + size, + _re2.UNANCHORED, + matches, + self.groups + 1) if result == 0: break @@ -624,18 +673,22 @@ cdef class Pattern: continue if encoded: - resultlist.append(char_to_utf8(&sp.data()[pos], match_start - pos)) + resultlist.append( + char_to_unicode(&sp.data()[pos], match_start - pos)) else: resultlist.append(sp.data()[pos:match_start]) - if self.ngroups > 0: - for group in range(self.ngroups): + if self.groups > 0: + for group in range(self.groups): if matches[group + 1].data() == NULL: resultlist.append(None) else: if encoded: - resultlist.append(char_to_utf8(matches[group + 1].data(), matches[group + 1].length())) + resultlist.append(char_to_unicode( + matches[group + 1].data(), + matches[group + 1].length())) else: - resultlist.append(matches[group + 1].data()[:matches[group + 1].length()]) + resultlist.append(matches[group + 1].data()[: + matches[group + 1].length()]) # offset the pos to move to the next point pos = match_end 
@@ -646,7 +699,8 @@ cdef class Pattern: break if encoded: - resultlist.append(char_to_utf8(&sp.data()[pos], sp.length() - pos)) + resultlist.append( + char_to_unicode(&sp.data()[pos], sp.length() - pos)) else: resultlist.append(sp.data()[pos:]) _re2.delete_StringPiece_array(matches) @@ -654,20 +708,18 @@ cdef class Pattern: return resultlist def sub(self, repl, string, int count=0): - """ - sub(repl, string[, count = 0]) --> newstring + """sub(repl, string[, count = 0]) --> newstring + Return the string obtained by replacing the leftmost non-overlapping - occurrences of pattern in string by the replacement repl. - """ + occurrences of pattern in string by the replacement repl.""" return self.subn(repl, string, count)[0] def subn(self, repl, string, int count=0): - """ - subn(repl, string[, count = 0]) --> (newstring, number of subs) + """subn(repl, string[, count = 0]) --> (newstring, number of subs) + Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the - replacement repl. 
- """ + replacement repl.""" cdef Py_ssize_t size cdef char * cstring cdef _re2.cpp_string * fixed_repl @@ -676,15 +728,14 @@ cdef class Pattern: cdef total_replacements = 0 cdef int string_encoded = 0 cdef int repl_encoded = 0 - cdef int encoded = 0 if callable(repl): # This is a callback, so let's use the custom function return self._subn_callback(repl, string, count) - string = unicode_to_bytestring(string, &string_encoded) - repl = unicode_to_bytestring(repl, &repl_encoded) - if pystring_to_bytestring(repl, &cstring, &size) == -1: + string = unicode_to_bytes(string, &string_encoded) + repl = unicode_to_bytes(repl, &repl_encoded) + if pystring_to_cstring(repl, &cstring, &size) == -1: raise TypeError("expected string or buffer") fixed_repl = NULL @@ -693,23 +744,24 @@ cdef class Pattern: cdef int c = 0 while s < end: c = s[0] - if (c == '\\'): + if (c == b'\\'): s += 1 if s == end: raise RegexError("Invalid rewrite pattern") c = s[0] - if c == '\\' or (c >= '0' and c <= '9'): + if c == b'\\' or (c >= b'0' and c <= b'9'): if fixed_repl != NULL: - fixed_repl.push_back('\\') + fixed_repl.push_back(b'\\') fixed_repl.push_back(c) else: if fixed_repl == NULL: - fixed_repl = new _re2.cpp_string(cstring, s - cstring - 1) - if c == 'n': - fixed_repl.push_back('\n') + fixed_repl = new _re2.cpp_string( + cstring, s - cstring - 1) + if c == b'n': + fixed_repl.push_back(b'\n') else: - fixed_repl.push_back('\\') - fixed_repl.push_back('\\') + fixed_repl.push_back(b'\\') + fixed_repl.push_back(b'\\') fixed_repl.push_back(c) else: if fixed_repl != NULL: @@ -723,33 +775,31 @@ cdef class Pattern: input_str = new _re2.cpp_string(string) if not count: - total_replacements = _re2.pattern_GlobalReplace(input_str, - self.re_pattern[0], - sp[0]) + total_replacements = _re2.pattern_GlobalReplace( + input_str, self.re_pattern[0], sp[0]) elif count == 1: - total_replacements = _re2.pattern_Replace(input_str, - self.re_pattern[0], - sp[0]) + total_replacements = _re2.pattern_Replace( + 
input_str, self.re_pattern[0], sp[0]) else: del fixed_repl del input_str del sp - raise NotImplementedError("So far pyre2 does not support custom replacement counts") + raise NotImplementedError( + "So far pyre2 does not support custom replacement counts") if string_encoded or (repl_encoded and total_replacements > 0): - result = cpp_to_utf8(input_str[0]) + result = cpp_to_unicode(input_str[0]) else: - result = cpp_to_pystring(input_str[0]) + result = cpp_to_bytes(input_str[0]) del fixed_repl del input_str del sp return (result, total_replacements) def _subn_callback(self, callback, string, int count=0): - """ - This function is probably the hardest to implement correctly. - This is my first attempt, but if anybody has a better solution, please help out. - """ + # This function is probably the hardest to implement correctly. + # This is my first attempt, but if anybody has a better solution, + # please help out. cdef Py_ssize_t size cdef int result cdef int endpos @@ -764,8 +814,8 @@ cdef class Pattern: if count < 0: count = 0 - string = unicode_to_bytestring(string, &encoded) - if pystring_to_bytestring(string, &cstring, &size) == -1: + string = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(string, &cstring, &size) == -1: raise TypeError("expected string or buffer") encoded = encoded @@ -773,23 +823,31 @@ cdef class Pattern: try: while True: - m = Match(self, self.ngroups + 1) + m = Match(self, self.groups + 1) with nogil: - result = self.re_pattern.Match(sp[0], pos, size, _re2.UNANCHORED, m.matches, self.ngroups + 1) + result = self.re_pattern.Match( + sp[0], + pos, + size, + _re2.UNANCHORED, + m.matches, + self.groups + 1) if result == 0: break endpos = m.matches[0].data() - cstring if encoded: - resultlist.append(char_to_utf8(&sp.data()[pos], endpos - pos)) + resultlist.append( + char_to_unicode(&sp.data()[pos], endpos - pos)) else: resultlist.append(sp.data()[pos:endpos]) pos = endpos + m.matches[0].length() m.encoded = encoded - m.named_groups = 
_re2.addressof(self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.ngroups + 1 - m.match_string = string + m.named_groups = _re2.addressof( + self.re_pattern.NamedCapturingGroups()) + m.nmatches = self.groups + 1 + m.string = string resultlist.append(callback(m) or '') num_repl += 1 @@ -797,11 +855,12 @@ cdef class Pattern: break if encoded: - resultlist.append(char_to_utf8(&sp.data()[pos], sp.length() - pos)) + resultlist.append( + char_to_unicode(&sp.data()[pos], sp.length() - pos)) return (u''.join(resultlist), num_repl) else: resultlist.append(sp.data()[pos:]) - return (''.join(resultlist), num_repl) + return (b''.join(resultlist), num_repl) finally: del sp @@ -811,151 +870,153 @@ _cache_repl = {} _MAXCACHE = 100 def compile(pattern, int flags=0, int max_mem=8388608): - cachekey = (type(pattern),) + (pattern, flags) - p = _cache.get(cachekey) - if p is not None: - return p + cachekey = (type(pattern), pattern, flags) + if cachekey in _cache: + return _cache[cachekey] p = _compile(pattern, flags, max_mem) if len(_cache) >= _MAXCACHE: - _cache.clear() + _cache.popitem() _cache[cachekey] = p return p -class BackreferencesException(Exception): - pass -class CharClassProblemException(Exception): - pass +WHITESPACE = b' \t\n\r\v\f' -WHITESPACE = set(" \t\n\r\v\f") -class Tokenizer: - def __init__(self, string): +cdef class Tokenizer: + cdef bytes string + cdef bytes next + cdef int length + cdef int index + + def __init__(self, bytes string): self.string = string + self.length = len(string) self.index = 0 - self.__next() - def __next(self): - if self.index >= len(self.string): + self._next() + + cdef _next(self): + cdef bytes ch + if self.index >= self.length: self.next = None return - ch = self.string[self.index] - if ch[0] == "\\": - try: - c = self.string[self.index + 1] - except IndexError: - raise RegexError, "bogus escape (end of line)" - ch = ch + c - self.index = self.index + len(ch) + ch = self.string[self.index:self.index + 1] + if ch[0:1] == b'\\': 
+ if self.index + 2 > self.length: + raise RegexError("bogus escape (end of line)") + ch = self.string[self.index:self.index + 2] + self.index += 1 + self.index += 1 + # FIXME: return indices instead of creating new bytes objects self.next = ch - def get(self): - this = self.next - self.__next() + + cdef bytes get(self): + cdef bytes this = self.next + self._next() return this -def prepare_pattern(pattern, int flags): - source = Tokenizer(pattern) - new_pattern = [] - cdef str strflags = '' - if flags & _S: - strflags += 's' - if flags & _M: - strflags += 'm' +def prepare_pattern(object pattern, int flags): + cdef bytearray result = bytearray() + cdef bytes this + cdef Tokenizer source = Tokenizer(pattern) - if strflags: - new_pattern.append('(?' + strflags + ')') + if flags & (_S | _M): + result.extend(b'(?') + if flags & _S: + result.append(b's') + if flags & _M: + result.append(b'm') + result.append(b')') - while 1: + while True: this = source.get() if this is None: break if flags & _X: if this in WHITESPACE: continue - if this == "#": - while 1: + if this == b"#": + while True: this = source.get() - if this in (None, "\n"): + if this in (None, b'\n'): break continue - if this[0] not in '[\\': - new_pattern.append(this) + if this[0:1] != b'[' and this[0:1] != b'\\': + result.extend(this) continue - elif this == '[': - new_pattern.append(this) - while 1: + elif this == b'[': + result.extend(this) + while True: this = source.get() if this is None: - raise RegexError, "unexpected end of regular expression" - elif this == ']': - new_pattern.append(this) + raise RegexError("unexpected end of regular expression") + elif this == b']': + result.extend(this) break - elif this[0] == '\\': + elif this[0:1] == b'\\': if flags & _U: - if this[1] == 'd': - new_pattern.append(r'\p{Nd}') - elif this[1] == 'w': - new_pattern.append(r'_\p{L}\p{Nd}') - elif this[1] == 's': - new_pattern.append(r'\s\p{Z}') - elif this[1] == 'D': - new_pattern.append(r'\P{Nd}') - elif this[1] == 
'W': - # Since \w and \s are made out of several character groups, - # I don't see a way to convert their complements into a group - # without rewriting the whole expression, which seems too complicated. - - raise CharClassProblemException() - elif this[1] == 'S': - raise CharClassProblemException() + if this[1:2] == b'd': + result.extend(br'\p{Nd}') + elif this[1:2] == b'w': + result.extend(br'_\p{L}\p{Nd}') + elif this[1:2] == b's': + result.extend(br'\s\p{Z}') + elif this[1:2] == b'D': + result.extend(br'\P{Nd}') + elif this[1:2] == b'W': + # Since \w and \s are made out of several character + # groups, I don't see a way to convert their + # complements into a group without rewriting the + # whole expression, which seems too complicated. + raise CharClassProblemException(repr(this)) + elif this[1:2] == b'S': + raise CharClassProblemException(repr(this)) else: - new_pattern.append(this) + result.extend(this) else: - new_pattern.append(this) + result.extend(this) else: - new_pattern.append(this) - elif this[0] == '\\': - if this[1] in '89': - raise BackreferencesException() - elif this[1] in '1234567': - if source.next and source.next in '1234567': + result.extend(this) + elif this[0:1] == b'\\': + if b'8' <= this[1:2] <= b'9': + raise BackreferencesException('%r %r' % (this, pattern)) + elif b'1' <= this[1:2] <= b'7': + if source.next and source.next in b'1234567': this += source.get() - if source.next and source.next in '1234567': + if source.next and source.next in b'1234567': # all clear, this is an octal escape - new_pattern.append(this) + result.extend(this) else: - raise BackreferencesException() + raise BackreferencesException('%r %r' % (this, pattern)) else: - raise BackreferencesException() + raise BackreferencesException('%r %r' % (this, pattern)) elif flags & _U: - if this[1] == 'd': - new_pattern.append(r'\p{Nd}') - elif this[1] == 'w': - new_pattern.append(r'[_\p{L}\p{Nd}]') - elif this[1] == 's': - new_pattern.append(r'[\s\p{Z}]') - elif this[1] == 
'D': - new_pattern.append(r'[^\p{Nd}]') - elif this[1] == 'W': - new_pattern.append(r'[^_\p{L}\p{Nd}]') - elif this[1] == 'S': - new_pattern.append(r'[^\s\p{Z}]') + if this[1:2] == b'd': + result.extend(br'\p{Nd}') + elif this[1:2] == b'w': + result.extend(br'[_\p{L}\p{Nd}]') + elif this[1:2] == b's': + result.extend(br'[\s\p{Z}]') + elif this[1:2] == b'D': + result.extend(br'[^\p{Nd}]') + elif this[1:2] == b'W': + result.extend(br'[^_\p{L}\p{Nd}]') + elif this[1:2] == b'S': + result.extend(br'[^\s\p{Z}]') else: - new_pattern.append(this) + result.extend(this) else: - new_pattern.append(this) - - return ''.join(new_pattern) + result.extend(this) + return result -def _compile(pattern, int flags=0, int max_mem=8388608): - """ - Compile a regular expression pattern, returning a pattern object. - """ +def _compile(object pattern, int flags=0, int max_mem=8388608): + """Compile a regular expression pattern, returning a pattern object.""" cdef char * string cdef Py_ssize_t length cdef _re2.StringPiece * s @@ -965,12 +1026,14 @@ def _compile(pattern, int flags=0, int max_mem=8388608): if isinstance(pattern, (Pattern, SREPattern)): if flags: - raise ValueError('Cannot process flags argument with a compiled pattern') + raise ValueError( + 'Cannot process flags argument with a compiled pattern') return pattern cdef object original_pattern = pattern + pattern = unicode_to_bytes(pattern, &encoded) try: - pattern = prepare_pattern(original_pattern, flags) + pattern = prepare_pattern(pattern, flags) except BackreferencesException: error_msg = "Backreferences not supported" if current_notification == FALLBACK_EXCEPTION: @@ -997,11 +1060,8 @@ def _compile(pattern, int flags=0, int max_mem=8388608): opts.set_encoding(_re2.EncodingUTF8) # We use this function to get the proper length of the string. 
- - pattern = unicode_to_bytestring(pattern, &encoded) - if pystring_to_bytestring(pattern, &string, &length) == -1: + if pystring_to_cstring(pattern, &string, &length) == -1: raise TypeError("first argument must be a string or compiled pattern") - s = new _re2.StringPiece(string, length) cdef _re2.RE2 *re_pattern @@ -1011,7 +1071,7 @@ def _compile(pattern, int flags=0, int max_mem=8388608): if not re_pattern.ok(): # Something went wrong with the compilation. del s - error_msg = cpp_to_pystring(re_pattern.error()) + error_msg = cpp_to_bytes(re_pattern.error()) error_code = re_pattern.error_code() del re_pattern if current_notification == FALLBACK_EXCEPTION: @@ -1029,80 +1089,75 @@ def _compile(pattern, int flags=0, int max_mem=8388608): cdef Pattern pypattern = Pattern() pypattern.pattern = original_pattern pypattern.re_pattern = re_pattern - pypattern.ngroups = re_pattern.NumberOfCapturingGroups() + pypattern.groups = re_pattern.NumberOfCapturingGroups() pypattern.encoded = encoded - pypattern._flags = flags + pypattern.flags = flags del s return pypattern + def search(pattern, string, int flags=0): - """ - Scan through string looking for a match to the pattern, returning - a match object or none if no match was found. - """ + """Scan through string looking for a match to the pattern, returning + a match object or none if no match was found.""" return compile(pattern, flags).search(string) + def match(pattern, string, int flags=0): - """ - Try to apply the pattern at the start of the string, returning - a match object, or None if no match was found. - """ + """Try to apply the pattern at the start of the string, returning + a match object, or None if no match was found.""" return compile(pattern, flags).match(string) + def finditer(pattern, string, int flags=0): - """ - Return an list of all non-overlapping matches in the + """Return an list of all non-overlapping matches in the string. For each match, the iterator returns a match object. 
- Empty matches are included in the result. - """ + Empty matches are included in the result.""" return compile(pattern, flags).finditer(string) + def findall(pattern, string, int flags=0): - """ - Return an list of all non-overlapping matches in the + """Return an list of all non-overlapping matches in the string. For each match, the iterator returns a match object. - Empty matches are included in the result. - """ + Empty matches are included in the result.""" return compile(pattern, flags).findall(string) + def split(pattern, string, int maxsplit=0): - """ - Split the source string by the occurrences of the pattern, - returning a list containing the resulting substrings. - """ + """Split the source string by the occurrences of the pattern, + returning a list containing the resulting substrings.""" return compile(pattern).split(string, maxsplit) + def sub(pattern, repl, string, int count=0): - """ - Return the string obtained by replacing the leftmost + """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in string by the replacement repl. repl can be either a string or a callable; if a string, backslash escapes in it are processed. If it is a callable, it's passed the match object and must return - a replacement string to be used. - """ + a replacement string to be used.""" return compile(pattern).sub(repl, string, count) + def subn(pattern, repl, string, int count=0): - """ - Return a 2-tuple containing (new_string, number). + """Return a 2-tuple containing (new_string, number). new_string is the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the source string by the replacement repl. number is the number of substitutions that were made. repl can be either a string or a callable; if a string, backslash escapes in it are processed. If it is a callable, it's passed the match object and must - return a replacement string to be used. 
- """ + return a replacement string to be used.""" return compile(pattern).subn(repl, string, count) + _alphanum = {} for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890': _alphanum[c] = 1 del c + def escape(pattern): "Escape all non-alphanumeric characters in pattern." s = list(pattern) diff --git a/tests/findall.txt b/tests/findall.txt index 630a14d5..58342b61 100644 --- a/tests/findall.txt +++ b/tests/findall.txt @@ -2,6 +2,7 @@ findall tests ============= >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) This one is from http://docs.python.org/library/re.html?#finding-all-adverbs: diff --git a/tests/finditer.txt b/tests/finditer.txt old mode 100755 new mode 100644 index d171c92c..1fa4cc44 --- a/tests/finditer.txt +++ b/tests/finditer.txt @@ -1,14 +1,16 @@ Simple tests for the ``finditer`` function. =========================================== - >>> import re2 as re + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> len(list(re.finditer(r'\w+', open("cnn_homepage.dat").read()))) + >>> len(list(re2.finditer(r'\w+', open("cnn_homepage.dat").read()))) 14230 - >>> [m.group(1) for m in re.finditer(r'\n#hdr-editions(.*?)\n', open("cnn_homepage.dat").read())] + >>> [m.group(1) for m in re2.finditer(r'\n#hdr-editions(.*?)\n', open("cnn_homepage.dat").read())] [' a { text-decoration:none; }', ' li { padding:0 10px; }', ' ul li.no-pad-left span { font-size:12px; }'] - >>> [m.group(1) for m in re.finditer(r'^#hdr-editions(.*?)$', open("cnn_homepage.dat").read(), re.M)] + >>> [m.group(1) for m in re2.finditer(r'^#hdr-editions(.*?)$', + ... 
open("cnn_homepage.dat").read(), re2.M)] [' a { text-decoration:none; }', ' li { padding:0 10px; }', ' ul li.no-pad-left span { font-size:12px; }'] diff --git a/tests/issue4.txt b/tests/issue4.txt index 34984d5a..29f4bb41 100644 --- a/tests/issue4.txt +++ b/tests/issue4.txt @@ -1,12 +1,25 @@ issue #4 ======== + >>> import re >>> import re2 - >>> TERM_SPEC2 = re2.compile('([\W\d_]*)(([^\W\d_]*[-\.]*)*[^\W\d_])([\W\d_]*[^\W\d_]*)', re2.UNICODE) + >>> re2.set_fallback_notification(re2.FALLBACK_WARNING) + >>> regex = '([\W\d_]*)(([^\W\d_]*[-\.]*)*[^\W\d_])([\W\d_]*[^\W\d_]*)' + >>> TERM_SPEC = re.compile(regex, re.UNICODE) + >>> TERM_SPEC2 = re2.compile(regex, re2.UNICODE) + >>> TERM_SPEC.search("a").groups() + ('', 'a', '', '') >>> TERM_SPEC2.search("a").groups() ('', 'a', '', '') - -Still broken because of unicode: >>> TERM_SPEC2.search(u"Hello").groups() - (u'', u'Hello', u'Hell', u',') + (u'', u'Hello', u'Hell', u'') + >>> TERM_SPEC.search(u"Hello").groups() + (u'', u'Hello', u'Hell', u'') + + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + >>> regex = '(foo)?((.*).)(bar)?' + >>> re.search(regex, "a", flags=re.UNICODE).groups() + (None, 'a', '', None) + >>> re2.search(regex, "a", flags=re.UNICODE).groups() + (None, 'a', '', None) diff --git a/tests/match_expand.txt b/tests/match_expand.txt index 624ee240..9225658b 100644 --- a/tests/match_expand.txt +++ b/tests/match_expand.txt @@ -4,14 +4,15 @@ Match Expand Tests Match objects have an .expand() method which allows them to expand templates as if the .sub() method was called on the pattern. 
- >>> import re2 as re - >>> m = re.match(r"(\w+) (\w+)\W+(?P\w+)", "Isaac Newton, physicist") + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + >>> m = re2.match(r"(\w+) (\w+)\W+(?P<title>\w+)", "Isaac Newton, physicist") >>> m.expand(r"\2, \1") 'Newton, Isaac' >>> m.expand(r"\1 \g<title>") 'Isaac physicist' - >>> m.expand(r"\0 \1 \2") - '\x00 Isaac Newton' + >>> m.expand(r"\2, \1 \2") + 'Newton, Isaac Newton' >>> m.expand(r"\3") 'physicist' diff --git a/tests/mmap.txt b/tests/mmap.txt new file mode 100644 index 00000000..5c61cbee --- /dev/null +++ b/tests/mmap.txt @@ -0,0 +1,17 @@ + +Testing re2 on buffer object +============================ + + >>> import re2 + >>> import mmap + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + + >>> tmp = open("cnn_homepage.dat", "r+b") + >>> data = mmap.mmap(tmp.fileno(), 0) + + >>> len(list(re2.finditer(r'\w+', data))) + 14230 + + >>> data.close() + >>> tmp.close() + diff --git a/tests/namedgroups.txt b/tests/namedgroups.txt index 46175de5..707351d0 100644 --- a/tests/namedgroups.txt +++ b/tests/namedgroups.txt @@ -1,9 +1,10 @@ Testing some aspects of named groups ================================================= - >>> import re2 as re + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> m = re.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds") + >>> m = re2.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds") >>> m.start("first_name") 0 >>> m.start("last_name") @@ -14,9 +15,19 @@ Testing some aspects of named groups >>> m.regs ((0, 16), (0, 7), (8, 16)) +Compare patterns with and without unicode + + >>> re2.compile(r"(?P<first_name>\w+) (?P<last_name>\w+)")._print_pattern() + (?P<first_name>\w+) (?P<last_name>\w+) + >>> pattern = re2.compile(u"(?P<first_name>\\w+) (?P<last_name>\\w+)", + ... 
re2.UNICODE) + >>> pattern._print_pattern() + (?P<first_name>[_\p{L}\p{Nd}]+) (?P<last_name>[_\p{L}\p{Nd}]+) + Make sure positions are converted properly for unicode - >>> m = re.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", u'\u05d9\u05e9\u05e8\u05d0\u05dc \u05e6\u05d3\u05d5\u05e7', re.UNICODE) + >>> m = pattern.match( + ... u'\u05d9\u05e9\u05e8\u05d0\u05dc \u05e6\u05d3\u05d5\u05e7') >>> m.start("first_name") 0 >>> m.start("last_name") diff --git a/tests/pattern.txt b/tests/pattern.txt index 19b8a2f1..0e21d71b 100644 --- a/tests/pattern.txt +++ b/tests/pattern.txt @@ -2,6 +2,7 @@ pattern tests ============= >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) We should be able to get back what we put in. diff --git a/tests/performance.py b/tests/performance.py index a45f4cd1..a944ee7c 100755 --- a/tests/performance.py +++ b/tests/performance.py @@ -94,9 +94,10 @@ def benchmarks_to_ReST(benchmarks): col_sizes[col] = max(len(row[col]) for row in table) def print_divider(symbol='-'): - print '+' + '+'.join(symbol*col_size for col_size in col_sizes) + '+' + print('+' + '+'.join(symbol*col_size for col_size in col_sizes) + '+') def print_row(row): - print '|' + '|'.join(item.ljust(col_sizes[i]) for i, item in enumerate(row)) + '|' + print('|' + '|'.join(item.ljust(col_sizes[i]) for i, item in + enumerate(row)) + '|') print_divider() print_row(table[0]) @@ -132,7 +133,7 @@ def decorator(method): def getwikidata(): global _wikidata if _wikidata is None: - _wikidata = gzip.open('wikipages.xml.gz').read() + _wikidata = gzip.open('wikipages.xml.gz', 'rb').read() return _wikidata @@ -184,7 +185,7 @@ def split_pages(pattern, data): def getweblogdata(): - return open(os.path.join(os.path.dirname(__file__), 'access.log')) + return open(os.path.join(os.path.dirname(__file__), 'access.log'), 'rb') @register_test("weblog scan", #r'^(\S+) (\S+) (\S+) \[(\d{1,2})/(\w{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2}) -(\d{4})\] "(\S+) (\S+) (\S+)" (\d+) (\d+|-) "([^"]+)" 
"([^"]+)"\n', diff --git a/tests/re2_test.py b/tests/re2_test.py index 2eb793e1..7a2d69a6 100755 --- a/tests/re2_test.py +++ b/tests/re2_test.py @@ -10,7 +10,7 @@ def testall(): for file in glob.glob(os.path.join(os.path.dirname(__file__), "*.txt")): - print "Testing %s..." % file + print("Testing %s..." % file) doctest.testfile(os.path.join(".", os.path.basename(file))) if __name__ == "__main__": diff --git a/tests/search.txt b/tests/search.txt index f167e151..311625b1 100644 --- a/tests/search.txt +++ b/tests/search.txt @@ -1,22 +1,23 @@ These are simple tests of the ``search`` function ================================================= - >>> import re2 as re - >>> re.search("((?:[01]?\d{1,2}|2[0-4]\d|25[0-5])\.){3}(?:[01]?\d{1,2}|2[0-4]\d|25[0-5])", "hello 28.224.2.1 test").group() + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + >>> re2.search("((?:[01]?\d{1,2}|2[0-4]\d|25[0-5])\.){3}(?:[01]?\d{1,2}|2[0-4]\d|25[0-5])", "hello 28.224.2.1 test").group() '28.224.2.1' - >>> re.search("(\d{3})\D?(\d{3})\D?(\d{4})", "800-555-1212").groups() + >>> re2.search("(\d{3})\D?(\d{3})\D?(\d{4})", "800-555-1212").groups() ('800', '555', '1212') >>> input = 'a' * 999 - >>> len(re.search('(?:a{1000})?a{999}', input).group()) + >>> len(re2.search('(?:a{1000})?a{999}', input).group()) 999 - >>> re.search(r'\n#hdr-editions(.*?)\n', open("cnn_homepage.dat").read()).groups() + >>> re2.search(r'\n#hdr-editions(.*?)\n', open("cnn_homepage.dat").read()).groups() (' a { text-decoration:none; }',) Verify some sanity checks - >>> re.compile(r'x').search('x', 2000) - >>> re.compile(r'x').search('x', 1, -300) + >>> re2.compile(r'x').search('x', 2000) + >>> re2.compile(r'x').search('x', 1, -300) diff --git a/tests/split.txt b/tests/split.txt index 7493e66a..d6ceed31 100644 --- a/tests/split.txt +++ b/tests/split.txt @@ -1,9 +1,15 @@ Split tests =========== -This one tests to make sure that utf8 data is parsed correctly. 
+This one tests to make sure that unicode / utf8 data is parsed correctly. + + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + >>> a = u'我很好, 你呢?' + >>> re2.split(u' ', a) == [u'\u6211\u5f88\u597d,', u'\u4f60\u5462?'] + True + >>> re2.split(b' ', a.encode('utf8')) == [ + ... b'\xe6\x88\x91\xe5\xbe\x88\xe5\xa5\xbd,', + ... b'\xe4\xbd\xa0\xe5\x91\xa2?'] + True - >>> import re2 as re - >>> a = '我很好, 你呢?'.decode('utf8') - >>> print re.split(' ', a) - [u'\u6211\u5f88\u597d,', u'\u4f60\u5462?'] diff --git a/tests/sub.txt b/tests/sub.txt index 7c4460be..ca1349ea 100644 --- a/tests/sub.txt +++ b/tests/sub.txt @@ -5,9 +5,10 @@ This first test is just looking to replace things between parentheses with an empty string. - >>> import re2 as re + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) >>> import hashlib >>> import gzip - >>> data = gzip.open('wikipages.xml.gz').read() - >>> print hashlib.md5(re.sub('\(.*?\)', '', data)).hexdigest() + >>> data = gzip.open('wikipages.xml.gz', 'rb').read() + >>> print(hashlib.md5(re2.sub(b'\(.*?\)', b'', data)).hexdigest()) b7a469f55ab76cd5887c81dbb0cfe6d3 diff --git a/tests/test_re.py b/tests/test_re.py index bb8f6547..ffe78198 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -1,3 +1,4 @@ +from __future import print_function from test.test_support import verbose, run_unittest, import_module import re2 as re from re import Scanner @@ -695,7 +696,7 @@ def test_dealloc(self): def run_re_tests(): from re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: - print 'Running re_tests test suite' + print('Running re_tests test suite') else: # To save time, only run the first and last 10 tests #tests = tests[:10] + tests[-10:] @@ -716,23 +717,23 @@ def run_re_tests(): except re.error: if outcome == SYNTAX_ERROR: pass # Expected a syntax error else: - print '=== Syntax error:', t + print('=== Syntax error:', t) except KeyboardInterrupt: raise KeyboardInterrupt 
except: - print '*** Unexpected error ***', t + print('*** Unexpected error ***', t) if verbose: traceback.print_exc(file=sys.stdout) else: try: result = obj.search(s) except re.error, msg: - print '=== Unexpected exception', t, repr(msg) + print('=== Unexpected exception', t, repr(msg)) if outcome == SYNTAX_ERROR: # This should have been a syntax error; forget it. pass elif outcome == FAIL: if result is None: pass # No match, as expected - else: print '=== Succeeded incorrectly', t + else: print('=== Succeeded incorrectly', t) elif outcome == SUCCEED: if result is not None: # Matched, as expected, so now we compute the @@ -760,17 +761,17 @@ def run_re_tests(): vardict[i] = gi repl = eval(repl, vardict) if repl != expected: - print '=== grouping error', t, - print repr(repl) + ' should be ' + repr(expected) + print('=== grouping error', t, end=' ') + print(repr(repl) + ' should be ' + repr(expected)) else: - print '=== Failed incorrectly', t + print('=== Failed incorrectly', t) # Try the match on a unicode string, and check that it # still succeeds. try: result = obj.search(unicode(s, "latin-1")) if result is None: - print '=== Fails on unicode match', t + print('=== Fails on unicode match', t) except NameError: continue # 1.5.2 except TypeError: @@ -781,7 +782,7 @@ def run_re_tests(): obj=re.compile(unicode(pattern, "latin-1")) result = obj.search(s) if result is None: - print '=== Fails on unicode pattern match', t + print('=== Fails on unicode pattern match', t) # Try the match with the search area limited to the extent # of the match and see if it still succeeds. \B will @@ -793,28 +794,28 @@ def run_re_tests(): obj = re.compile(pattern) result = obj.search(s, result.start(0), result.end(0) + 1) if result is None: - print '=== Failed on range-limited match', t + print('=== Failed on range-limited match', t) # Try the match with IGNORECASE enabled, and check that it # still succeeds. 
obj = re.compile(pattern, re.IGNORECASE) result = obj.search(s) if result is None: - print '=== Fails on case-insensitive match', t + print('=== Fails on case-insensitive match', t) # Try the match with LOCALE enabled, and check that it # still succeeds. obj = re.compile(pattern, re.LOCALE) result = obj.search(s) if result is None: - print '=== Fails on locale-sensitive match', t + print('=== Fails on locale-sensitive match', t) # Try the match with UNICODE locale enabled, and check # that it still succeeds. obj = re.compile(pattern, re.UNICODE) result = obj.search(s) if result is None: - print '=== Fails on unicode-sensitive match', t + print('=== Fails on unicode-sensitive match', t) def test_main(): run_unittest(ReTests) diff --git a/tests/unicode.txt b/tests/unicode.txt index 0d526b0d..71ba46ba 100644 --- a/tests/unicode.txt +++ b/tests/unicode.txt @@ -2,68 +2,67 @@ Here are some tests to make sure that utf-8 works ================================================= >>> import re2 as re + >>> re.set_fallback_notification(re.FALLBACK_EXCEPTION) >>> a = u'\u6211\u5f88\u597d' >>> c = re.compile(a[0]) - >>> c.search(a).group() - u'\u6211' + >>> c.search(a).group() == u'\u6211' + True Test unicode stickyness - >>> re.sub(r'x', u'y', 'x') - u'y' - >>> re.sub(r'x', 'y', u'x') - u'y' - >>> re.sub(ur'x', 'y', 'x') - 'y' - >>> re.findall(ur'.', 'x') - ['x'] - >>> re.findall(ur'.', u'x') - [u'x'] - >>> re.split(ur',', '1,2,3') - ['1', '2', '3'] - >>> re.split(ur',', u'1,2,3') - [u'1', u'2', u'3'] - >>> re.search(ur'(\d)', '1').group(1) - '1' - >>> re.search(ur'(\d)', u'1').group(1) - u'1' + >>> re.sub(u'x', u'y', u'x') == u'y' + True + >>> re.sub(r'x', 'y', 'x') == 'y' + True + >>> re.findall('.', 'x') == ['x'] + True + >>> re.findall(u'.', u'x') == [u'x'] + True + >>> re.split(',', '1,2,3') == ['1', '2', '3'] + True + >>> re.split(u',', u'1,2,3') == [u'1', u'2', u'3'] + True + >>> re.search('(\\d)', '1').group(1) == '1' + True + >>> re.search(u'(\\d)', u'1').group(1) == 
u'1' + True Test unicode character groups - >>> re.search(r'\d', u'\u0661', re.UNICODE).group(0) - u'\u0661' - >>> int(re.search(r'\d', u'\u0661', re.UNICODE).group(0)) - 1 - >>> re.search(r'\w', u'\u0401') - >>> re.search(r'\w', u'\u0401', re.UNICODE).group(0) - u'\u0401' - >>> re.search(r'\s', u'\u1680', re.UNICODE).group(0) - u'\u1680' - >>> re.findall(r'[\s\d\w]', 'hey 123', re.UNICODE) - ['h', 'e', 'y', ' ', '1', '2', '3'] - >>> re.search(r'\D', u'\u0661x', re.UNICODE).group(0) - u'x' - >>> re.search(r'\W', u'\u0401!', re.UNICODE).group(0) - u'!' - >>> re.search(r'\S', u'\u1680x', re.UNICODE).group(0) - u'x' - >>> re.search(r'[\D]', u'\u0661x', re.UNICODE).group(0) - u'x' - >>> re.search(r'[\W]', u'\u0401!', re.UNICODE).group(0) - u'!' - >>> re.search(r'[\S]', u'\u1680x', re.UNICODE).group(0) - u'x' + >>> re.search(u'\\d', u'\u0661', re.UNICODE).group(0) == u'\u0661' + True + >>> int(re.search(u'\\d', u'\u0661', re.UNICODE).group(0)) == 1 + True + >>> re.search(u'\\w', u'\u0401') + >>> re.search(u'\\w', u'\u0401', re.UNICODE).group(0) == u'\u0401' + True + >>> re.search(u'\\s', u'\u1680', re.UNICODE).group(0) == u'\u1680' + True + >>> re.findall(r'[\s\d\w]', 'hey 123', re.UNICODE) == ['h', 'e', 'y', ' ', '1', '2', '3'] + True + >>> re.search(u'\\D', u'\u0661x', re.UNICODE).group(0) == u'x' + True + >>> re.search(u'\\W', u'\u0401!', re.UNICODE).group(0) == u'!' + True + >>> re.search(u'\\S', u'\u1680x', re.UNICODE).group(0) == u'x' + True + >>> re.set_fallback_notification(re.FALLBACK_WARNING) + >>> re.search(u'[\\W]', u'\u0401!', re.UNICODE).group(0) == u'!' 
+ True + >>> re.search(u'[\\S]', u'\u1680x', re.UNICODE).group(0) == u'x' + True + >>> re.set_fallback_notification(re.FALLBACK_EXCEPTION) Group positions need to be fixed with unicode - >>> re.search(r' (.)', u'\U0001d200xxx\u1234 x').span(1) + >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x').span(1) (6, 7) - >>> re.search(r' (.)', u'\U0001d200xxx\u1234 x'.encode('utf-8')).span(1) + >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x'.encode('utf-8')).span(1) (11, 12) Pos and endpos also need to be corrected - >>> re.compile(r'x').findall(u'\u1234x', 1, 2) - [u'x'] + >>> re.compile(u'x').findall(u'\u1234x', 1, 2) == [u'x'] # fix pos and endpos. + True From 0d0aeae499d1dd7f43c0043b9764cbbd3fd184c4 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sat, 1 Aug 2015 23:14:47 +0200 Subject: [PATCH 003/114] minor changes --- src/re2.pyx | 25 ++++++++----------------- tests/namedgroups.txt | 2 +- tests/unicode.txt | 4 ++-- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/src/re2.pyx b/src/re2.pyx index d8bbeaaf..bc89baae 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -19,10 +19,14 @@ On the other hand, unicode character classes are supported. Syntax reference: https://github.com/google/re2/wiki/Syntax """ -# Import re flags to be compatible. import sys import re +import warnings +cimport _re2 +cimport cpython.unicode +from cython.operator cimport preincrement as inc, dereference as deref +# Import re flags to be compatible. 
I = re.I M = re.M S = re.S @@ -36,19 +40,14 @@ UNICODE = re.UNICODE VERBOSE = re.VERBOSE LOCALE = re.LOCALE -cdef int _I = re.I -cdef int _M = re.M -cdef int _S = re.S -cdef int _U = re.U -cdef int _X = re.X -cdef int _L = re.L - FALLBACK_QUIETLY = 0 FALLBACK_WARNING = 1 FALLBACK_EXCEPTION = 2 VERSION = (0, 2, 23) VERSION_HEX = 0x000217 + +cdef int _I = I, _M = M, _S = S, _U = U, _X = X, _L = L cdef int current_notification = FALLBACK_QUIETLY # Type of compiled re object from Python stdlib @@ -85,14 +84,6 @@ def set_fallback_notification(level): current_notification = level - - -cimport _re2 -cimport cpython.unicode -from cython.operator cimport preincrement as inc, dereference as deref -import warnings - - cdef bytes cpp_to_bytes(_re2.cpp_string input): """Convert from a std::string object to a python string.""" # By taking the slice we go to the right size, @@ -113,7 +104,7 @@ cdef inline unicode char_to_unicode(_re2.const_char_ptr input, int length): cdef inline unicode_to_bytes(object pystring, int * encoded): """Convert a unicode string to a utf8 bytes object, if necessary. - + If pystring is a bytes string or a buffer, return unchanged.""" if cpython.unicode.PyUnicode_Check(pystring): pystring = cpython.unicode.PyUnicode_EncodeUTF8( diff --git a/tests/namedgroups.txt b/tests/namedgroups.txt index 707351d0..59199bd6 100644 --- a/tests/namedgroups.txt +++ b/tests/namedgroups.txt @@ -24,7 +24,7 @@ Compare patterns with and without unicode >>> pattern._print_pattern() (?P<first_name>[_\p{L}\p{Nd}]+) (?P<last_name>[_\p{L}\p{Nd}]+) -Make sure positions are converted properly for unicode +Make sure positions are converted properly for unicode >>> m = pattern.match( ... 
u'\u05d9\u05e9\u05e8\u05d0\u05dc \u05e6\u05d3\u05d5\u05e7') diff --git a/tests/unicode.txt b/tests/unicode.txt index 71ba46ba..2433aefe 100644 --- a/tests/unicode.txt +++ b/tests/unicode.txt @@ -56,9 +56,9 @@ Test unicode character groups Group positions need to be fixed with unicode - >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x').span(1) + >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x').span(1) (6, 7) - >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x'.encode('utf-8')).span(1) + >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x'.encode('utf-8')).span(1) (11, 12) Pos and endpos also need to be corrected From 5c60555341d67083b0019952f5886a36f08372bb Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sun, 2 Aug 2015 18:19:34 +0200 Subject: [PATCH 004/114] re-organize code --- .gitignore | 3 +- src/#clib.pxd# | 1 - src/match.pxi | 293 +++++++++++++++ src/pattern.pxi | 656 ++++++++++++++++++++++++++++++++ src/re2.pyx | 973 +----------------------------------------------- 5 files changed, 959 insertions(+), 967 deletions(-) delete mode 100644 src/#clib.pxd# create mode 100644 src/match.pxi create mode 100644 src/pattern.pxi diff --git a/.gitignore b/.gitignore index eae3beb2..7a33e695 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,8 @@ MANIFEST /dist src/re2.html src/re2.so -re2.so +src/re2.cpp +tests/access.log *~ *.pyc *.swp diff --git a/src/#clib.pxd# b/src/#clib.pxd# deleted file mode 100644 index 8b137891..00000000 --- a/src/#clib.pxd# +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/match.pxi b/src/match.pxi new file mode 100644 index 00000000..ffd93084 --- /dev/null +++ b/src/match.pxi @@ -0,0 +1,293 @@ + +cdef class Match: + cdef readonly Pattern re + cdef readonly object string + cdef readonly int pos + cdef readonly int endpos + + cdef _re2.StringPiece * matches + cdef _re2.const_stringintmap * named_groups + cdef bint encoded + cdef int nmatches + cdef int _lastindex + cdef tuple _groups + cdef tuple _spans + cdef dict 
_named_groups + cdef dict _named_indexes + + def __init__(self, Pattern pattern_object, int num_groups): + self._lastindex = -1 + self._groups = None + self.pos = 0 + self.endpos = -1 + self.matches = _re2.new_StringPiece_array(num_groups + 1) + self.nmatches = num_groups + self.re = pattern_object + + def __dealloc__(self): + _re2.delete_StringPiece_array(self.matches) + + def __repr__(self): + return '<re2.Match object; span=%r, match=%r>' % ( + (self.pos, self.endpos), self.string) + + cdef init_groups(self): + cdef list groups = [] + cdef int i + + if self._groups is not None: + return + + cdef _re2.const_char_ptr last_end = NULL + cdef _re2.const_char_ptr cur_end = NULL + + for i in range(self.nmatches): + if self.matches[i].data() == NULL: + groups.append(None) + else: + if i > 0: + cur_end = self.matches[i].data() + self.matches[i].length() + + if last_end == NULL: + last_end = cur_end + self._lastindex = i + else: + # The rules for last group are a bit complicated: + # if two groups end at the same point, the earlier one + # is considered last, so we don't switch our selection + # unless the end point has moved. 
+ if cur_end > last_end: + last_end = cur_end + self._lastindex = i + groups.append( + self.matches[i].data()[:self.matches[i].length()]) + self._groups = tuple(groups) + + def groups(self, default=None): + self.init_groups() + if self.encoded: + return tuple([ + g.decode('utf8') if g else default + for g in self._groups[1:]]) + if default is not None: + return tuple([g or default for g in self._groups[1:]]) + return self._groups[1:] + + def group(self, *args): + if len(args) == 0: + groupnum = 0 + elif len(args) == 1: + groupnum = args[0] + else: # len(args) > 1: + return tuple([self.group(i) for i in args]) + if self.encoded: + return self._group(groupnum).decode('utf8') + return self._group(groupnum) + + cdef bytes _group(self, object groupnum): + cdef int idx + self.init_groups() + if isinstance(groupnum, int): + idx = groupnum + if idx > self.nmatches - 1: + raise IndexError("no such group %d; available groups: %r" + % (idx, list(range(self.nmatches)))) + return self._groups[idx] + groupdict = self._groupdict() + if groupnum not in groupdict: + raise IndexError("no such group %r; available groups: %r" + % (groupnum, list(groupdict.keys()))) + return groupdict[groupnum] + + cdef list _convert_positions(self, positions): + cdef char * s + cdef int cpos = 0 + cdef int upos = 0 + cdef Py_ssize_t size + cdef int c + if pystring_to_cstring(self.string, &s, &size) == -1: + raise TypeError("expected string or buffer") + + new_positions = [] + i = 0 + num_positions = len(positions) + if positions[i] == -1: + new_positions.append(-1) + inc(i) + if i == num_positions: + return new_positions + if positions[i] == 0: + new_positions.append(0) + inc(i) + if i == num_positions: + return new_positions + + while cpos < size: + c = <unsigned char>s[cpos] + if c < 0x80: + inc(cpos) + inc(upos) + elif c < 0xe0: + cpos += 2 + inc(upos) + elif c < 0xf0: + cpos += 3 + inc(upos) + else: + cpos += 4 + inc(upos) + # wide unicode chars get 2 unichars when python is compiled + # with 
--enable-unicode=ucs2 + # TODO: verify this + emit_ifndef_py_unicode_wide() + inc(upos) + emit_endif() + + if positions[i] == cpos: + new_positions.append(upos) + inc(i) + if i == num_positions: + return new_positions + + def _convert_spans(self, spans): + positions = [x for x, _ in spans] + [y for _, y in spans] + positions = sorted(set(positions)) + posdict = dict(zip(positions, self._convert_positions(positions))) + + return [(posdict[x], posdict[y]) for x, y in spans] + + + cdef _make_spans(self): + if self._spans is not None: + return + + cdef int start, end + cdef char * s + cdef Py_ssize_t size + cdef _re2.StringPiece * piece + if pystring_to_cstring(self.string, &s, &size) == -1: + raise TypeError("expected string or buffer") + + spans = [] + for i in range(self.nmatches): + if self.matches[i].data() == NULL: + spans.append((-1, -1)) + else: + piece = &self.matches[i] + if piece.data() == NULL: + return (-1, -1) + start = piece.data() - s + end = start + piece.length() + spans.append((start, end)) + + if self.encoded: + spans = self._convert_spans(spans) + + self._spans = tuple(spans) + + def expand(self, object template): + """Expand a template with groups.""" + # TODO - This can be optimized to work a bit faster in C. + if isinstance(template, unicode): + template = template.encode('utf8') + items = template.split(b'\\') + for i, item in enumerate(items[1:]): + if item[0:1].isdigit(): + # Number group + if item[0] == b'0': + items[i + 1] = b'\x00' + item[1:] # ??? 
+ else: + items[i + 1] = self._group(int(item[0:1])) + item[1:] + elif item[:2] == b'g<' and b'>' in item: + # This is a named group + name, rest = item[2:].split(b'>', 1) + items[i + 1] = self._group(name) + rest + else: + # This isn't a template at all + items[i + 1] = b'\\' + item + if self.encoded: + return b''.join(items).decode('utf8') + return b''.join(items) + + cdef dict _groupdict(self): + cdef _re2.stringintmapiterator it + cdef dict result = {} + cdef dict indexes = {} + + self.init_groups() + + if self._named_groups: + return self._named_groups + + self._named_groups = result + it = self.named_groups.begin() + while it != self.named_groups.end(): + indexes[cpp_to_bytes(deref(it).first)] = deref(it).second + result[cpp_to_bytes(deref(it).first)] = self._groups[ + deref(it).second] + inc(it) + + self._named_groups = result + self._named_indexes = indexes + return result + + def groupdict(self): + result = self._groupdict() + if self.encoded: + return {a.decode('utf8') if isinstance(a, bytes) else a: + b.decode('utf8') for a, b in result.items()} + return result + + def end(self, group=0): + return self.span(group)[1] + + def start(self, group=0): + return self.span(group)[0] + + def span(self, group=0): + self._make_spans() + if isinstance(group, int): + if group > len(self._spans): + raise IndexError("no such group %d; available groups: %r" + % (group, list(range(len(self._spans))))) + return self._spans[group] + else: + self._groupdict() + if self.encoded: + group = group.encode('utf8') + if group not in self._named_indexes: + raise IndexError("no such group %r; available groups: %r" + % (group, list(self._named_indexes))) + return self._spans[self._named_indexes[group]] + + property regs: + def __get__(self): + if self._spans is None: + self._make_spans() + return self._spans + + property lastindex: + def __get__(self): + self.init_groups() + if self._lastindex < 1: + return None + else: + return self._lastindex + + property lastgroup: + def 
__get__(self): + self.init_groups() + cdef _re2.stringintmapiterator it + + if self._lastindex < 1: + return None + + it = self.named_groups.begin() + while it != self.named_groups.end(): + if deref(it).second == self._lastindex: + return cpp_to_bytes(deref(it).first) + inc(it) + + return None + + diff --git a/src/pattern.pxi b/src/pattern.pxi new file mode 100644 index 00000000..f8267cf0 --- /dev/null +++ b/src/pattern.pxi @@ -0,0 +1,656 @@ + + +cdef class Pattern: + cdef readonly int flags + cdef readonly int groups + cdef readonly object pattern + + cdef _re2.RE2 * re_pattern + cdef bint encoded + cdef object __weakref__ + + def __dealloc__(self): + del self.re_pattern + + def __repr__(self): + return 're2.compile(%r, %r)' % (self.pattern, self.flags) + + cdef _search(self, string, int pos, int endpos, _re2.re2_Anchor anchoring): + """Scan through string looking for a match, and return a corresponding + Match instance. Return None if no position in the string matches.""" + cdef Py_ssize_t size + cdef int result + cdef char * cstring + cdef int encoded = 0 + cdef _re2.StringPiece * sp + cdef Match m = Match(self, self.groups + 1) + + if hasattr(string, 'tostring'): + string = string.tostring() + + string = unicode_to_bytes(string, &encoded) + + if pystring_to_cstring(string, &cstring, &size) == -1: + raise TypeError("expected string or buffer") + + if 0 <= endpos <= pos or pos > size: + return None + if 0 <= endpos < size + size = endpos + + sp = new _re2.StringPiece(cstring, size) + with nogil: + result = self.re_pattern.Match( + sp[0], + <int>pos, + <int>size, + anchoring, + m.matches, + self.groups + 1) + + del sp + if result == 0: + return None + m.encoded = encoded + m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) + m.nmatches = self.groups + 1 + m.string = string + m.pos = pos + if endpos == -1: + m.endpos = len(string) + else: + m.endpos = endpos + return m + + def search(self, object string, int pos=0, int endpos=-1): + """Scan 
through string looking for a match, and return a corresponding + Match instance. Return None if no position in the string matches.""" + return self._search(string, pos, endpos, _re2.UNANCHORED) + + def match(self, object string, int pos=0, int endpos=-1): + """Matches zero or more characters at the beginning of the string.""" + return self._search(string, pos, endpos, _re2.ANCHOR_START) + + def _print_pattern(self): + cdef _re2.cpp_string * s + s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) + print(cpp_to_bytes(s[0]).decode('utf8')) + + def finditer(self, object string, int pos=0, int endpos=-1): + """Yield all non-overlapping matches of pattern in string as Match + objects.""" + cdef Py_ssize_t size + cdef int result + cdef char * cstring + cdef _re2.StringPiece * sp + cdef Match m + cdef int encoded = 0 + + string = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(string, &cstring, &size) == -1: + raise TypeError("expected string or buffer") + + if endpos != -1 and endpos < size: + size = endpos + + sp = new _re2.StringPiece(cstring, size) + + while True: + m = Match(self, self.groups + 1) + with nogil: + result = self.re_pattern.Match( + sp[0], + <int>pos, + <int>size, + _re2.UNANCHORED, + m.matches, + self.groups + 1) + if result == 0: + break + m.encoded = encoded + m.named_groups = _re2.addressof( + self.re_pattern.NamedCapturingGroups()) + m.nmatches = self.groups + 1 + m.string = string + m.pos = pos + if endpos == -1: + m.endpos = len(string) + else: + m.endpos = endpos + yield m + if pos == size: + break + # offset the pos to move to the next point + if m.matches[0].length() == 0: + pos += 1 + else: + pos = m.matches[0].data() - cstring + m.matches[0].length() + del sp + + def findall(self, object string, int pos=0, endpos=None): + """Return all non-overlapping matches of pattern in string as a list + of strings.""" + cdef Py_ssize_t size + cdef int result + cdef char * cstring + cdef _re2.StringPiece * sp + cdef Match m + 
cdef list resultlist = [] + cdef int encoded = 0 + + string = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(string, &cstring, &size) == -1: + raise TypeError("expected string or buffer") + + if endpos is not None and endpos < size: + size = endpos + + sp = new _re2.StringPiece(cstring, size) + + while True: + # FIXME: can probably avoid creating Match objects + m = Match(self, self.groups + 1) + with nogil: + result = self.re_pattern.Match( + sp[0], + <int>pos, + <int>size, + _re2.UNANCHORED, + m.matches, + self.groups + 1) + if result == 0: + break + m.encoded = encoded + m.named_groups = _re2.addressof( + self.re_pattern.NamedCapturingGroups()) + m.nmatches = self.groups + 1 + m.string = string + m.pos = pos + if endpos is not None: + m.endpos = len(string) + else: + m.endpos = endpos + if self.groups > 1: + resultlist.append(m.groups("")) + else: + resultlist.append(m.group(self.groups)) + if pos == size: + break + # offset the pos to move to the next point + if m.matches[0].length() == 0: + pos += 1 + else: + pos = m.matches[0].data() - cstring + m.matches[0].length() + del sp + return resultlist + + def split(self, string, int maxsplit=0): + """split(string[, maxsplit = 0]) --> list + + Split a string by the occurrences of the pattern.""" + cdef Py_ssize_t size + cdef int result + cdef int pos = 0 + cdef int lookahead = 0 + cdef int num_split = 0 + cdef char * cstring + cdef _re2.StringPiece * sp + cdef _re2.StringPiece * matches + cdef list resultlist = [] + cdef int encoded = 0 + + if maxsplit < 0: + maxsplit = 0 + + string = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(string, &cstring, &size) == -1: + raise TypeError("expected string or buffer") + + matches = _re2.new_StringPiece_array(self.groups + 1) + sp = new _re2.StringPiece(cstring, size) + + while True: + with nogil: + result = self.re_pattern.Match( + sp[0], + <int>(pos + lookahead), + <int>size, + _re2.UNANCHORED, + matches, + self.groups + 1) + if result == 0: + break 
+ + match_start = matches[0].data() - cstring + match_end = match_start + matches[0].length() + + # If an empty match, just look ahead until you find something + if match_start == match_end: + if pos + lookahead == size: + break + lookahead += 1 + continue + + if encoded: + resultlist.append( + char_to_unicode(&sp.data()[pos], match_start - pos)) + else: + resultlist.append(sp.data()[pos:match_start]) + if self.groups > 0: + for group in range(self.groups): + if matches[group + 1].data() == NULL: + resultlist.append(None) + else: + if encoded: + resultlist.append(char_to_unicode( + matches[group + 1].data(), + matches[group + 1].length())) + else: + resultlist.append(matches[group + 1].data()[: + matches[group + 1].length()]) + + # offset the pos to move to the next point + pos = match_end + lookahead = 0 + + num_split += 1 + if maxsplit and num_split >= maxsplit: + break + + if encoded: + resultlist.append( + char_to_unicode(&sp.data()[pos], sp.length() - pos)) + else: + resultlist.append(sp.data()[pos:]) + _re2.delete_StringPiece_array(matches) + del sp + return resultlist + + def sub(self, repl, string, int count=0): + """sub(repl, string[, count = 0]) --> newstring + + Return the string obtained by replacing the leftmost non-overlapping + occurrences of pattern in string by the replacement repl.""" + return self.subn(repl, string, count)[0] + + def subn(self, repl, string, int count=0): + """subn(repl, string[, count = 0]) --> (newstring, number of subs) + + Return the tuple (new_string, number_of_subs_made) found by replacing + the leftmost non-overlapping occurrences of pattern with the + replacement repl.""" + cdef Py_ssize_t size + cdef char * cstring + cdef _re2.cpp_string * fixed_repl + cdef _re2.StringPiece * sp + cdef _re2.cpp_string * input_str + cdef total_replacements = 0 + cdef int string_encoded = 0 + cdef int repl_encoded = 0 + + if callable(repl): + # This is a callback, so let's use the custom function + return self._subn_callback(repl, string, 
count) + + string = unicode_to_bytes(string, &string_encoded) + repl = unicode_to_bytes(repl, &repl_encoded) + if pystring_to_cstring(repl, &cstring, &size) == -1: + raise TypeError("expected string or buffer") + + fixed_repl = NULL + cdef _re2.const_char_ptr s = cstring + cdef _re2.const_char_ptr end = s + size + cdef int c = 0 + while s < end: + c = s[0] + if (c == b'\\'): + s += 1 + if s == end: + raise RegexError("Invalid rewrite pattern") + c = s[0] + if c == b'\\' or (c >= b'0' and c <= b'9'): + if fixed_repl != NULL: + fixed_repl.push_back(b'\\') + fixed_repl.push_back(c) + else: + if fixed_repl == NULL: + fixed_repl = new _re2.cpp_string( + cstring, s - cstring - 1) + if c == b'n': + fixed_repl.push_back(b'\n') + else: + fixed_repl.push_back(b'\\') + fixed_repl.push_back(b'\\') + fixed_repl.push_back(c) + else: + if fixed_repl != NULL: + fixed_repl.push_back(c) + + s += 1 + if fixed_repl != NULL: + sp = new _re2.StringPiece(fixed_repl.c_str()) + else: + sp = new _re2.StringPiece(cstring, size) + + input_str = new _re2.cpp_string(string) + if not count: + total_replacements = _re2.pattern_GlobalReplace( + input_str, self.re_pattern[0], sp[0]) + elif count == 1: + total_replacements = _re2.pattern_Replace( + input_str, self.re_pattern[0], sp[0]) + else: + del fixed_repl + del input_str + del sp + raise NotImplementedError( + "So far pyre2 does not support custom replacement counts") + + if string_encoded or (repl_encoded and total_replacements > 0): + result = cpp_to_unicode(input_str[0]) + else: + result = cpp_to_bytes(input_str[0]) + del fixed_repl + del input_str + del sp + return (result, total_replacements) + + def _subn_callback(self, callback, string, int count=0): + # This function is probably the hardest to implement correctly. + # This is my first attempt, but if anybody has a better solution, + # please help out. 
+ cdef Py_ssize_t size + cdef int result + cdef int endpos + cdef int pos = 0 + cdef int encoded = 0 + cdef int num_repl = 0 + cdef char * cstring + cdef _re2.StringPiece * sp + cdef Match m + cdef list resultlist = [] + + if count < 0: + count = 0 + + string = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(string, &cstring, &size) == -1: + raise TypeError("expected string or buffer") + + sp = new _re2.StringPiece(cstring, size) + + try: + while True: + m = Match(self, self.groups + 1) + with nogil: + result = self.re_pattern.Match( + sp[0], + <int>pos, + <int>size, + _re2.UNANCHORED, + m.matches, + self.groups + 1) + if result == 0: + break + + endpos = m.matches[0].data() - cstring + if encoded: + resultlist.append( + char_to_unicode(&sp.data()[pos], endpos - pos)) + else: + resultlist.append(sp.data()[pos:endpos]) + pos = endpos + m.matches[0].length() + + m.encoded = encoded + m.named_groups = _re2.addressof( + self.re_pattern.NamedCapturingGroups()) + m.nmatches = self.groups + 1 + m.string = string + resultlist.append(callback(m) or '') + + num_repl += 1 + if count and num_repl >= count: + break + + if encoded: + resultlist.append( + char_to_unicode(&sp.data()[pos], sp.length() - pos)) + return (u''.join(resultlist), num_repl) + else: + resultlist.append(sp.data()[pos:]) + return (b''.join(resultlist), num_repl) + finally: + del sp + +_cache = {} +_cache_repl = {} + +_MAXCACHE = 100 + +def compile(pattern, int flags=0, int max_mem=8388608): + cachekey = (type(pattern), pattern, flags) + if cachekey in _cache: + return _cache[cachekey] + p = _compile(pattern, flags, max_mem) + + if len(_cache) >= _MAXCACHE: + _cache.popitem() + _cache[cachekey] = p + return p + + +WHITESPACE = b' \t\n\r\v\f' + + +cdef class Tokenizer: + cdef bytes string + cdef bytes next + cdef int length + cdef int index + + def __init__(self, bytes string): + self.string = string + self.length = len(string) + self.index = 0 + self._next() + + cdef _next(self): + cdef bytes ch + 
if self.index >= self.length: + self.next = None + return + ch = self.string[self.index:self.index + 1] + if ch[0:1] == b'\\': + if self.index + 2 > self.length: + raise RegexError("bogus escape (end of line)") + ch = self.string[self.index:self.index + 2] + self.index += 1 + self.index += 1 + # FIXME: return indices instead of creating new bytes objects + self.next = ch + + cdef bytes get(self): + cdef bytes this = self.next + self._next() + return this + + +def prepare_pattern(object pattern, int flags): + cdef bytearray result = bytearray() + cdef bytes this + cdef Tokenizer source = Tokenizer(pattern) + + if flags & (_S | _M): + result.extend(b'(?') + if flags & _S: + result.append(b's') + if flags & _M: + result.append(b'm') + result.append(b')') + + while True: + this = source.get() + if this is None: + break + if flags & _X: + if this in WHITESPACE: + continue + if this == b"#": + while True: + this = source.get() + if this in (None, b'\n'): + break + continue + + if this[0:1] != b'[' and this[0:1] != b'\\': + result.extend(this) + continue + + elif this == b'[': + result.extend(this) + while True: + this = source.get() + if this is None: + raise RegexError("unexpected end of regular expression") + elif this == b']': + result.extend(this) + break + elif this[0:1] == b'\\': + if flags & _U: + if this[1:2] == b'd': + result.extend(br'\p{Nd}') + elif this[1:2] == b'w': + result.extend(br'_\p{L}\p{Nd}') + elif this[1:2] == b's': + result.extend(br'\s\p{Z}') + elif this[1:2] == b'D': + result.extend(br'\P{Nd}') + elif this[1:2] == b'W': + # Since \w and \s are made out of several character + # groups, I don't see a way to convert their + # complements into a group without rewriting the + # whole expression, which seems too complicated. 
+ raise CharClassProblemException(repr(this)) + elif this[1:2] == b'S': + raise CharClassProblemException(repr(this)) + else: + result.extend(this) + else: + result.extend(this) + else: + result.extend(this) + elif this[0:1] == b'\\': + if b'8' <= this[1:2] <= b'9': + raise BackreferencesException('%r %r' % (this, pattern)) + elif b'1' <= this[1:2] <= b'7': + if source.next and source.next in b'1234567': + this += source.get() + if source.next and source.next in b'1234567': + # all clear, this is an octal escape + result.extend(this) + else: + raise BackreferencesException('%r %r' % (this, pattern)) + else: + raise BackreferencesException('%r %r' % (this, pattern)) + elif flags & _U: + if this[1:2] == b'd': + result.extend(br'\p{Nd}') + elif this[1:2] == b'w': + result.extend(br'[_\p{L}\p{Nd}]') + elif this[1:2] == b's': + result.extend(br'[\s\p{Z}]') + elif this[1:2] == b'D': + result.extend(br'[^\p{Nd}]') + elif this[1:2] == b'W': + result.extend(br'[^_\p{L}\p{Nd}]') + elif this[1:2] == b'S': + result.extend(br'[^\s\p{Z}]') + else: + result.extend(this) + else: + result.extend(this) + + return <bytes>result + + +def _compile(object pattern, int flags=0, int max_mem=8388608): + """Compile a regular expression pattern, returning a pattern object.""" + cdef char * string + cdef Py_ssize_t length + cdef _re2.StringPiece * s + cdef _re2.Options opts + cdef int error_code + cdef int encoded = 0 + + if isinstance(pattern, (Pattern, SREPattern)): + if flags: + raise ValueError( + 'Cannot process flags argument with a compiled pattern') + return pattern + + cdef object original_pattern = pattern + pattern = unicode_to_bytes(pattern, &encoded) + try: + pattern = prepare_pattern(pattern, flags) + except BackreferencesException: + error_msg = "Backreferences not supported" + if current_notification == <int>FALLBACK_EXCEPTION: + # Raise an exception regardless of the type of error. 
+ raise RegexError(error_msg) + elif current_notification == <int>FALLBACK_WARNING: + warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) + return re.compile(original_pattern, flags) + except CharClassProblemException: + error_msg = "\W and \S not supported inside character classes" + if current_notification == <int>FALLBACK_EXCEPTION: + # Raise an exception regardless of the type of error. + raise RegexError(error_msg) + elif current_notification == <int>FALLBACK_WARNING: + warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) + return re.compile(original_pattern, flags) + + # Set the options given the flags above. + if flags & _I: + opts.set_case_sensitive(0); + + opts.set_max_mem(max_mem) + opts.set_log_errors(0) + opts.set_encoding(_re2.EncodingUTF8) + + # We use this function to get the proper length of the string. + if pystring_to_cstring(pattern, &string, &length) == -1: + raise TypeError("first argument must be a string or compiled pattern") + s = new _re2.StringPiece(string, length) + + cdef _re2.RE2 *re_pattern + with nogil: + re_pattern = new _re2.RE2(s[0], opts) + + if not re_pattern.ok(): + # Something went wrong with the compilation. + del s + error_msg = cpp_to_bytes(re_pattern.error()) + error_code = re_pattern.error_code() + del re_pattern + if current_notification == <int>FALLBACK_EXCEPTION: + # Raise an exception regardless of the type of error. + raise RegexError(error_msg) + elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, + _re2.ErrorBadEscape): + # Raise an error because these will not be fixed by using the + # ``re`` module. + raise RegexError(error_msg) + elif current_notification == <int>FALLBACK_WARNING: + warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) + return re.compile(original_pattern, flags) + + cdef Pattern pypattern = Pattern() + pypattern.pattern = original_pattern + pypattern.re_pattern = re_pattern + pypattern.groups = re_pattern.NumberOfCapturingGroups() + pypattern.encoded = encoded + pypattern.flags = flags + del s + return pypattern + + diff --git a/src/re2.pyx b/src/re2.pyx index bc89baae..41e2607a 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -25,14 +25,11 @@ import warnings cimport _re2 cimport cpython.unicode from cython.operator cimport preincrement as inc, dereference as deref +from cpython.buffer cimport PyBUF_SIMPLE, Py_buffer +from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release # Import re flags to be compatible. -I = re.I -M = re.M -S = re.S -U = re.U -X = re.X -L = re.L +I, M, S, U, X, L = re.I, re.M, re.S, re.U, re.X, re.L IGNORECASE = re.IGNORECASE MULTILINE = re.MULTILINE DOTALL = re.DOTALL @@ -53,9 +50,12 @@ cdef int current_notification = FALLBACK_QUIETLY # Type of compiled re object from Python stdlib SREPattern = type(re.compile('')) +include "match.pxi" +include "pattern.pxi" + class RegexError(re.error): - """ome error has occured in compilation of the regex.""" + """Some error has occured in compilation of the regex.""" pass error = RegexError @@ -120,13 +120,9 @@ cdef inline unicode_to_bytes(object pystring, int * encoded): cdef inline int pystring_to_cstring( object pystring, char ** cstring, Py_ssize_t * length): """Get a C string from a bytes/buffer object.""" - # Place the char * in cstring, and the length in length. - # First it will try treating it as a str object, but failing that - # it will move to utf-8. If utf8 does not work, then it has to be - # a non-supported encoding. 
+ # FIXME: use Python 3 buffer interface when available return _re2.PyObject_AsCharBuffer( pystring, <_re2.const_char_ptr*> cstring, length) - # FIXME: use Python 3 buffer interface when available cdef extern from *: @@ -134,959 +130,6 @@ cdef extern from *: cdef void emit_endif "#endif //" () -cdef class Match: - cdef readonly Pattern re - cdef readonly object string - cdef readonly int pos - cdef readonly int endpos - - cdef _re2.StringPiece * matches - cdef _re2.const_stringintmap * named_groups - cdef bint encoded - cdef int nmatches - cdef int _lastindex - cdef tuple _groups - cdef tuple _spans - cdef dict _named_groups - cdef dict _named_indexes - - def __init__(self, Pattern pattern_object, int num_groups): - self._lastindex = -1 - self._groups = None - self.pos = 0 - self.endpos = -1 - self.matches = _re2.new_StringPiece_array(num_groups + 1) - self.nmatches = num_groups - self.re = pattern_object - - def __dealloc__(self): - _re2.delete_StringPiece_array(self.matches) - - def __repr__(self): - return '<re2.Match object; span=%r, match=%r>' % ( - (self.pos, self.endpos), self.string) - - cdef init_groups(self): - cdef list groups = [] - cdef int i - - if self._groups is not None: - return - - cdef _re2.const_char_ptr last_end = NULL - cdef _re2.const_char_ptr cur_end = NULL - - for i in range(self.nmatches): - if self.matches[i].data() == NULL: - groups.append(None) - else: - if i > 0: - cur_end = self.matches[i].data() + self.matches[i].length() - - if last_end == NULL: - last_end = cur_end - self._lastindex = i - else: - # The rules for last group are a bit complicated: - # if two groups end at the same point, the earlier one - # is considered last, so we don't switch our selection - # unless the end point has moved. 
- if cur_end > last_end: - last_end = cur_end - self._lastindex = i - groups.append( - self.matches[i].data()[:self.matches[i].length()]) - self._groups = tuple(groups) - - def groups(self, default=None): - self.init_groups() - if self.encoded: - return tuple([ - g.decode('utf8') if g else default - for g in self._groups[1:]]) - if default is not None: - return tuple([g or default for g in self._groups[1:]]) - return self._groups[1:] - - def group(self, *args): - if len(args) == 0: - groupnum = 0 - elif len(args) == 1: - groupnum = args[0] - else: # len(args) > 1: - return tuple([self.group(i) for i in args]) - if self.encoded: - return self._group(groupnum).decode('utf8') - return self._group(groupnum) - - cdef bytes _group(self, object groupnum): - cdef int idx - self.init_groups() - if isinstance(groupnum, int): - idx = groupnum - if idx > self.nmatches - 1: - raise IndexError("no such group %d; available groups: %r" - % (idx, list(range(self.nmatches)))) - return self._groups[idx] - groupdict = self._groupdict() - if groupnum not in groupdict: - raise IndexError("no such group %r; available groups: %r" - % (groupnum, list(groupdict.keys()))) - return groupdict[groupnum] - - cdef list _convert_positions(self, positions): - cdef char * s - cdef int cpos = 0 - cdef int upos = 0 - cdef Py_ssize_t size - cdef int c - if pystring_to_cstring(self.string, &s, &size) == -1: - raise TypeError("expected string or buffer") - - new_positions = [] - i = 0 - num_positions = len(positions) - if positions[i] == -1: - new_positions.append(-1) - inc(i) - if i == num_positions: - return new_positions - if positions[i] == 0: - new_positions.append(0) - inc(i) - if i == num_positions: - return new_positions - - while cpos < size: - c = <unsigned char>s[cpos] - if c < 0x80: - inc(cpos) - inc(upos) - elif c < 0xe0: - cpos += 2 - inc(upos) - elif c < 0xf0: - cpos += 3 - inc(upos) - else: - cpos += 4 - inc(upos) - # wide unicode chars get 2 unichars when python is compiled - # with 
--enable-unicode=ucs2 - # TODO: verify this - emit_ifndef_py_unicode_wide() - inc(upos) - emit_endif() - - if positions[i] == cpos: - new_positions.append(upos) - inc(i) - if i == num_positions: - return new_positions - - def _convert_spans(self, spans): - positions = [x for x, _ in spans] + [y for _, y in spans] - positions = sorted(set(positions)) - posdict = dict(zip(positions, self._convert_positions(positions))) - - return [(posdict[x], posdict[y]) for x, y in spans] - - - cdef _make_spans(self): - if self._spans is not None: - return - - cdef int start, end - cdef char * s - cdef Py_ssize_t size - cdef _re2.StringPiece * piece - if pystring_to_cstring(self.string, &s, &size) == -1: - raise TypeError("expected string or buffer") - - spans = [] - for i in range(self.nmatches): - if self.matches[i].data() == NULL: - spans.append((-1, -1)) - else: - piece = &self.matches[i] - if piece.data() == NULL: - return (-1, -1) - start = piece.data() - s - end = start + piece.length() - spans.append((start, end)) - - if self.encoded: - spans = self._convert_spans(spans) - - self._spans = tuple(spans) - - def expand(self, object template): - """Expand a template with groups.""" - # TODO - This can be optimized to work a bit faster in C. - if isinstance(template, unicode): - template = template.encode('utf8') - items = template.split(b'\\') - for i, item in enumerate(items[1:]): - if item[0:1].isdigit(): - # Number group - if item[0] == b'0': - items[i + 1] = b'\x00' + item[1:] # ??? 
- else: - items[i + 1] = self._group(int(item[0:1])) + item[1:] - elif item[:2] == b'g<' and b'>' in item: - # This is a named group - name, rest = item[2:].split(b'>', 1) - items[i + 1] = self._group(name) + rest - else: - # This isn't a template at all - items[i + 1] = b'\\' + item - if self.encoded: - return b''.join(items).decode('utf8') - return b''.join(items) - - cdef dict _groupdict(self): - cdef _re2.stringintmapiterator it - cdef dict result = {} - cdef dict indexes = {} - - self.init_groups() - - if self._named_groups: - return self._named_groups - - self._named_groups = result - it = self.named_groups.begin() - while it != self.named_groups.end(): - indexes[cpp_to_bytes(deref(it).first)] = deref(it).second - result[cpp_to_bytes(deref(it).first)] = self._groups[ - deref(it).second] - inc(it) - - self._named_groups = result - self._named_indexes = indexes - return result - - def groupdict(self): - result = self._groupdict() - if self.encoded: - return {a.decode('utf8') if isinstance(a, bytes) else a: - b.decode('utf8') for a, b in result.items()} - return result - - def end(self, group=0): - return self.span(group)[1] - - def start(self, group=0): - return self.span(group)[0] - - def span(self, group=0): - self._make_spans() - if isinstance(group, int): - if group > len(self._spans): - raise IndexError("no such group %d; available groups: %r" - % (group, list(range(len(self._spans))))) - return self._spans[group] - else: - self._groupdict() - if self.encoded: - group = group.encode('utf8') - if group not in self._named_indexes: - raise IndexError("no such group %r; available groups: %r" - % (group, list(self._named_indexes))) - return self._spans[self._named_indexes[group]] - - property regs: - def __get__(self): - if self._spans is None: - self._make_spans() - return self._spans - - property lastindex: - def __get__(self): - self.init_groups() - if self._lastindex < 1: - return None - else: - return self._lastindex - - property lastgroup: - def 
__get__(self): - self.init_groups() - cdef _re2.stringintmapiterator it - - if self._lastindex < 1: - return None - - it = self.named_groups.begin() - while it != self.named_groups.end(): - if deref(it).second == self._lastindex: - return cpp_to_bytes(deref(it).first) - inc(it) - - return None - - -cdef class Pattern: - cdef readonly int flags - cdef readonly int groups - cdef readonly object pattern - - cdef _re2.RE2 * re_pattern - cdef bint encoded - cdef object __weakref__ - - def __dealloc__(self): - del self.re_pattern - - def __repr__(self): - return 're2.compile(%r, %r)' % (self.pattern, self.flags) - - cdef _search(self, string, int pos, int endpos, _re2.re2_Anchor anchoring): - """Scan through string looking for a match, and return a corresponding - Match instance. Return None if no position in the string matches.""" - cdef Py_ssize_t size - cdef int result - cdef char * cstring - cdef int encoded = 0 - cdef _re2.StringPiece * sp - cdef Match m = Match(self, self.groups + 1) - - if hasattr(string, 'tostring'): - string = string.tostring() - - string = unicode_to_bytes(string, &encoded) - - if pystring_to_cstring(string, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - - if endpos >= 0 and endpos <= pos: - return None - - if endpos >= 0 and endpos < size: - size = endpos - - if pos > size: - return None - - sp = new _re2.StringPiece(cstring, size) - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>pos, - <int>size, - anchoring, - m.matches, - self.groups + 1) - - del sp - if result == 0: - return None - m.encoded = <bint>(encoded) - m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.groups + 1 - m.string = string - m.pos = pos - if endpos == -1: - m.endpos = len(string) - else: - m.endpos = endpos - return m - - def search(self, object string, int pos=0, int endpos=-1): - """Scan through string looking for a match, and return a corresponding - Match instance. 
Return None if no position in the string matches.""" - return self._search(string, pos, endpos, _re2.UNANCHORED) - - def match(self, object string, int pos=0, int endpos=-1): - """Matches zero or more characters at the beginning of the string.""" - return self._search(string, pos, endpos, _re2.ANCHOR_START) - - def _print_pattern(self): - cdef _re2.cpp_string * s - s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) - print(cpp_to_bytes(s[0]).decode('utf8')) - - def finditer(self, object string, int pos=0, int endpos=-1): - """Yield all non-overlapping matches of pattern in string as Match - objects.""" - cdef Py_ssize_t size - cdef int result - cdef char * cstring - cdef _re2.StringPiece * sp - cdef Match m - cdef int encoded = 0 - - string = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(string, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - encoded = <bint>encoded - - if endpos != -1 and endpos < size: - size = endpos - - sp = new _re2.StringPiece(cstring, size) - - while True: - m = Match(self, self.groups + 1) - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>pos, - <int>size, - _re2.UNANCHORED, - m.matches, - self.groups + 1) - if result == 0: - break - m.encoded = encoded - m.named_groups = _re2.addressof( - self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.groups + 1 - m.string = string - m.pos = pos - if endpos == -1: - m.endpos = len(string) - else: - m.endpos = endpos - yield m - if pos == size: - break - # offset the pos to move to the next point - if m.matches[0].length() == 0: - pos += 1 - else: - pos = m.matches[0].data() - cstring + m.matches[0].length() - del sp - - def findall(self, object string, int pos=0, int endpos=-1): - """Return all non-overlapping matches of pattern in string as a list - of strings.""" - cdef Py_ssize_t size - cdef int result - cdef char * cstring - cdef _re2.StringPiece * sp - cdef Match m - cdef list resultlist = [] - cdef int encoded = 0 - - 
string = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(string, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - encoded = <bint>encoded - - if endpos != -1 and endpos < size: - size = endpos - - sp = new _re2.StringPiece(cstring, size) - - while True: - # FIXME: can probably avoid creating Match objects - m = Match(self, self.groups + 1) - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>pos, - <int>size, - _re2.UNANCHORED, - m.matches, - self.groups + 1) - if result == 0: - break - m.encoded = encoded - m.named_groups = _re2.addressof( - self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.groups + 1 - m.string = string - m.pos = pos - if endpos == -1: - m.endpos = len(string) - else: - m.endpos = endpos - if self.groups > 1: - resultlist.append(m.groups("")) - else: - resultlist.append(m.group(self.groups)) - if pos == size: - break - # offset the pos to move to the next point - if m.matches[0].length() == 0: - pos += 1 - else: - pos = m.matches[0].data() - cstring + m.matches[0].length() - del sp - return resultlist - - def split(self, string, int maxsplit=0): - """split(string[, maxsplit = 0]) --> list - - Split a string by the occurrences of the pattern.""" - cdef Py_ssize_t size - cdef int result - cdef int pos = 0 - cdef int lookahead = 0 - cdef int num_split = 0 - cdef char * cstring - cdef _re2.StringPiece * sp - cdef _re2.StringPiece * matches - cdef list resultlist = [] - cdef int encoded = 0 - - if maxsplit < 0: - maxsplit = 0 - - string = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(string, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - - matches = _re2.new_StringPiece_array(self.groups + 1) - sp = new _re2.StringPiece(cstring, size) - - while True: - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>(pos + lookahead), - <int>size, - _re2.UNANCHORED, - matches, - self.groups + 1) - if result == 0: - break - - match_start = matches[0].data() - 
cstring - match_end = match_start + matches[0].length() - - # If an empty match, just look ahead until you find something - if match_start == match_end: - if pos + lookahead == size: - break - lookahead += 1 - continue - - if encoded: - resultlist.append( - char_to_unicode(&sp.data()[pos], match_start - pos)) - else: - resultlist.append(sp.data()[pos:match_start]) - if self.groups > 0: - for group in range(self.groups): - if matches[group + 1].data() == NULL: - resultlist.append(None) - else: - if encoded: - resultlist.append(char_to_unicode( - matches[group + 1].data(), - matches[group + 1].length())) - else: - resultlist.append(matches[group + 1].data()[: - matches[group + 1].length()]) - - # offset the pos to move to the next point - pos = match_end - lookahead = 0 - - num_split += 1 - if maxsplit and num_split >= maxsplit: - break - - if encoded: - resultlist.append( - char_to_unicode(&sp.data()[pos], sp.length() - pos)) - else: - resultlist.append(sp.data()[pos:]) - _re2.delete_StringPiece_array(matches) - del sp - return resultlist - - def sub(self, repl, string, int count=0): - """sub(repl, string[, count = 0]) --> newstring - - Return the string obtained by replacing the leftmost non-overlapping - occurrences of pattern in string by the replacement repl.""" - return self.subn(repl, string, count)[0] - - def subn(self, repl, string, int count=0): - """subn(repl, string[, count = 0]) --> (newstring, number of subs) - - Return the tuple (new_string, number_of_subs_made) found by replacing - the leftmost non-overlapping occurrences of pattern with the - replacement repl.""" - cdef Py_ssize_t size - cdef char * cstring - cdef _re2.cpp_string * fixed_repl - cdef _re2.StringPiece * sp - cdef _re2.cpp_string * input_str - cdef total_replacements = 0 - cdef int string_encoded = 0 - cdef int repl_encoded = 0 - - if callable(repl): - # This is a callback, so let's use the custom function - return self._subn_callback(repl, string, count) - - string = 
unicode_to_bytes(string, &string_encoded) - repl = unicode_to_bytes(repl, &repl_encoded) - if pystring_to_cstring(repl, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - - fixed_repl = NULL - cdef _re2.const_char_ptr s = cstring - cdef _re2.const_char_ptr end = s + size - cdef int c = 0 - while s < end: - c = s[0] - if (c == b'\\'): - s += 1 - if s == end: - raise RegexError("Invalid rewrite pattern") - c = s[0] - if c == b'\\' or (c >= b'0' and c <= b'9'): - if fixed_repl != NULL: - fixed_repl.push_back(b'\\') - fixed_repl.push_back(c) - else: - if fixed_repl == NULL: - fixed_repl = new _re2.cpp_string( - cstring, s - cstring - 1) - if c == b'n': - fixed_repl.push_back(b'\n') - else: - fixed_repl.push_back(b'\\') - fixed_repl.push_back(b'\\') - fixed_repl.push_back(c) - else: - if fixed_repl != NULL: - fixed_repl.push_back(c) - - s += 1 - if fixed_repl != NULL: - sp = new _re2.StringPiece(fixed_repl.c_str()) - else: - sp = new _re2.StringPiece(cstring, size) - - input_str = new _re2.cpp_string(string) - if not count: - total_replacements = _re2.pattern_GlobalReplace( - input_str, self.re_pattern[0], sp[0]) - elif count == 1: - total_replacements = _re2.pattern_Replace( - input_str, self.re_pattern[0], sp[0]) - else: - del fixed_repl - del input_str - del sp - raise NotImplementedError( - "So far pyre2 does not support custom replacement counts") - - if string_encoded or (repl_encoded and total_replacements > 0): - result = cpp_to_unicode(input_str[0]) - else: - result = cpp_to_bytes(input_str[0]) - del fixed_repl - del input_str - del sp - return (result, total_replacements) - - def _subn_callback(self, callback, string, int count=0): - # This function is probably the hardest to implement correctly. - # This is my first attempt, but if anybody has a better solution, - # please help out. 
- cdef Py_ssize_t size - cdef int result - cdef int endpos - cdef int pos = 0 - cdef int encoded = 0 - cdef int num_repl = 0 - cdef char * cstring - cdef _re2.StringPiece * sp - cdef Match m - cdef list resultlist = [] - - if count < 0: - count = 0 - - string = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(string, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - encoded = <bint>encoded - - sp = new _re2.StringPiece(cstring, size) - - try: - while True: - m = Match(self, self.groups + 1) - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>pos, - <int>size, - _re2.UNANCHORED, - m.matches, - self.groups + 1) - if result == 0: - break - - endpos = m.matches[0].data() - cstring - if encoded: - resultlist.append( - char_to_unicode(&sp.data()[pos], endpos - pos)) - else: - resultlist.append(sp.data()[pos:endpos]) - pos = endpos + m.matches[0].length() - - m.encoded = encoded - m.named_groups = _re2.addressof( - self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.groups + 1 - m.string = string - resultlist.append(callback(m) or '') - - num_repl += 1 - if count and num_repl >= count: - break - - if encoded: - resultlist.append( - char_to_unicode(&sp.data()[pos], sp.length() - pos)) - return (u''.join(resultlist), num_repl) - else: - resultlist.append(sp.data()[pos:]) - return (b''.join(resultlist), num_repl) - finally: - del sp - -_cache = {} -_cache_repl = {} - -_MAXCACHE = 100 - -def compile(pattern, int flags=0, int max_mem=8388608): - cachekey = (type(pattern), pattern, flags) - if cachekey in _cache: - return _cache[cachekey] - p = _compile(pattern, flags, max_mem) - - if len(_cache) >= _MAXCACHE: - _cache.popitem() - _cache[cachekey] = p - return p - - -WHITESPACE = b' \t\n\r\v\f' - - -cdef class Tokenizer: - cdef bytes string - cdef bytes next - cdef int length - cdef int index - - def __init__(self, bytes string): - self.string = string - self.length = len(string) - self.index = 0 - self._next() - - cdef 
_next(self): - cdef bytes ch - if self.index >= self.length: - self.next = None - return - ch = self.string[self.index:self.index + 1] - if ch[0:1] == b'\\': - if self.index + 2 > self.length: - raise RegexError("bogus escape (end of line)") - ch = self.string[self.index:self.index + 2] - self.index += 1 - self.index += 1 - # FIXME: return indices instead of creating new bytes objects - self.next = ch - - cdef bytes get(self): - cdef bytes this = self.next - self._next() - return this - - -def prepare_pattern(object pattern, int flags): - cdef bytearray result = bytearray() - cdef bytes this - cdef Tokenizer source = Tokenizer(pattern) - - if flags & (_S | _M): - result.extend(b'(?') - if flags & _S: - result.append(b's') - if flags & _M: - result.append(b'm') - result.append(b')') - - while True: - this = source.get() - if this is None: - break - if flags & _X: - if this in WHITESPACE: - continue - if this == b"#": - while True: - this = source.get() - if this in (None, b'\n'): - break - continue - - if this[0:1] != b'[' and this[0:1] != b'\\': - result.extend(this) - continue - - elif this == b'[': - result.extend(this) - while True: - this = source.get() - if this is None: - raise RegexError("unexpected end of regular expression") - elif this == b']': - result.extend(this) - break - elif this[0:1] == b'\\': - if flags & _U: - if this[1:2] == b'd': - result.extend(br'\p{Nd}') - elif this[1:2] == b'w': - result.extend(br'_\p{L}\p{Nd}') - elif this[1:2] == b's': - result.extend(br'\s\p{Z}') - elif this[1:2] == b'D': - result.extend(br'\P{Nd}') - elif this[1:2] == b'W': - # Since \w and \s are made out of several character - # groups, I don't see a way to convert their - # complements into a group without rewriting the - # whole expression, which seems too complicated. 
- raise CharClassProblemException(repr(this)) - elif this[1:2] == b'S': - raise CharClassProblemException(repr(this)) - else: - result.extend(this) - else: - result.extend(this) - else: - result.extend(this) - elif this[0:1] == b'\\': - if b'8' <= this[1:2] <= b'9': - raise BackreferencesException('%r %r' % (this, pattern)) - elif b'1' <= this[1:2] <= b'7': - if source.next and source.next in b'1234567': - this += source.get() - if source.next and source.next in b'1234567': - # all clear, this is an octal escape - result.extend(this) - else: - raise BackreferencesException('%r %r' % (this, pattern)) - else: - raise BackreferencesException('%r %r' % (this, pattern)) - elif flags & _U: - if this[1:2] == b'd': - result.extend(br'\p{Nd}') - elif this[1:2] == b'w': - result.extend(br'[_\p{L}\p{Nd}]') - elif this[1:2] == b's': - result.extend(br'[\s\p{Z}]') - elif this[1:2] == b'D': - result.extend(br'[^\p{Nd}]') - elif this[1:2] == b'W': - result.extend(br'[^_\p{L}\p{Nd}]') - elif this[1:2] == b'S': - result.extend(br'[^\s\p{Z}]') - else: - result.extend(this) - else: - result.extend(this) - - return <bytes>result - - -def _compile(object pattern, int flags=0, int max_mem=8388608): - """Compile a regular expression pattern, returning a pattern object.""" - cdef char * string - cdef Py_ssize_t length - cdef _re2.StringPiece * s - cdef _re2.Options opts - cdef int error_code - cdef int encoded = 0 - - if isinstance(pattern, (Pattern, SREPattern)): - if flags: - raise ValueError( - 'Cannot process flags argument with a compiled pattern') - return pattern - - cdef object original_pattern = pattern - pattern = unicode_to_bytes(pattern, &encoded) - try: - pattern = prepare_pattern(pattern, flags) - except BackreferencesException: - error_msg = "Backreferences not supported" - if current_notification == <int>FALLBACK_EXCEPTION: - # Raise an exception regardless of the type of error. 
- raise RegexError(error_msg) - elif current_notification == <int>FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - return re.compile(original_pattern, flags) - except CharClassProblemException: - error_msg = "\W and \S not supported inside character classes" - if current_notification == <int>FALLBACK_EXCEPTION: - # Raise an exception regardless of the type of error. - raise RegexError(error_msg) - elif current_notification == <int>FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - return re.compile(original_pattern, flags) - - # Set the options given the flags above. - if flags & _I: - opts.set_case_sensitive(0); - - opts.set_max_mem(max_mem) - opts.set_log_errors(0) - opts.set_encoding(_re2.EncodingUTF8) - - # We use this function to get the proper length of the string. - if pystring_to_cstring(pattern, &string, &length) == -1: - raise TypeError("first argument must be a string or compiled pattern") - s = new _re2.StringPiece(string, length) - - cdef _re2.RE2 *re_pattern - with nogil: - re_pattern = new _re2.RE2(s[0], opts) - - if not re_pattern.ok(): - # Something went wrong with the compilation. - del s - error_msg = cpp_to_bytes(re_pattern.error()) - error_code = re_pattern.error_code() - del re_pattern - if current_notification == <int>FALLBACK_EXCEPTION: - # Raise an exception regardless of the type of error. - raise RegexError(error_msg) - elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, - _re2.ErrorBadEscape): - # Raise an error because these will not be fixed by using the - # ``re`` module. - raise RegexError(error_msg) - elif current_notification == <int>FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) - return re.compile(original_pattern, flags) - - cdef Pattern pypattern = Pattern() - pypattern.pattern = original_pattern - pypattern.re_pattern = re_pattern - pypattern.groups = re_pattern.NumberOfCapturingGroups() - pypattern.encoded = <bint>encoded - pypattern.flags = flags - del s - return pypattern - - def search(pattern, string, int flags=0): """Scan through string looking for a match to the pattern, returning a match object or none if no match was found.""" From 1621e47113f27f1ae2b6ca8cd7bdc7b8c4c801a0 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sun, 2 Aug 2015 23:22:29 +0200 Subject: [PATCH 005/114] properly translate pos, endpos indices with unicode, &c. - properly translate pos, endpos indices with unicode - keep original unicode string in Match objects - separate compile.pxi file --- .gitignore | 2 +- src/compile.pxi | 228 +++++++++++++++++++++ src/match.pxi | 307 ++++++++++++++-------------- src/pattern.pxi | 449 +++++++++++++---------------------------- src/re2.pyx | 168 ++++++++------- tests/issue4.txt | 35 ++-- tests/match_expand.txt | 10 +- tests/namedgroups.txt | 16 +- tests/split.txt | 3 +- tests/unicode.txt | 10 +- 10 files changed, 641 insertions(+), 587 deletions(-) create mode 100644 src/compile.pxi diff --git a/.gitignore b/.gitignore index 7a33e695..6b6d7429 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,9 @@ MANIFEST /build /dist -src/re2.html src/re2.so src/re2.cpp +src/*.html tests/access.log *~ *.pyc diff --git a/src/compile.pxi b/src/compile.pxi new file mode 100644 index 00000000..e7bf6a88 --- /dev/null +++ b/src/compile.pxi @@ -0,0 +1,228 @@ + +def compile(pattern, int flags=0, int max_mem=8388608): + cachekey = (type(pattern), pattern, flags) + if cachekey in _cache: + return _cache[cachekey] + p = _compile(pattern, flags, max_mem) + + if len(_cache) >= _MAXCACHE: + _cache.popitem() + _cache[cachekey] = p + return p + + +WHITESPACE = b' \t\n\r\v\f' + + +cdef class 
Tokenizer: + cdef bytes string + cdef bytes next + cdef int length + cdef int index + + def __init__(self, bytes string): + self.string = string + self.length = len(string) + self.index = 0 + self._next() + + cdef _next(self): + cdef bytes ch + if self.index >= self.length: + self.next = None + return + ch = self.string[self.index:self.index + 1] + if ch[0:1] == b'\\': + if self.index + 2 > self.length: + raise RegexError("bogus escape (end of line)") + ch = self.string[self.index:self.index + 2] + self.index += 1 + self.index += 1 + # FIXME: return indices instead of creating new bytes objects + self.next = ch + + cdef bytes get(self): + cdef bytes this = self.next + self._next() + return this + + +def prepare_pattern(object pattern, int flags): + cdef bytearray result = bytearray() + cdef bytes this + cdef Tokenizer source = Tokenizer(pattern) + + if flags & (_S | _M): + result.extend(b'(?') + if flags & _S: + result.extend(b's') + if flags & _M: + result.extend(b'm') + result.extend(b')') + + while True: + this = source.get() + if this is None: + break + if flags & _X: + if this in WHITESPACE: + continue + if this == b"#": + while True: + this = source.get() + if this in (None, b'\n'): + break + continue + + if this[0:1] != b'[' and this[0:1] != b'\\': + result.extend(this) + continue + + elif this == b'[': + result.extend(this) + while True: + this = source.get() + if this is None: + raise RegexError("unexpected end of regular expression") + elif this == b']': + result.extend(this) + break + elif this[0:1] == b'\\': + if flags & _U: + if this[1:2] == b'd': + result.extend(br'\p{Nd}') + elif this[1:2] == b'w': + result.extend(br'_\p{L}\p{Nd}') + elif this[1:2] == b's': + result.extend(br'\s\p{Z}') + elif this[1:2] == b'D': + result.extend(br'\P{Nd}') + elif this[1:2] == b'W': + # Since \w and \s are made out of several character + # groups, I don't see a way to convert their + # complements into a group without rewriting the + # whole expression, which seems too 
complicated. + raise CharClassProblemException(repr(this)) + elif this[1:2] == b'S': + raise CharClassProblemException(repr(this)) + else: + result.extend(this) + else: + result.extend(this) + else: + result.extend(this) + elif this[0:1] == b'\\': + if b'8' <= this[1:2] <= b'9': + raise BackreferencesException('%r %r' % (this, pattern)) + elif b'1' <= this[1:2] <= b'7': + if source.next and source.next in b'1234567': + this += source.get() + if source.next and source.next in b'1234567': + # all clear, this is an octal escape + result.extend(this) + else: + raise BackreferencesException('%r %r' % (this, pattern)) + else: + raise BackreferencesException('%r %r' % (this, pattern)) + elif flags & _U: + if this[1:2] == b'd': + result.extend(br'\p{Nd}') + elif this[1:2] == b'w': + result.extend(br'[_\p{L}\p{Nd}]') + elif this[1:2] == b's': + result.extend(br'[\s\p{Z}]') + elif this[1:2] == b'D': + result.extend(br'[^\p{Nd}]') + elif this[1:2] == b'W': + result.extend(br'[^_\p{L}\p{Nd}]') + elif this[1:2] == b'S': + result.extend(br'[^\s\p{Z}]') + else: + result.extend(this) + else: + result.extend(this) + + return <bytes>result + + +def _compile(object pattern, int flags=0, int max_mem=8388608): + """Compile a regular expression pattern, returning a pattern object.""" + cdef char * string + cdef Py_ssize_t length + cdef _re2.StringPiece * s + cdef _re2.Options opts + cdef int error_code + cdef int encoded = 0 + + if isinstance(pattern, (Pattern, SREPattern)): + if flags: + raise ValueError( + 'Cannot process flags argument with a compiled pattern') + return pattern + + cdef object original_pattern = pattern + pattern = unicode_to_bytes(pattern, &encoded) + try: + pattern = prepare_pattern(pattern, flags) + except BackreferencesException: + error_msg = "Backreferences not supported" + if current_notification == FALLBACK_EXCEPTION: + # Raise an exception regardless of the type of error. 
+ raise RegexError(error_msg) + elif current_notification == FALLBACK_WARNING: + warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) + return re.compile(original_pattern, flags) + except CharClassProblemException: + error_msg = "\W and \S not supported inside character classes" + if current_notification == FALLBACK_EXCEPTION: + # Raise an exception regardless of the type of error. + raise RegexError(error_msg) + elif current_notification == FALLBACK_WARNING: + warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) + return re.compile(original_pattern, flags) + + # Set the options given the flags above. + if flags & _I: + opts.set_case_sensitive(0); + + opts.set_max_mem(max_mem) + opts.set_log_errors(0) + opts.set_encoding(_re2.EncodingUTF8) + + # We use this function to get the proper length of the string. + if pystring_to_cstring(pattern, &string, &length) == -1: + raise TypeError("first argument must be a string or compiled pattern") + s = new _re2.StringPiece(string, length) + + cdef _re2.RE2 *re_pattern + with nogil: + re_pattern = new _re2.RE2(s[0], opts) + + if not re_pattern.ok(): + # Something went wrong with the compilation. + del s + error_msg = cpp_to_bytes(re_pattern.error()) + error_code = re_pattern.error_code() + del re_pattern + if current_notification == FALLBACK_EXCEPTION: + # Raise an exception regardless of the type of error. + raise RegexError(error_msg) + elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, + _re2.ErrorBadEscape): + # Raise an error because these will not be fixed by using the + # ``re`` module. + raise RegexError(error_msg) + elif current_notification == FALLBACK_WARNING: + warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) + return re.compile(original_pattern, flags) + + cdef Pattern pypattern = Pattern() + pypattern.pattern = original_pattern + pypattern.re_pattern = re_pattern + pypattern.groups = re_pattern.NumberOfCapturingGroups() + pypattern.encoded = encoded + pypattern.flags = flags + del s + return pypattern + + diff --git a/src/match.pxi b/src/match.pxi index ffd93084..3eb1c359 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -7,6 +7,9 @@ cdef class Match: cdef _re2.StringPiece * matches cdef _re2.const_stringintmap * named_groups + cdef object bytestr + cdef char * cstring + cdef int size cdef bint encoded cdef int nmatches cdef int _lastindex @@ -24,20 +27,41 @@ cdef class Match: self.nmatches = num_groups self.re = pattern_object - def __dealloc__(self): - _re2.delete_StringPiece_array(self.matches) + property regs: + def __get__(self): + if self._spans is None: + self._make_spans() + return self._spans - def __repr__(self): - return '<re2.Match object; span=%r, match=%r>' % ( - (self.pos, self.endpos), self.string) + property lastindex: + def __get__(self): + self.init_groups() + if self._lastindex < 1: + return None + else: + return self._lastindex - cdef init_groups(self): - cdef list groups = [] - cdef int i + property lastgroup: + def __get__(self): + self.init_groups() + cdef _re2.stringintmapiterator it + if self._lastindex < 1: + return None + + it = self.named_groups.begin() + while it != self.named_groups.end(): + if deref(it).second == self._lastindex: + return cpp_to_bytes(deref(it).first) + inc(it) + return None + + cdef init_groups(self): if self._groups is not None: return + cdef list groups = [] + cdef int i cdef _re2.const_char_ptr last_end = NULL cdef _re2.const_char_ptr cur_end = NULL @@ -63,27 +87,6 @@ cdef class Match: self.matches[i].data()[:self.matches[i].length()]) self._groups = tuple(groups) - def groups(self, default=None): - self.init_groups() - if self.encoded: - return tuple([ - g.decode('utf8') if g else default - 
for g in self._groups[1:]]) - if default is not None: - return tuple([g or default for g in self._groups[1:]]) - return self._groups[1:] - - def group(self, *args): - if len(args) == 0: - groupnum = 0 - elif len(args) == 1: - groupnum = args[0] - else: # len(args) > 1: - return tuple([self.group(i) for i in args]) - if self.encoded: - return self._group(groupnum).decode('utf8') - return self._group(groupnum) - cdef bytes _group(self, object groupnum): cdef int idx self.init_groups() @@ -99,91 +102,52 @@ cdef class Match: % (groupnum, list(groupdict.keys()))) return groupdict[groupnum] - cdef list _convert_positions(self, positions): - cdef char * s - cdef int cpos = 0 - cdef int upos = 0 - cdef Py_ssize_t size - cdef int c - if pystring_to_cstring(self.string, &s, &size) == -1: - raise TypeError("expected string or buffer") - - new_positions = [] - i = 0 - num_positions = len(positions) - if positions[i] == -1: - new_positions.append(-1) - inc(i) - if i == num_positions: - return new_positions - if positions[i] == 0: - new_positions.append(0) - inc(i) - if i == num_positions: - return new_positions - - while cpos < size: - c = <unsigned char>s[cpos] - if c < 0x80: - inc(cpos) - inc(upos) - elif c < 0xe0: - cpos += 2 - inc(upos) - elif c < 0xf0: - cpos += 3 - inc(upos) - else: - cpos += 4 - inc(upos) - # wide unicode chars get 2 unichars when python is compiled - # with --enable-unicode=ucs2 - # TODO: verify this - emit_ifndef_py_unicode_wide() - inc(upos) - emit_endif() - - if positions[i] == cpos: - new_positions.append(upos) - inc(i) - if i == num_positions: - return new_positions - - def _convert_spans(self, spans): - positions = [x for x, _ in spans] + [y for _, y in spans] - positions = sorted(set(positions)) - posdict = dict(zip(positions, self._convert_positions(positions))) - - return [(posdict[x], posdict[y]) for x, y in spans] + cdef dict _groupdict(self): + self.init_groups() + if self._named_groups is not None: + return self._named_groups + cdef 
_re2.stringintmapiterator it + cdef dict result = {} + cdef dict indexes = {} - cdef _make_spans(self): - if self._spans is not None: - return + self._named_groups = result + it = self.named_groups.begin() + while it != self.named_groups.end(): + indexes[cpp_to_bytes(deref(it).first)] = deref(it).second + result[cpp_to_bytes(deref(it).first)] = self._groups[ + deref(it).second] + inc(it) - cdef int start, end - cdef char * s - cdef Py_ssize_t size - cdef _re2.StringPiece * piece - if pystring_to_cstring(self.string, &s, &size) == -1: - raise TypeError("expected string or buffer") + self._named_groups = result + self._named_indexes = indexes + return result - spans = [] - for i in range(self.nmatches): - if self.matches[i].data() == NULL: - spans.append((-1, -1)) - else: - piece = &self.matches[i] - if piece.data() == NULL: - return (-1, -1) - start = piece.data() - s - end = start + piece.length() - spans.append((start, end)) + def groups(self, default=None): + self.init_groups() + if self.encoded: + return tuple([default if g is None else g.decode('utf8') + for g in self._groups[1:]]) + return tuple([default if g is None else g + for g in self._groups[1:]]) + def group(self, *args): + if len(args) == 0: + groupnum = 0 + elif len(args) == 1: + groupnum = args[0] + else: # len(args) > 1: + return tuple([self.group(i) for i in args]) if self.encoded: - spans = self._convert_spans(spans) + return self._group(groupnum).decode('utf8') + return self._group(groupnum) - self._spans = tuple(spans) + def groupdict(self): + result = self._groupdict() + if self.encoded: + return {a.decode('utf8') if isinstance(a, bytes) else a: + b.decode('utf8') for a, b in result.items()} + return result def expand(self, object template): """Expand a template with groups.""" @@ -209,35 +173,6 @@ cdef class Match: return b''.join(items).decode('utf8') return b''.join(items) - cdef dict _groupdict(self): - cdef _re2.stringintmapiterator it - cdef dict result = {} - cdef dict indexes = {} - - 
self.init_groups() - - if self._named_groups: - return self._named_groups - - self._named_groups = result - it = self.named_groups.begin() - while it != self.named_groups.end(): - indexes[cpp_to_bytes(deref(it).first)] = deref(it).second - result[cpp_to_bytes(deref(it).first)] = self._groups[ - deref(it).second] - inc(it) - - self._named_groups = result - self._named_indexes = indexes - return result - - def groupdict(self): - result = self._groupdict() - if self.encoded: - return {a.decode('utf8') if isinstance(a, bytes) else a: - b.decode('utf8') for a, b in result.items()} - return result - def end(self, group=0): return self.span(group)[1] @@ -260,34 +195,88 @@ cdef class Match: % (group, list(self._named_indexes))) return self._spans[self._named_indexes[group]] - property regs: - def __get__(self): - if self._spans is None: - self._make_spans() - return self._spans + cdef list _convert_positions(self, positions): + """Convert a list of UTF-8 byte indices to unicode indices.""" + cdef unsigned char * s = <unsigned char *>self.cstring + cdef int cpos = 0 + cdef int upos = 0 + cdef int i = 0 + cdef list result = [] - property lastindex: - def __get__(self): - self.init_groups() - if self._lastindex < 1: - return None + if positions[i] == -1: + result.append(-1) + i += 1 + if i == len(positions): + return result + if positions[i] == 0: + result.append(0) + i += 1 + if i == len(positions): + return result + + while cpos < self.size: + if s[cpos] < 0x80: + cpos += 1 + upos += 1 + elif s[cpos] < 0xe0: + cpos += 2 + upos += 1 + elif s[cpos] < 0xf0: + cpos += 3 + upos += 1 else: - return self._lastindex + cpos += 4 + upos += 1 + # wide unicode chars get 2 unichars when python is compiled + # with --enable-unicode=ucs2 + # TODO: verify this + emit_ifndef_py_unicode_wide() + upos += 1 + emit_endif() - property lastgroup: - def __get__(self): - self.init_groups() - cdef _re2.stringintmapiterator it + if positions[i] == cpos: + result.append(upos) + i += 1 + if i == 
len(positions): + break + return result - if self._lastindex < 1: - return None + def _convert_spans(self, spans): + positions = [x for x, _ in spans] + [y for _, y in spans] + positions = sorted(set(positions)) + posdict = dict(zip(positions, self._convert_positions(positions))) - it = self.named_groups.begin() - while it != self.named_groups.end(): - if deref(it).second == self._lastindex: - return cpp_to_bytes(deref(it).first) - inc(it) + return [(posdict[x], posdict[y]) for x, y in spans] - return None + cdef _make_spans(self): + if self._spans is not None: + return + + cdef int start, end + cdef _re2.StringPiece * piece + + spans = [] + for i in range(self.nmatches): + if self.matches[i].data() == NULL: + spans.append((-1, -1)) + else: + piece = &self.matches[i] + if piece.data() == NULL: + return (-1, -1) + start = piece.data() - self.cstring + end = start + piece.length() + spans.append((start, end)) + + if self.encoded: + spans = self._convert_spans(spans) + + self._spans = tuple(spans) + + def __dealloc__(self): + _re2.delete_StringPiece_array(self.matches) + + def __repr__(self): + return '<re2.Match object; span=%r, match=%r>' % ( + (self.pos, self.endpos), self.string) diff --git a/src/pattern.pxi b/src/pattern.pxi index f8267cf0..a08760a2 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -1,5 +1,4 @@ - cdef class Pattern: cdef readonly int flags cdef readonly int groups @@ -9,59 +8,6 @@ cdef class Pattern: cdef bint encoded cdef object __weakref__ - def __dealloc__(self): - del self.re_pattern - - def __repr__(self): - return 're2.compile(%r, %r)' % (self.pattern, self.flags) - - cdef _search(self, string, int pos, int endpos, _re2.re2_Anchor anchoring): - """Scan through string looking for a match, and return a corresponding - Match instance. 
Return None if no position in the string matches.""" - cdef Py_ssize_t size - cdef int result - cdef char * cstring - cdef int encoded = 0 - cdef _re2.StringPiece * sp - cdef Match m = Match(self, self.groups + 1) - - if hasattr(string, 'tostring'): - string = string.tostring() - - string = unicode_to_bytes(string, &encoded) - - if pystring_to_cstring(string, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - - if 0 <= endpos <= pos or pos > size: - return None - if 0 <= endpos < size - size = endpos - - sp = new _re2.StringPiece(cstring, size) - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>pos, - <int>size, - anchoring, - m.matches, - self.groups + 1) - - del sp - if result == 0: - return None - m.encoded = encoded - m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.groups + 1 - m.string = string - m.pos = pos - if endpos == -1: - m.endpos = len(string) - else: - m.endpos = endpos - return m - def search(self, object string, int pos=0, int endpos=-1): """Scan through string looking for a match, and return a corresponding Match instance. 
Return None if no position in the string matches.""" @@ -71,31 +17,31 @@ cdef class Pattern: """Matches zero or more characters at the beginning of the string.""" return self._search(string, pos, endpos, _re2.ANCHOR_START) - def _print_pattern(self): - cdef _re2.cpp_string * s - s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) - print(cpp_to_bytes(s[0]).decode('utf8')) - - def finditer(self, object string, int pos=0, int endpos=-1): - """Yield all non-overlapping matches of pattern in string as Match - objects.""" + def findall(self, object string, int pos=0, int endpos=-1): + """Return all non-overlapping matches of pattern in string as a list + of strings.""" + cdef char * cstring cdef Py_ssize_t size cdef int result - cdef char * cstring cdef _re2.StringPiece * sp cdef Match m + cdef list resultlist = [] cdef int encoded = 0 - string = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(string, &cstring, &size) == -1: + bytestr = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(bytestr, &cstring, &size) == -1: raise TypeError("expected string or buffer") - - if endpos != -1 and endpos < size: + if encoded and (pos or endpos != -1): + utf8indices(cstring, size, &pos, &endpos) + if pos > size: + return [] + if 0 <= endpos < size: size = endpos sp = new _re2.StringPiece(cstring, size) while True: + # FIXME: can probably avoid creating Match objects m = Match(self, self.groups + 1) with nogil: result = self.re_pattern.Match( @@ -112,12 +58,18 @@ cdef class Pattern: self.re_pattern.NamedCapturingGroups()) m.nmatches = self.groups + 1 m.string = string + m.bytestr = bytestr + m.cstring = cstring + m.size = size m.pos = pos if endpos == -1: - m.endpos = len(string) + m.endpos = size else: m.endpos = endpos - yield m + if self.groups > 1: + resultlist.append(m.groups("")) + else: + resultlist.append(m.group(self.groups)) if pos == size: break # offset the pos to move to the next point @@ -126,29 +78,31 @@ cdef class Pattern: else: pos 
= m.matches[0].data() - cstring + m.matches[0].length() del sp + return resultlist - def findall(self, object string, int pos=0, endpos=None): - """Return all non-overlapping matches of pattern in string as a list - of strings.""" + def finditer(self, object string, int pos=0, int endpos=-1): + """Yield all non-overlapping matches of pattern in string as Match + objects.""" cdef Py_ssize_t size cdef int result cdef char * cstring cdef _re2.StringPiece * sp cdef Match m - cdef list resultlist = [] cdef int encoded = 0 - string = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(string, &cstring, &size) == -1: + bytestr = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(bytestr, &cstring, &size) == -1: raise TypeError("expected string or buffer") - - if endpos is not None and endpos < size: + if encoded and (pos or endpos != -1): + utf8indices(cstring, size, &pos, &endpos) + if pos > size: + return + if 0 <= endpos < size: size = endpos sp = new _re2.StringPiece(cstring, size) while True: - # FIXME: can probably avoid creating Match objects m = Match(self, self.groups + 1) with nogil: result = self.re_pattern.Match( @@ -165,15 +119,15 @@ cdef class Pattern: self.re_pattern.NamedCapturingGroups()) m.nmatches = self.groups + 1 m.string = string + m.bytestr = bytestr + m.cstring = cstring + m.size = size m.pos = pos - if endpos is not None: - m.endpos = len(string) + if endpos == -1: + m.endpos = size else: m.endpos = endpos - if self.groups > 1: - resultlist.append(m.groups("")) - else: - resultlist.append(m.group(self.groups)) + yield m if pos == size: break # offset the pos to move to the next point @@ -182,7 +136,6 @@ cdef class Pattern: else: pos = m.matches[0].data() - cstring + m.matches[0].length() del sp - return resultlist def split(self, string, int maxsplit=0): """split(string[, maxsplit = 0]) --> list @@ -202,8 +155,8 @@ cdef class Pattern: if maxsplit < 0: maxsplit = 0 - string = unicode_to_bytes(string, &encoded) - if 
pystring_to_cstring(string, &cstring, &size) == -1: + bytestr = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(bytestr, &cstring, &size) == -1: raise TypeError("expected string or buffer") matches = _re2.new_StringPiece_array(self.groups + 1) @@ -292,7 +245,7 @@ cdef class Pattern: # This is a callback, so let's use the custom function return self._subn_callback(repl, string, count) - string = unicode_to_bytes(string, &string_encoded) + bytestr = unicode_to_bytes(string, &string_encoded) repl = unicode_to_bytes(repl, &repl_encoded) if pystring_to_cstring(repl, &cstring, &size) == -1: raise TypeError("expected string or buffer") @@ -332,7 +285,7 @@ cdef class Pattern: else: sp = new _re2.StringPiece(cstring, size) - input_str = new _re2.cpp_string(string) + input_str = new _re2.cpp_string(bytestr) if not count: total_replacements = _re2.pattern_GlobalReplace( input_str, self.re_pattern[0], sp[0]) @@ -373,8 +326,8 @@ cdef class Pattern: if count < 0: count = 0 - string = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(string, &cstring, &size) == -1: + bytestr = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(bytestr, &cstring, &size) == -1: raise TypeError("expected string or buffer") sp = new _re2.StringPiece(cstring, size) @@ -406,6 +359,9 @@ cdef class Pattern: self.re_pattern.NamedCapturingGroups()) m.nmatches = self.groups + 1 m.string = string + m.bytestr = bytestr + m.cstring = cstring + m.size = size resultlist.append(callback(m) or '') num_repl += 1 @@ -422,235 +378,102 @@ cdef class Pattern: finally: del sp -_cache = {} -_cache_repl = {} + cdef _search(self, object string, int pos, int endpos, + _re2.re2_Anchor anchoring): + """Scan through string looking for a match, and return a corresponding + Match instance. 
Return None if no position in the string matches.""" + cdef Py_ssize_t size + cdef int result + cdef char * cstring + cdef int encoded = 0 + cdef _re2.StringPiece * sp + cdef Match m = Match(self, self.groups + 1) -_MAXCACHE = 100 + if 0 <= endpos <= pos: + return None -def compile(pattern, int flags=0, int max_mem=8388608): - cachekey = (type(pattern), pattern, flags) - if cachekey in _cache: - return _cache[cachekey] - p = _compile(pattern, flags, max_mem) + bytestr = unicode_to_bytes(string, &encoded) + if pystring_to_cstring(bytestr, &cstring, &size) == -1: + raise TypeError("expected string or buffer") + if encoded and (pos or endpos != -1): + utf8indices(cstring, size, &pos, &endpos) + if pos > size: + return None + if 0 <= endpos < size: + size = endpos - if len(_cache) >= _MAXCACHE: - _cache.popitem() - _cache[cachekey] = p - return p + sp = new _re2.StringPiece(cstring, size) + with nogil: + result = self.re_pattern.Match( + sp[0], + <int>pos, + <int>size, + anchoring, + m.matches, + self.groups + 1) + del sp + if result == 0: + return None + m.encoded = encoded + m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) + m.nmatches = self.groups + 1 + m.string = string + m.bytestr = bytestr + m.cstring = cstring + m.size = size + m.pos = pos + if endpos == -1: + m.endpos = size + else: + m.endpos = endpos + return m -WHITESPACE = b' \t\n\r\v\f' + def __repr__(self): + return 're2.compile(%r, %r)' % (self.pattern, self.flags) + def _dump_pattern(self): + cdef _re2.cpp_string * s + s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) + return cpp_to_bytes(s[0]).decode('utf8') -cdef class Tokenizer: - cdef bytes string - cdef bytes next - cdef int length - cdef int index + def __dealloc__(self): + del self.re_pattern - def __init__(self, bytes string): - self.string = string - self.length = len(string) - self.index = 0 - self._next() - cdef _next(self): - cdef bytes ch - if self.index >= self.length: - self.next = None - return - 
ch = self.string[self.index:self.index + 1] - if ch[0:1] == b'\\': - if self.index + 2 > self.length: - raise RegexError("bogus escape (end of line)") - ch = self.string[self.index:self.index + 2] - self.index += 1 - self.index += 1 - # FIXME: return indices instead of creating new bytes objects - self.next = ch - - cdef bytes get(self): - cdef bytes this = self.next - self._next() - return this - - -def prepare_pattern(object pattern, int flags): - cdef bytearray result = bytearray() - cdef bytes this - cdef Tokenizer source = Tokenizer(pattern) - - if flags & (_S | _M): - result.extend(b'(?') - if flags & _S: - result.append(b's') - if flags & _M: - result.append(b'm') - result.append(b')') - - while True: - this = source.get() - if this is None: +cdef utf8indices(char * cstring, int size, int *pos, int *endpos): + """Convert unicode indices pos and endpos to UTF-8 indices. + + If the indices are out of range, leave them unchanged.""" + cdef unsigned char * data = <unsigned char *>cstring + cdef int newpos = pos[0], newendpos = -1 + cdef int cpos = 0, upos = 0 + while cpos < size: + if data[cpos] < 0x80: + cpos += 1 + upos += 1 + elif data[cpos] < 0xe0: + cpos += 2 + upos += 1 + elif data[cpos] < 0xf0: + cpos += 3 + upos += 1 + else: + cpos += 4 + upos += 1 + # wide unicode chars get 2 unichars when python is compiled + # with --enable-unicode=ucs2 + # TODO: verify this + emit_ifndef_py_unicode_wide() + upos += 1 + emit_endif() + + if upos == pos[0]: + newpos = cpos + if endpos[0] == -1: + break + elif upos == endpos[0]: + newendpos = cpos break - if flags & _X: - if this in WHITESPACE: - continue - if this == b"#": - while True: - this = source.get() - if this in (None, b'\n'): - break - continue - - if this[0:1] != b'[' and this[0:1] != b'\\': - result.extend(this) - continue - - elif this == b'[': - result.extend(this) - while True: - this = source.get() - if this is None: - raise RegexError("unexpected end of regular expression") - elif this == b']': - 
result.extend(this) - break - elif this[0:1] == b'\\': - if flags & _U: - if this[1:2] == b'd': - result.extend(br'\p{Nd}') - elif this[1:2] == b'w': - result.extend(br'_\p{L}\p{Nd}') - elif this[1:2] == b's': - result.extend(br'\s\p{Z}') - elif this[1:2] == b'D': - result.extend(br'\P{Nd}') - elif this[1:2] == b'W': - # Since \w and \s are made out of several character - # groups, I don't see a way to convert their - # complements into a group without rewriting the - # whole expression, which seems too complicated. - raise CharClassProblemException(repr(this)) - elif this[1:2] == b'S': - raise CharClassProblemException(repr(this)) - else: - result.extend(this) - else: - result.extend(this) - else: - result.extend(this) - elif this[0:1] == b'\\': - if b'8' <= this[1:2] <= b'9': - raise BackreferencesException('%r %r' % (this, pattern)) - elif b'1' <= this[1:2] <= b'7': - if source.next and source.next in b'1234567': - this += source.get() - if source.next and source.next in b'1234567': - # all clear, this is an octal escape - result.extend(this) - else: - raise BackreferencesException('%r %r' % (this, pattern)) - else: - raise BackreferencesException('%r %r' % (this, pattern)) - elif flags & _U: - if this[1:2] == b'd': - result.extend(br'\p{Nd}') - elif this[1:2] == b'w': - result.extend(br'[_\p{L}\p{Nd}]') - elif this[1:2] == b's': - result.extend(br'[\s\p{Z}]') - elif this[1:2] == b'D': - result.extend(br'[^\p{Nd}]') - elif this[1:2] == b'W': - result.extend(br'[^_\p{L}\p{Nd}]') - elif this[1:2] == b'S': - result.extend(br'[^\s\p{Z}]') - else: - result.extend(this) - else: - result.extend(this) - - return <bytes>result - - -def _compile(object pattern, int flags=0, int max_mem=8388608): - """Compile a regular expression pattern, returning a pattern object.""" - cdef char * string - cdef Py_ssize_t length - cdef _re2.StringPiece * s - cdef _re2.Options opts - cdef int error_code - cdef int encoded = 0 - - if isinstance(pattern, (Pattern, SREPattern)): - if flags: 
- raise ValueError( - 'Cannot process flags argument with a compiled pattern') - return pattern - - cdef object original_pattern = pattern - pattern = unicode_to_bytes(pattern, &encoded) - try: - pattern = prepare_pattern(pattern, flags) - except BackreferencesException: - error_msg = "Backreferences not supported" - if current_notification == <int>FALLBACK_EXCEPTION: - # Raise an exception regardless of the type of error. - raise RegexError(error_msg) - elif current_notification == <int>FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - return re.compile(original_pattern, flags) - except CharClassProblemException: - error_msg = "\W and \S not supported inside character classes" - if current_notification == <int>FALLBACK_EXCEPTION: - # Raise an exception regardless of the type of error. - raise RegexError(error_msg) - elif current_notification == <int>FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - return re.compile(original_pattern, flags) - - # Set the options given the flags above. - if flags & _I: - opts.set_case_sensitive(0); - - opts.set_max_mem(max_mem) - opts.set_log_errors(0) - opts.set_encoding(_re2.EncodingUTF8) - - # We use this function to get the proper length of the string. - if pystring_to_cstring(pattern, &string, &length) == -1: - raise TypeError("first argument must be a string or compiled pattern") - s = new _re2.StringPiece(string, length) - - cdef _re2.RE2 *re_pattern - with nogil: - re_pattern = new _re2.RE2(s[0], opts) - - if not re_pattern.ok(): - # Something went wrong with the compilation. - del s - error_msg = cpp_to_bytes(re_pattern.error()) - error_code = re_pattern.error_code() - del re_pattern - if current_notification == <int>FALLBACK_EXCEPTION: - # Raise an exception regardless of the type of error. 
- raise RegexError(error_msg) - elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, - _re2.ErrorBadEscape): - # Raise an error because these will not be fixed by using the - # ``re`` module. - raise RegexError(error_msg) - elif current_notification == <int>FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - return re.compile(original_pattern, flags) - - cdef Pattern pypattern = Pattern() - pypattern.pattern = original_pattern - pypattern.re_pattern = re_pattern - pypattern.groups = re_pattern.NumberOfCapturingGroups() - pypattern.encoded = encoded - pypattern.flags = flags - del s - return pypattern - - + pos[0] = newpos + endpos[0] = newendpos diff --git a/src/re2.pyx b/src/re2.pyx index 41e2607a..3ba6285e 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -25,8 +25,12 @@ import warnings cimport _re2 cimport cpython.unicode from cython.operator cimport preincrement as inc, dereference as deref -from cpython.buffer cimport PyBUF_SIMPLE, Py_buffer -from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release +from cpython.buffer cimport Py_buffer, PyObject_GetBuffer, PyBuffer_Release + +cdef extern from *: + cdef void emit_ifndef_py_unicode_wide "#if !defined(Py_UNICODE_WIDE) //" () + cdef void emit_endif "#endif //" () + # Import re flags to be compatible. 
I, M, S, U, X, L = re.I, re.M, re.S, re.U, re.X, re.L @@ -50,84 +54,15 @@ cdef int current_notification = FALLBACK_QUIETLY # Type of compiled re object from Python stdlib SREPattern = type(re.compile('')) -include "match.pxi" -include "pattern.pxi" - - -class RegexError(re.error): - """Some error has occured in compilation of the regex.""" - pass - -error = RegexError - - -class BackreferencesException(Exception): - """Search pattern contains backreferences.""" - pass - +_cache = {} +_cache_repl = {} -class CharClassProblemException(Exception): - """Search pattern contains unsupported character class.""" - pass - - -def set_fallback_notification(level): - """Set the fallback notification to a level; one of: - FALLBACK_QUIETLY - FALLBACK_WARNING - FALLBACK_EXCEPTION - """ - global current_notification - level = int(level) - if level < 0 or level > 2: - raise ValueError("This function expects a valid notification level.") - current_notification = level +_MAXCACHE = 100 -cdef bytes cpp_to_bytes(_re2.cpp_string input): - """Convert from a std::string object to a python string.""" - # By taking the slice we go to the right size, - # despite spurious or missing null characters. - return input.c_str()[:input.length()] - - -cdef inline unicode cpp_to_unicode(_re2.cpp_string input): - """Convert a std::string object to a unicode string.""" - return cpython.unicode.PyUnicode_DecodeUTF8( - input.c_str(), input.length(), 'strict') - - -cdef inline unicode char_to_unicode(_re2.const_char_ptr input, int length): - """Convert a C string to a unicode string.""" - return cpython.unicode.PyUnicode_DecodeUTF8(input, length, 'strict') - - -cdef inline unicode_to_bytes(object pystring, int * encoded): - """Convert a unicode string to a utf8 bytes object, if necessary. 
- - If pystring is a bytes string or a buffer, return unchanged.""" - if cpython.unicode.PyUnicode_Check(pystring): - pystring = cpython.unicode.PyUnicode_EncodeUTF8( - cpython.unicode.PyUnicode_AS_UNICODE(pystring), - cpython.unicode.PyUnicode_GET_SIZE(pystring), - "strict") - encoded[0] = 1 - else: - encoded[0] = 0 - return pystring - - -cdef inline int pystring_to_cstring( - object pystring, char ** cstring, Py_ssize_t * length): - """Get a C string from a bytes/buffer object.""" - # FIXME: use Python 3 buffer interface when available - return _re2.PyObject_AsCharBuffer( - pystring, <_re2.const_char_ptr*> cstring, length) - - -cdef extern from *: - cdef void emit_ifndef_py_unicode_wide "#if !defined(Py_UNICODE_WIDE) //" () - cdef void emit_endif "#endif //" () +include "compile.pxi" +include "pattern.pxi" +include "match.pxi" def search(pattern, string, int flags=0): @@ -186,22 +121,83 @@ def subn(pattern, repl, string, int count=0): return compile(pattern).subn(repl, string, count) -_alphanum = {} -for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890': - _alphanum[c] = 1 -del c - - def escape(pattern): - "Escape all non-alphanumeric characters in pattern." 
+ """Escape all non-alphanumeric characters in pattern.""" s = list(pattern) - alphanum = _alphanum for i in range(len(pattern)): c = pattern[i] - if ord(c) < 0x80 and c not in alphanum: + if ord(c) < 0x80 and not c.isalnum(): if c == "\000": s[i] = "\\000" else: s[i] = "\\" + c return pattern[:0].join(s) + +class RegexError(re.error): + """Some error has occured in compilation of the regex.""" + pass + +error = RegexError + + +class BackreferencesException(Exception): + """Search pattern contains backreferences.""" + pass + + +class CharClassProblemException(Exception): + """Search pattern contains unsupported character class.""" + pass + + +def set_fallback_notification(level): + """Set the fallback notification to a level; one of: + FALLBACK_QUIETLY + FALLBACK_WARNING + FALLBACK_EXCEPTION + """ + global current_notification + level = int(level) + if level < 0 or level > 2: + raise ValueError("This function expects a valid notification level.") + current_notification = level + + +cdef inline bytes cpp_to_bytes(_re2.cpp_string input): + """Convert from a std::string object to a python string.""" + # By taking the slice we go to the right size, + # despite spurious or missing null characters. + return input.c_str()[:input.length()] + + +cdef inline unicode cpp_to_unicode(_re2.cpp_string input): + """Convert a std::string object to a unicode string.""" + return cpython.unicode.PyUnicode_DecodeUTF8( + input.c_str(), input.length(), 'strict') + + +cdef inline unicode char_to_unicode(_re2.const_char_ptr input, int length): + """Convert a C string to a unicode string.""" + return cpython.unicode.PyUnicode_DecodeUTF8(input, length, 'strict') + + +cdef inline unicode_to_bytes(object pystring, int * encoded): + """Convert a unicode string to a utf8 bytes object, if necessary. 
+ + If pystring is a bytes string or a buffer, return unchanged.""" + if cpython.unicode.PyUnicode_Check(pystring): + encoded[0] = 1 + return pystring.encode('utf8') + encoded[0] = 0 + return pystring + + +cdef inline int pystring_to_cstring( + object pystring, char ** cstring, Py_ssize_t * length): + """Get a C string from a bytes/buffer object.""" + # FIXME: use Python 3 buffer interface when available + return _re2.PyObject_AsCharBuffer( + pystring, <_re2.const_char_ptr*> cstring, length) + + diff --git a/tests/issue4.txt b/tests/issue4.txt index 29f4bb41..8787e1a6 100644 --- a/tests/issue4.txt +++ b/tests/issue4.txt @@ -3,23 +3,28 @@ issue #4 >>> import re >>> import re2 - >>> re2.set_fallback_notification(re2.FALLBACK_WARNING) - >>> regex = '([\W\d_]*)(([^\W\d_]*[-\.]*)*[^\W\d_])([\W\d_]*[^\W\d_]*)' - >>> TERM_SPEC = re.compile(regex, re.UNICODE) - >>> TERM_SPEC2 = re2.compile(regex, re2.UNICODE) - >>> TERM_SPEC.search("a").groups() - ('', 'a', '', '') - >>> TERM_SPEC2.search("a").groups() - ('', 'a', '', '') + >>> re2.set_fallback_notification(re2.FALLBACK_WARNING) + >>> regex = r'([\d_]*)(([^\d_]*[-\.]*)*[^\d_])([\d_]*[^\d_]*)' + >>> TERM_SPEC = re.compile(regex) + >>> TERM_SPEC2 = re2.compile(regex) - >>> TERM_SPEC2.search(u"Hello").groups() - (u'', u'Hello', u'Hell', u'') - >>> TERM_SPEC.search(u"Hello").groups() - (u'', u'Hello', u'Hell', u'') + Unused vs. empty group: - >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) >>> regex = '(foo)?((.*).)(bar)?' 
- >>> re.search(regex, "a", flags=re.UNICODE).groups() + >>> re.search(regex, 'a').groups() (None, 'a', '', None) - >>> re2.search(regex, "a", flags=re.UNICODE).groups() + >>> re2.search(regex, 'a').groups() (None, 'a', '', None) + + >>> TERM_SPEC.search('a').groups() + ('', 'a', '', '') + >>> TERM_SPEC2.search('a').groups() + ('', 'a', '', '') + + Nested group: + + >>> TERM_SPEC2.search('Hello').groups() + ('', 'Hello', '', '') + >>> TERM_SPEC.search('Hello').groups() + ('', 'Hello', '', '') diff --git a/tests/match_expand.txt b/tests/match_expand.txt index 9225658b..537e18d8 100644 --- a/tests/match_expand.txt +++ b/tests/match_expand.txt @@ -6,13 +6,13 @@ expand templates as if the .sub() method was called on the pattern. >>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> m = re2.match(r"(\w+) (\w+)\W+(?P<title>\w+)", "Isaac Newton, physicist") - >>> m.expand(r"\2, \1") + >>> m = re2.match("(\\w+) (\\w+)\\W+(?P<title>\\w+)", "Isaac Newton, physicist") + >>> m.expand("\\2, \\1") 'Newton, Isaac' - >>> m.expand(r"\1 \g<title>") + >>> m.expand("\\1 \\g<title>") 'Isaac physicist' - >>> m.expand(r"\2, \1 \2") + >>> m.expand("\\2, \\1 \\2") 'Newton, Isaac Newton' - >>> m.expand(r"\3") + >>> m.expand("\\3") 'physicist' diff --git a/tests/namedgroups.txt b/tests/namedgroups.txt index 59199bd6..c2aefedb 100644 --- a/tests/namedgroups.txt +++ b/tests/namedgroups.txt @@ -15,13 +15,25 @@ Testing some aspects of named groups >>> m.regs ((0, 16), (0, 7), (8, 16)) + >>> m = re2.match(u"(?P<first_name>\\w+) (?P<last_name>\\w+)", u"Malcolm Reynolds") + >>> m.start(u"first_name") + 0 + >>> m.start(u"last_name") + 8 + + >>> m.span(u"last_name") + (8, 16) + >>> m.regs + ((0, 16), (0, 7), (8, 16)) + Compare patterns with and without unicode - >>> re2.compile(r"(?P<first_name>\w+) (?P<last_name>\w+)")._print_pattern() + >>> pattern = re2.compile(r"(?P<first_name>\w+) (?P<last_name>\w+)") + >>> print(pattern._dump_pattern()) (?P<first_name>\w+) 
(?P<last_name>\w+) >>> pattern = re2.compile(u"(?P<first_name>\\w+) (?P<last_name>\\w+)", ... re2.UNICODE) - >>> pattern._print_pattern() + >>> print(pattern._dump_pattern()) (?P<first_name>[_\p{L}\p{Nd}]+) (?P<last_name>[_\p{L}\p{Nd}]+) Make sure positions are converted properly for unicode diff --git a/tests/split.txt b/tests/split.txt index d6ceed31..a597a8c6 100644 --- a/tests/split.txt +++ b/tests/split.txt @@ -5,7 +5,8 @@ This one tests to make sure that unicode / utf8 data is parsed correctly. >>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> a = u'我很好, 你呢?' + >>> a = u'\u6211\u5f88\u597d, \u4f60\u5462?' + >>> re2.split(u' ', a) == [u'\u6211\u5f88\u597d,', u'\u4f60\u5462?'] True >>> re2.split(b' ', a.encode('utf8')) == [ diff --git a/tests/unicode.txt b/tests/unicode.txt index 2433aefe..86b56a94 100644 --- a/tests/unicode.txt +++ b/tests/unicode.txt @@ -54,15 +54,15 @@ Test unicode character groups >>> re.set_fallback_notification(re.FALLBACK_EXCEPTION) -Group positions need to be fixed with unicode +Positions are translated transparently between unicode and UTF-8 >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x').span(1) (6, 7) >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x'.encode('utf-8')).span(1) (11, 12) - -Pos and endpos also need to be corrected - - >>> re.compile(u'x').findall(u'\u1234x', 1, 2) == [u'x'] # fix pos and endpos. + >>> re.compile(u'x').findall(u'\u1234x', 1, 2) == [u'x'] + True + >>> data = u'\U0001d200xxx\u1234 x' + >>> re.search(u' (.)', data).string == data True From 27786c25b7a4ebb22190c53fa68857d477fda4fc Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 3 Aug 2015 14:52:56 +0200 Subject: [PATCH 006/114] new buffer API; precompute groups/spans; &c. - use new buffer API NB: even though the old buffer interface is deprecated from Python 2.6, the new buffer interface is only supported on mmap starting from Python 3. 
- avoid creating Match objects in findall() - precompute groups and spans of Match objects, so that possibly encoded version of search string (bytestr / cstring) does not need to be kept. - in _make_spans(), keep state for converting utf8 to unicode indices; so that there is no quadratic behavior on repeated invocations for different Match objects. - release GIL in pattern_Replace / pattern_GlobalReplace - prepare_pattern: loop over pattern as char * - advertise Python 3 support in setup.py, remove python 2.5 --- setup.py | 5 +- src/_re2.pxd | 11 +- src/compile.pxi | 168 +++++++++----------- src/match.pxi | 137 +++++++--------- src/pattern.pxi | 411 +++++++++++++++++++++++++----------------------- src/re2.pyx | 47 +++++- 6 files changed, 395 insertions(+), 384 deletions(-) diff --git a/setup.py b/setup.py index c90f4438..e165e6c7 100755 --- a/setup.py +++ b/setup.py @@ -98,7 +98,8 @@ def main(): runtime_library_dirs=runtime_library_dirs, )] if use_cython: - ext_modules = cythonize(ext_modules, + ext_modules = cythonize( + ext_modules, language_level=3, annotate=True, compiler_directives={ @@ -120,8 +121,8 @@ def main(): classifiers = [ 'License :: OSI Approved :: BSD License', 'Programming Language :: Cython', - 'Programming Language :: Python :: 2.5', 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 3.3', 'Intended Audience :: Developers', 'Topic :: Software Development :: Libraries :: Python Modules', ], diff --git a/src/_re2.pxd b/src/_re2.pxd index 43d5def5..67f28bdc 100644 --- a/src/_re2.pxd +++ b/src/_re2.pxd @@ -29,10 +29,6 @@ cdef extern from "<map>" namespace "std": int operator[](cpp_string) -cdef extern from "Python.h": - int PyObject_AsCharBuffer(object, const_char_ptr *, Py_ssize_t *) - char * PyString_AS_STRING(object) - cdef extern from "re2/stringpiece.h" namespace "re2": cdef cppclass StringPiece: StringPiece() @@ -43,7 +39,8 @@ cdef extern from "re2/stringpiece.h" namespace "re2": int length() ctypedef StringPiece 
const_StringPiece "const StringPiece" - + + cdef extern from "re2/re2.h" namespace "re2": cdef enum Anchor: UNANCHORED "RE2::UNANCHORED" @@ -124,7 +121,7 @@ cdef extern from "_re2macros.h": # cython to just break for Cpp arguments. int pattern_Replace(cpp_string *str, const_RE2 pattern, - const_StringPiece rewrite) + const_StringPiece rewrite) nogil int pattern_GlobalReplace(cpp_string *str, const_RE2 pattern, - const_StringPiece rewrite) + const_StringPiece rewrite) nogil diff --git a/src/compile.pxi b/src/compile.pxi index e7bf6a88..4ee7fd15 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -11,46 +11,12 @@ def compile(pattern, int flags=0, int max_mem=8388608): return p -WHITESPACE = b' \t\n\r\v\f' - - -cdef class Tokenizer: - cdef bytes string - cdef bytes next - cdef int length - cdef int index - - def __init__(self, bytes string): - self.string = string - self.length = len(string) - self.index = 0 - self._next() - - cdef _next(self): - cdef bytes ch - if self.index >= self.length: - self.next = None - return - ch = self.string[self.index:self.index + 1] - if ch[0:1] == b'\\': - if self.index + 2 > self.length: - raise RegexError("bogus escape (end of line)") - ch = self.string[self.index:self.index + 2] - self.index += 1 - self.index += 1 - # FIXME: return indices instead of creating new bytes objects - self.next = ch - - cdef bytes get(self): - cdef bytes this = self.next - self._next() - return this - - -def prepare_pattern(object pattern, int flags): +def prepare_pattern(bytes pattern, int flags): cdef bytearray result = bytearray() - cdef bytes this - cdef Tokenizer source = Tokenizer(pattern) + cdef unsigned char this, that + cdef unsigned char * cstring = pattern + cdef int size = len(pattern) + cdef int n = 0 if flags & (_S | _M): result.extend(b'(?') @@ -59,96 +25,115 @@ def prepare_pattern(object pattern, int flags): if flags & _M: result.extend(b'm') result.extend(b')') - - while True: - this = source.get() - if this is None: - break + while n < 
size: + this = cstring[n] if flags & _X: - if this in WHITESPACE: + if this in b' \t\n\r\f\v': + n += 1 continue - if this == b"#": + elif this == b'#': while True: - this = source.get() - if this in (None, b'\n'): + n += 1 + if n >= size: + break + this = cstring[n] + if this == b'\n': break + n += 1 continue + if this != b'[' and this != b'\\': + try: + result.append(this) + except: + raise ValueError(repr(this)) + n += 1 + continue - if this[0:1] != b'[' and this[0:1] != b'\\': - result.extend(this) + if this != b'[' and this != b'\\': + result.append(this) + n += 1 continue elif this == b'[': - result.extend(this) + result.append(this) while True: - this = source.get() - if this is None: + n += 1 + if n >= size: raise RegexError("unexpected end of regular expression") - elif this == b']': - result.extend(this) + this = cstring[n] + if this == b']': + result.append(this) break - elif this[0:1] == b'\\': + elif this == b'\\': + n += 1 + that = cstring[n] if flags & _U: - if this[1:2] == b'd': + if that == b'd': result.extend(br'\p{Nd}') - elif this[1:2] == b'w': + elif that == b'w': result.extend(br'_\p{L}\p{Nd}') - elif this[1:2] == b's': + elif that == b's': result.extend(br'\s\p{Z}') - elif this[1:2] == b'D': + elif that == b'D': result.extend(br'\P{Nd}') - elif this[1:2] == b'W': + elif that == b'W': # Since \w and \s are made out of several character # groups, I don't see a way to convert their # complements into a group without rewriting the # whole expression, which seems too complicated. 
- raise CharClassProblemException(repr(this)) - elif this[1:2] == b'S': - raise CharClassProblemException(repr(this)) + raise CharClassProblemException() + elif that == b'S': + raise CharClassProblemException() else: - result.extend(this) + result.append(this) + result.append(that) else: - result.extend(this) + result.append(this) + result.append(that) else: - result.extend(this) - elif this[0:1] == b'\\': - if b'8' <= this[1:2] <= b'9': - raise BackreferencesException('%r %r' % (this, pattern)) - elif b'1' <= this[1:2] <= b'7': - if source.next and source.next in b'1234567': - this += source.get() - if source.next and source.next in b'1234567': + result.append(this) + elif this == b'\\': + n += 1 + that = cstring[n] + if b'8' <= that <= b'9': + raise BackreferencesException() + elif b'1' <= that <= b'7': + if n + 1 < size and cstring[n + 1] in b'1234567': + n += 1 + if n + 1 < size and cstring[n + 1] in b'1234567': # all clear, this is an octal escape - result.extend(this) + result.append(this) + result.append(that) + result.append(cstring[n]) else: - raise BackreferencesException('%r %r' % (this, pattern)) + raise BackreferencesException() else: - raise BackreferencesException('%r %r' % (this, pattern)) + raise BackreferencesException() elif flags & _U: - if this[1:2] == b'd': + if that == b'd': result.extend(br'\p{Nd}') - elif this[1:2] == b'w': + elif that == b'w': result.extend(br'[_\p{L}\p{Nd}]') - elif this[1:2] == b's': + elif that == b's': result.extend(br'[\s\p{Z}]') - elif this[1:2] == b'D': + elif that == b'D': result.extend(br'[^\p{Nd}]') - elif this[1:2] == b'W': + elif that == b'W': result.extend(br'[^_\p{L}\p{Nd}]') - elif this[1:2] == b'S': + elif that == b'S': result.extend(br'[^\s\p{Z}]') else: - result.extend(this) + result.append(this) + result.append(that) else: - result.extend(this) - + result.append(this) + result.append(that) + n += 1 return <bytes>result def _compile(object pattern, int flags=0, int max_mem=8388608): """Compile a regular 
expression pattern, returning a pattern object.""" - cdef char * string - cdef Py_ssize_t length cdef _re2.StringPiece * s cdef _re2.Options opts cdef int error_code @@ -189,10 +174,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): opts.set_log_errors(0) opts.set_encoding(_re2.EncodingUTF8) - # We use this function to get the proper length of the string. - if pystring_to_cstring(pattern, &string, &length) == -1: - raise TypeError("first argument must be a string or compiled pattern") - s = new _re2.StringPiece(string, length) + s = new _re2.StringPiece(<char *><bytes>pattern, len(pattern)) cdef _re2.RE2 *re_pattern with nogil: @@ -201,7 +183,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): if not re_pattern.ok(): # Something went wrong with the compilation. del s - error_msg = cpp_to_bytes(re_pattern.error()) + error_msg = cpp_to_unicode(re_pattern.error()) error_code = re_pattern.error_code() del re_pattern if current_notification == FALLBACK_EXCEPTION: diff --git a/src/match.pxi b/src/match.pxi index 3eb1c359..d6066aac 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -1,20 +1,16 @@ - cdef class Match: cdef readonly Pattern re cdef readonly object string cdef readonly int pos cdef readonly int endpos + cdef readonly tuple regs cdef _re2.StringPiece * matches cdef _re2.const_stringintmap * named_groups - cdef object bytestr - cdef char * cstring - cdef int size cdef bint encoded cdef int nmatches cdef int _lastindex cdef tuple _groups - cdef tuple _spans cdef dict _named_groups cdef dict _named_indexes @@ -27,23 +23,12 @@ cdef class Match: self.nmatches = num_groups self.re = pattern_object - property regs: - def __get__(self): - if self._spans is None: - self._make_spans() - return self._spans - property lastindex: def __get__(self): - self.init_groups() - if self._lastindex < 1: - return None - else: - return self._lastindex + return None if self._lastindex < 1 else self._lastindex property lastgroup: def __get__(self): - 
self.init_groups() cdef _re2.stringintmapiterator it if self._lastindex < 1: @@ -52,6 +37,8 @@ cdef class Match: it = self.named_groups.begin() while it != self.named_groups.end(): if deref(it).second == self._lastindex: + if self.encoded: + return cpp_to_unicode(deref(it).first) return cpp_to_bytes(deref(it).first) inc(it) return None @@ -89,7 +76,6 @@ cdef class Match: cdef bytes _group(self, object groupnum): cdef int idx - self.init_groups() if isinstance(groupnum, int): idx = groupnum if idx > self.nmatches - 1: @@ -103,7 +89,6 @@ cdef class Match: return groupdict[groupnum] cdef dict _groupdict(self): - self.init_groups() if self._named_groups is not None: return self._named_groups @@ -124,7 +109,6 @@ cdef class Match: return result def groups(self, default=None): - self.init_groups() if self.encoded: return tuple([default if g is None else g.decode('utf8') for g in self._groups[1:]]) @@ -180,12 +164,11 @@ cdef class Match: return self.span(group)[0] def span(self, group=0): - self._make_spans() if isinstance(group, int): - if group > len(self._spans): + if group > len(self.regs): raise IndexError("no such group %d; available groups: %r" - % (group, list(range(len(self._spans))))) - return self._spans[group] + % (group, list(range(len(self.regs))))) + return self.regs[group] else: self._groupdict() if self.encoded: @@ -193,13 +176,42 @@ cdef class Match: if group not in self._named_indexes: raise IndexError("no such group %r; available groups: %r" % (group, list(self._named_indexes))) - return self._spans[self._named_indexes[group]] + return self.regs[self._named_indexes[group]] + + cdef _make_spans(self, char * cstring, int size, int * cpos, int * upos): + cdef int start, end + cdef _re2.StringPiece * piece + + spans = [] + for i in range(self.nmatches): + if self.matches[i].data() == NULL: + spans.append((-1, -1)) + else: + piece = &self.matches[i] + if piece.data() == NULL: + return (-1, -1) + start = piece.data() - cstring + end = start + piece.length() + 
spans.append((start, end)) + + if self.encoded: + spans = self._convert_spans(spans, cstring, size, cpos, upos) + + self.regs = tuple(spans) + + cdef list _convert_spans(self, spans, + char * cstring, int size, int * cpos, int * upos): + positions = [x for x, _ in spans] + [y for _, y in spans] + positions = sorted(set(positions)) + posdict = dict(zip(positions, self._convert_positions( + positions, cstring, size, cpos, upos))) + + return [(posdict[x], posdict[y]) for x, y in spans] - cdef list _convert_positions(self, positions): + cdef list _convert_positions(self, positions, + char * cstring, int size, int * cpos, int * upos): """Convert a list of UTF-8 byte indices to unicode indices.""" - cdef unsigned char * s = <unsigned char *>self.cstring - cdef int cpos = 0 - cdef int upos = 0 + cdef unsigned char * s = <unsigned char *>cstring cdef int i = 0 cdef list result = [] @@ -208,75 +220,42 @@ cdef class Match: i += 1 if i == len(positions): return result - if positions[i] == 0: - result.append(0) + if positions[i] == cpos[0]: + result.append(upos[0]) i += 1 if i == len(positions): return result - while cpos < self.size: - if s[cpos] < 0x80: - cpos += 1 - upos += 1 - elif s[cpos] < 0xe0: - cpos += 2 - upos += 1 - elif s[cpos] < 0xf0: - cpos += 3 - upos += 1 + while cpos[0] < size: + if s[cpos[0]] < 0x80: + cpos[0] += 1 + upos[0] += 1 + elif s[cpos[0]] < 0xe0: + cpos[0] += 2 + upos[0] += 1 + elif s[cpos[0]] < 0xf0: + cpos[0] += 3 + upos[0] += 1 else: - cpos += 4 - upos += 1 + cpos[0] += 4 + upos[0] += 1 # wide unicode chars get 2 unichars when python is compiled # with --enable-unicode=ucs2 # TODO: verify this emit_ifndef_py_unicode_wide() - upos += 1 + upos[0] += 1 emit_endif() - if positions[i] == cpos: - result.append(upos) + if positions[i] == cpos[0]: + result.append(upos[0]) i += 1 if i == len(positions): break return result - def _convert_spans(self, spans): - positions = [x for x, _ in spans] + [y for _, y in spans] - positions = sorted(set(positions)) - 
posdict = dict(zip(positions, self._convert_positions(positions))) - - return [(posdict[x], posdict[y]) for x, y in spans] - - cdef _make_spans(self): - if self._spans is not None: - return - - cdef int start, end - cdef _re2.StringPiece * piece - - spans = [] - for i in range(self.nmatches): - if self.matches[i].data() == NULL: - spans.append((-1, -1)) - else: - piece = &self.matches[i] - if piece.data() == NULL: - return (-1, -1) - start = piece.data() - self.cstring - end = start + piece.length() - spans.append((start, end)) - - if self.encoded: - spans = self._convert_spans(spans) - - self._spans = tuple(spans) - def __dealloc__(self): _re2.delete_StringPiece_array(self.matches) def __repr__(self): return '<re2.Match object; span=%r, match=%r>' % ( (self.pos, self.endpos), self.string) - - diff --git a/src/pattern.pxi b/src/pattern.pxi index a08760a2..9dfd85da 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -1,4 +1,3 @@ - cdef class Pattern: cdef readonly int flags cdef readonly int groups @@ -22,200 +21,213 @@ cdef class Pattern: of strings.""" cdef char * cstring cdef Py_ssize_t size + cdef Py_buffer buf cdef int result cdef _re2.StringPiece * sp - cdef Match m cdef list resultlist = [] cdef int encoded = 0 + cdef _re2.StringPiece * matches bytestr = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(bytestr, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - if encoded and (pos or endpos != -1): - utf8indices(cstring, size, &pos, &endpos) - if pos > size: - return [] - if 0 <= endpos < size: - size = endpos + if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: + raise TypeError('expected string or buffer') + try: + if encoded and (pos or endpos != -1): + utf8indices(cstring, size, &pos, &endpos) + if pos > size: + return [] + if 0 <= endpos < size: + size = endpos - sp = new _re2.StringPiece(cstring, size) + sp = new _re2.StringPiece(cstring, size) + matches = _re2.new_StringPiece_array(self.groups + 1) - while 
True: - # FIXME: can probably avoid creating Match objects - m = Match(self, self.groups + 1) - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>pos, - <int>size, - _re2.UNANCHORED, - m.matches, - self.groups + 1) - if result == 0: - break - m.encoded = encoded - m.named_groups = _re2.addressof( - self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.groups + 1 - m.string = string - m.bytestr = bytestr - m.cstring = cstring - m.size = size - m.pos = pos - if endpos == -1: - m.endpos = size - else: - m.endpos = endpos - if self.groups > 1: - resultlist.append(m.groups("")) - else: - resultlist.append(m.group(self.groups)) - if pos == size: - break - # offset the pos to move to the next point - if m.matches[0].length() == 0: - pos += 1 - else: - pos = m.matches[0].data() - cstring + m.matches[0].length() + while True: + with nogil: + result = self.re_pattern.Match( + sp[0], + pos, + size, + _re2.UNANCHORED, + matches, + self.groups + 1) + if result == 0: + break + if self.groups > 1: + if encoded: + resultlist.append(tuple([ + '' if matches[i].data() is NULL else + matches[i].data()[:matches[i].length() + ].decode('utf8') + for i in range(1, self.groups + 1)])) + else: + resultlist.append(tuple([ + b'' if matches[i].data() is NULL + else matches[i].data()[:matches[i].length()] + for i in range(1, self.groups + 1)])) + else: + if encoded: + resultlist.append(matches[self.groups].data()[ + :matches[self.groups].length()].decode('utf8')) + else: + resultlist.append(matches[self.groups].data()[ + :matches[self.groups].length()]) + if pos == size: + break + # offset the pos to move to the next point + if matches[0].length() == 0: + pos += 1 + else: + pos = matches[0].data() - cstring + matches[0].length() + finally: + release_cstring(&buf) del sp return resultlist def finditer(self, object string, int pos=0, int endpos=-1): """Yield all non-overlapping matches of pattern in string as Match objects.""" + cdef char * cstring cdef Py_ssize_t size + cdef 
Py_buffer buf cdef int result - cdef char * cstring cdef _re2.StringPiece * sp cdef Match m cdef int encoded = 0 + cdef int cpos = 0, upos = pos bytestr = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(bytestr, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - if encoded and (pos or endpos != -1): - utf8indices(cstring, size, &pos, &endpos) - if pos > size: - return - if 0 <= endpos < size: - size = endpos + if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: + raise TypeError('expected string or buffer') + try: + if encoded and (pos or endpos != -1): + utf8indices(cstring, size, &pos, &endpos) + cpos = pos + if pos > size: + return + if 0 <= endpos < size: + size = endpos - sp = new _re2.StringPiece(cstring, size) + sp = new _re2.StringPiece(cstring, size) - while True: - m = Match(self, self.groups + 1) - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>pos, - <int>size, - _re2.UNANCHORED, - m.matches, - self.groups + 1) - if result == 0: - break - m.encoded = encoded - m.named_groups = _re2.addressof( - self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.groups + 1 - m.string = string - m.bytestr = bytestr - m.cstring = cstring - m.size = size - m.pos = pos - if endpos == -1: - m.endpos = size - else: - m.endpos = endpos - yield m - if pos == size: - break - # offset the pos to move to the next point - if m.matches[0].length() == 0: - pos += 1 - else: - pos = m.matches[0].data() - cstring + m.matches[0].length() + while True: + m = Match(self, self.groups + 1) + m.string = string + with nogil: + result = self.re_pattern.Match( + sp[0], + <int>pos, + <int>size, + _re2.UNANCHORED, + m.matches, + self.groups + 1) + if result == 0: + break + m.encoded = encoded + m.named_groups = _re2.addressof( + self.re_pattern.NamedCapturingGroups()) + m.nmatches = self.groups + 1 + m.pos = pos + if endpos == -1: + m.endpos = size + else: + m.endpos = endpos + m._make_spans(cstring, size, &cpos, &upos) + 
m.init_groups() + yield m + if pos == size: + break + # offset the pos to move to the next point + if m.matches[0].length() == 0: + pos += 1 + else: + pos = m.matches[0].data() - cstring + m.matches[0].length() + finally: + release_cstring(&buf) del sp def split(self, string, int maxsplit=0): """split(string[, maxsplit = 0]) --> list Split a string by the occurrences of the pattern.""" + cdef char * cstring cdef Py_ssize_t size cdef int result cdef int pos = 0 cdef int lookahead = 0 cdef int num_split = 0 - cdef char * cstring cdef _re2.StringPiece * sp cdef _re2.StringPiece * matches cdef list resultlist = [] cdef int encoded = 0 + cdef Py_buffer buf if maxsplit < 0: maxsplit = 0 bytestr = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(bytestr, &cstring, &size) == -1: - raise TypeError("expected string or buffer") + if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: + raise TypeError('expected string or buffer') + try: + matches = _re2.new_StringPiece_array(self.groups + 1) + sp = new _re2.StringPiece(cstring, size) - matches = _re2.new_StringPiece_array(self.groups + 1) - sp = new _re2.StringPiece(cstring, size) + while True: + with nogil: + result = self.re_pattern.Match( + sp[0], + pos + lookahead, + size, + _re2.UNANCHORED, + matches, + self.groups + 1) + if result == 0: + break - while True: - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>(pos + lookahead), - <int>size, - _re2.UNANCHORED, - matches, - self.groups + 1) - if result == 0: - break + match_start = matches[0].data() - cstring + match_end = match_start + matches[0].length() - match_start = matches[0].data() - cstring - match_end = match_start + matches[0].length() + # If an empty match, just look ahead until you find something + if match_start == match_end: + if pos + lookahead == size: + break + lookahead += 1 + continue - # If an empty match, just look ahead until you find something - if match_start == match_end: - if pos + lookahead == size: + if encoded: + 
resultlist.append( + char_to_unicode(&sp.data()[pos], match_start - pos)) + else: + resultlist.append(sp.data()[pos:match_start]) + if self.groups > 0: + for group in range(self.groups): + if matches[group + 1].data() == NULL: + resultlist.append(None) + else: + if encoded: + resultlist.append(char_to_unicode( + matches[group + 1].data(), + matches[group + 1].length())) + else: + resultlist.append(matches[group + 1].data()[: + matches[group + 1].length()]) + + # offset the pos to move to the next point + pos = match_end + lookahead = 0 + + num_split += 1 + if maxsplit and num_split >= maxsplit: break - lookahead += 1 - continue if encoded: resultlist.append( - char_to_unicode(&sp.data()[pos], match_start - pos)) + char_to_unicode(&sp.data()[pos], sp.length() - pos)) else: - resultlist.append(sp.data()[pos:match_start]) - if self.groups > 0: - for group in range(self.groups): - if matches[group + 1].data() == NULL: - resultlist.append(None) - else: - if encoded: - resultlist.append(char_to_unicode( - matches[group + 1].data(), - matches[group + 1].length())) - else: - resultlist.append(matches[group + 1].data()[: - matches[group + 1].length()]) - - # offset the pos to move to the next point - pos = match_end - lookahead = 0 - - num_split += 1 - if maxsplit and num_split >= maxsplit: - break - - if encoded: - resultlist.append( - char_to_unicode(&sp.data()[pos], sp.length() - pos)) - else: - resultlist.append(sp.data()[pos:]) - _re2.delete_StringPiece_array(matches) + resultlist.append(sp.data()[pos:]) + _re2.delete_StringPiece_array(matches) + finally: + release_cstring(&buf) del sp return resultlist @@ -232,12 +244,12 @@ cdef class Pattern: Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.""" - cdef Py_ssize_t size cdef char * cstring + cdef Py_ssize_t size cdef _re2.cpp_string * fixed_repl cdef _re2.StringPiece * sp cdef _re2.cpp_string * input_str - cdef 
total_replacements = 0 + cdef int total_replacements = 0 cdef int string_encoded = 0 cdef int repl_encoded = 0 @@ -247,8 +259,8 @@ cdef class Pattern: bytestr = unicode_to_bytes(string, &string_encoded) repl = unicode_to_bytes(repl, &repl_encoded) - if pystring_to_cstring(repl, &cstring, &size) == -1: - raise TypeError("expected string or buffer") + cstring = <bytes>repl + size = len(repl) fixed_repl = NULL cdef _re2.const_char_ptr s = cstring @@ -285,13 +297,16 @@ cdef class Pattern: else: sp = new _re2.StringPiece(cstring, size) + # FIXME: bytestr may be a buffer input_str = new _re2.cpp_string(bytestr) if not count: - total_replacements = _re2.pattern_GlobalReplace( - input_str, self.re_pattern[0], sp[0]) + with nogil: + total_replacements = _re2.pattern_GlobalReplace( + input_str, self.re_pattern[0], sp[0]) elif count == 1: - total_replacements = _re2.pattern_Replace( - input_str, self.re_pattern[0], sp[0]) + with nogil: + total_replacements = _re2.pattern_Replace( + input_str, self.re_pattern[0], sp[0]) else: del fixed_repl del input_str @@ -312,34 +327,35 @@ cdef class Pattern: # This function is probably the hardest to implement correctly. # This is my first attempt, but if anybody has a better solution, # please help out. 
+ cdef char * cstring cdef Py_ssize_t size + cdef Py_buffer buf cdef int result cdef int endpos cdef int pos = 0 cdef int encoded = 0 cdef int num_repl = 0 - cdef char * cstring cdef _re2.StringPiece * sp cdef Match m cdef list resultlist = [] + cdef int cpos = 0, upos = 0 if count < 0: count = 0 bytestr = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(bytestr, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - + if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: + raise TypeError('expected string or buffer') sp = new _re2.StringPiece(cstring, size) - try: while True: m = Match(self, self.groups + 1) + m.string = string with nogil: result = self.re_pattern.Match( sp[0], - <int>pos, - <int>size, + pos, + size, _re2.UNANCHORED, m.matches, self.groups + 1) @@ -358,10 +374,8 @@ cdef class Pattern: m.named_groups = _re2.addressof( self.re_pattern.NamedCapturingGroups()) m.nmatches = self.groups + 1 - m.string = string - m.bytestr = bytestr - m.cstring = cstring - m.size = size + m._make_spans(cstring, size, &cpos, &upos) + m.init_groups() resultlist.append(callback(m) or '') num_repl += 1 @@ -376,57 +390,64 @@ cdef class Pattern: resultlist.append(sp.data()[pos:]) return (b''.join(resultlist), num_repl) finally: + release_cstring(&buf) del sp cdef _search(self, object string, int pos, int endpos, _re2.re2_Anchor anchoring): """Scan through string looking for a match, and return a corresponding Match instance. 
Return None if no position in the string matches.""" + cdef char * cstring cdef Py_ssize_t size + cdef Py_buffer buf cdef int result - cdef char * cstring cdef int encoded = 0 cdef _re2.StringPiece * sp cdef Match m = Match(self, self.groups + 1) + cdef int cpos = 0, upos = pos if 0 <= endpos <= pos: return None bytestr = unicode_to_bytes(string, &encoded) - if pystring_to_cstring(bytestr, &cstring, &size) == -1: - raise TypeError("expected string or buffer") - if encoded and (pos or endpos != -1): - utf8indices(cstring, size, &pos, &endpos) - if pos > size: - return None - if 0 <= endpos < size: - size = endpos + if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: + raise TypeError('expected string or buffer') + try: + if encoded and (pos or endpos != -1): + utf8indices(cstring, size, &pos, &endpos) + cpos = pos + if pos > size: + return None + if 0 <= endpos < size: + size = endpos - sp = new _re2.StringPiece(cstring, size) - with nogil: - result = self.re_pattern.Match( - sp[0], - <int>pos, - <int>size, - anchoring, - m.matches, - self.groups + 1) + sp = new _re2.StringPiece(cstring, size) + with nogil: + result = self.re_pattern.Match( + sp[0], + <int>pos, + <int>size, + anchoring, + m.matches, + self.groups + 1) + del sp + if result == 0: + return None - del sp - if result == 0: - return None - m.encoded = encoded - m.named_groups = _re2.addressof(self.re_pattern.NamedCapturingGroups()) - m.nmatches = self.groups + 1 - m.string = string - m.bytestr = bytestr - m.cstring = cstring - m.size = size - m.pos = pos - if endpos == -1: - m.endpos = size - else: - m.endpos = endpos + m.encoded = encoded + m.named_groups = _re2.addressof( + self.re_pattern.NamedCapturingGroups()) + m.nmatches = self.groups + 1 + m.string = string + m.pos = pos + if endpos == -1: + m.endpos = size + else: + m.endpos = endpos + m._make_spans(cstring, size, &cpos, &upos) + m.init_groups() + finally: + release_cstring(&buf) return m def __repr__(self): diff --git a/src/re2.pyx 
b/src/re2.pyx index 3ba6285e..154998f9 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -25,11 +25,20 @@ import warnings cimport _re2 cimport cpython.unicode from cython.operator cimport preincrement as inc, dereference as deref -from cpython.buffer cimport Py_buffer, PyObject_GetBuffer, PyBuffer_Release +from cpython.buffer cimport Py_buffer, PyBUF_SIMPLE +from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release +from cpython.version cimport PY_MAJOR_VERSION cdef extern from *: cdef void emit_ifndef_py_unicode_wide "#if !defined(Py_UNICODE_WIDE) //" () + cdef void emit_if_py2 "#if PY_MAJOR_VERSION == 2 //" () + cdef void emit_else "#else //" () cdef void emit_endif "#endif //" () + ctypedef char* const_char_ptr "const char*" + +cdef extern from "Python.h": + int PY_MAJOR_VERSION + int PyObject_AsCharBuffer(object, const_char_ptr *, Py_ssize_t *) # Import re flags to be compatible. @@ -194,10 +203,32 @@ cdef inline unicode_to_bytes(object pystring, int * encoded): cdef inline int pystring_to_cstring( - object pystring, char ** cstring, Py_ssize_t * length): - """Get a C string from a bytes/buffer object.""" - # FIXME: use Python 3 buffer interface when available - return _re2.PyObject_AsCharBuffer( - pystring, <_re2.const_char_ptr*> cstring, length) - - + object pystring, char ** cstring, Py_ssize_t * size, + Py_buffer * buf): + """Get a pointer from a bytes/buffer object.""" + cdef int result + cstring[0] = NULL + size[0] = 0 + + emit_if_py2() + result = PyObject_AsCharBuffer(pystring, <const_char_ptr *> cstring, size) + + emit_else() + # Python 3 + result = PyObject_GetBuffer(pystring, buf, PyBUF_SIMPLE) + if result == 0: + cstring[0] = <char *>buf.buf + size[0] = buf.len + + emit_endif() + return result + + +cdef inline void release_cstring(Py_buffer *buf): + """Release buffer if necessary.""" + emit_if_py2() + pass + emit_else() + # Python 3 + PyBuffer_Release(buf) + emit_endif() From 4dd5ec0bbb6135d2bbb0999bfe130d7f49a0b0ac Mon Sep 17 00:00:00 2001 From: 
Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 3 Aug 2015 17:31:19 +0200 Subject: [PATCH 007/114] pickle Patterns; non-char buffers; &c. - support pickling of Pattern objects - support buffers from objects that do not support char buffer (e.g., integer arrays); does not make a lot of sense, but this is what re does. - enable benchmarks shown in readme by default; fix typo. - fix typo in test_re.py --- src/pattern.pxi | 10 ++++++++-- src/re2.pyx | 5 +++-- tests/performance.py | 40 ++++++++++++++++++++-------------------- tests/test_re.py | 2 +- 4 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/pattern.pxi b/src/pattern.pxi index 9dfd85da..20034a03 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -450,14 +450,20 @@ cdef class Pattern: release_cstring(&buf) return m - def __repr__(self): - return 're2.compile(%r, %r)' % (self.pattern, self.flags) + def scanner(a): + raise NotImplementedError def _dump_pattern(self): cdef _re2.cpp_string * s s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) return cpp_to_bytes(s[0]).decode('utf8') + def __repr__(self): + return 're2.compile(%r, %r)' % (self.pattern, self.flags) + + def __reduce__(self): + return (compile, (self.pattern, self.flags)) + def __dealloc__(self): del self.re_pattern diff --git a/src/re2.pyx b/src/re2.pyx index 154998f9..09c6ccc0 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -35,10 +35,11 @@ cdef extern from *: cdef void emit_else "#else //" () cdef void emit_endif "#endif //" () ctypedef char* const_char_ptr "const char*" + ctypedef void* const_void_ptr "const void*" cdef extern from "Python.h": int PY_MAJOR_VERSION - int PyObject_AsCharBuffer(object, const_char_ptr *, Py_ssize_t *) + int PyObject_AsReadBuffer(object, const_void_ptr *, Py_ssize_t *) # Import re flags to be compatible. 
@@ -211,7 +212,7 @@ cdef inline int pystring_to_cstring( size[0] = 0 emit_if_py2() - result = PyObject_AsCharBuffer(pystring, <const_char_ptr *> cstring, size) + result = PyObject_AsReadBuffer(pystring, <const_void_ptr *>cstring, size) emit_else() # Python 3 diff --git a/tests/performance.py b/tests/performance.py index a944ee7c..85258443 100755 --- a/tests/performance.py +++ b/tests/performance.py @@ -77,14 +77,14 @@ def benchmarks_to_ReST(benchmarks): if regex is not None: headers = ('Test', 'Description', '# total runs', '``re`` time(s)', '``re2`` time(s)', '% ``re`` time', '``regex`` time(s)', '% ``regex`` time') else: - headers = ('Test', 'Description', '# total runs', '``re`` time(s)', '``re2`` time(s)', '% ``regex`` time') + headers = ('Test', 'Description', '# total runs', '``re`` time(s)', '``re2`` time(s)', '% ``re`` time') table = [headers] f = lambda x: "%0.3f" % x p = lambda x: "%0.2f%%" % (x * 100) for test, data in benchmarks.items(): row = [test, data["re"][1], str(data["re"][3]), f(data["re"][0]), f(data["re2"][0])] - + row.append(p(data["re2"][0] / data["re"][0])) if regex is not None: row.extend((f(data["regex"][0]), p(data["re2"][0] / data["regex"][0]))) @@ -138,10 +138,10 @@ def getwikidata(): -#register_test("Findall URI|Email", -# r'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)', -# 2, -# data=getwikidata()) +@register_test("Findall URI|Email", + r'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)', + 2, + data=getwikidata()) def findall_uriemail(pattern, data): """ Find list of '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)' @@ -150,9 +150,9 @@ def findall_uriemail(pattern, data): -#register_test("Replace WikiLinks", -# r'(\[\[(^\|)+.*?\]\])', -# data=getwikidata()) +@register_test("Replace WikiLinks", + r'(\[\[(^\|)+.*?\]\])', + data=getwikidata()) def replace_wikilinks(pattern, data): """ This test replaces links of the form [[Obama|Barack_Obama]] to Obama. 
@@ -161,9 +161,9 @@ def replace_wikilinks(pattern, data): -#register_test("Remove WikiLinks", -# r'(\[\[(^\|)+.*?\]\])', -# data=getwikidata()) +@register_test("Remove WikiLinks", + r'(\[\[(^\|)+.*?\]\])', + data=getwikidata()) def remove_wikilinks(pattern, data): """ This test replaces links of the form [[Obama|Barack_Obama]] to the empty string @@ -174,9 +174,9 @@ def remove_wikilinks(pattern, data): -#register_test("Remove WikiLinks", -# r'(<page[^>]*>)', -# data=getwikidata()) +@register_test("Remove WikiLinks", + r'(<page[^>]*>)', + data=getwikidata()) def split_pages(pattern, data): """ This test splits the data by the <page> tag. @@ -187,11 +187,11 @@ def split_pages(pattern, data): def getweblogdata(): return open(os.path.join(os.path.dirname(__file__), 'access.log'), 'rb') -@register_test("weblog scan", - #r'^(\S+) (\S+) (\S+) \[(\d{1,2})/(\w{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2}) -(\d{4})\] "(\S+) (\S+) (\S+)" (\d+) (\d+|-) "([^"]+)" "([^"]+)"\n', -# '(\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) ? (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (".*?"|-) (\S+) (\S+) (\S+) (\S+)', - '(\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) ? (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+)', - data=getweblogdata()) +#@register_test("weblog scan", +# #r'^(\S+) (\S+) (\S+) \[(\d{1,2})/(\w{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2}) -(\d{4})\] "(\S+) (\S+) (\S+)" (\d+) (\d+|-) "([^"]+)" "([^"]+)"\n', +## '(\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) ? (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (".*?"|-) (\S+) (\S+) (\S+) (\S+)', +# '(\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) ? (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+)', +# data=getweblogdata()) def weblog_matches(pattern, data): """ Match weblog data line by line. 
diff --git a/tests/test_re.py b/tests/test_re.py index ffe78198..ca136844 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -1,4 +1,4 @@ -from __future import print_function +from __future__ import print_function from test.test_support import verbose, run_unittest, import_module import re2 as re from re import Scanner From 1ea4aeb624874adb6d5811ed9a02d72056ce5f9c Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 3 Aug 2015 22:06:45 +0200 Subject: [PATCH 008/114] handle named groups in replacement string; &c. - handle named groups in replacement string - store index of named groups in Pattern object instead of Match object. - use bytearray for result in _subn_callback --- src/_re2.pxd | 1 + src/compile.pxi | 10 +++ src/match.pxi | 42 +++------- src/pattern.pxi | 211 ++++++++++++++++++++++++++++++------------------ src/re2.pyx | 7 +- 5 files changed, 158 insertions(+), 113 deletions(-) diff --git a/src/_re2.pxd b/src/_re2.pxd index 67f28bdc..47fb8785 100644 --- a/src/_re2.pxd +++ b/src/_re2.pxd @@ -8,6 +8,7 @@ cdef extern from "<string>" namespace "std": const_char_ptr c_str() int length() void push_back(char c) + void append(char * s) ctypedef string cpp_string "std::string" ctypedef string const_string "const std::string" diff --git a/src/compile.pxi b/src/compile.pxi index 4ee7fd15..1785e68e 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -177,6 +177,8 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): s = new _re2.StringPiece(<char *><bytes>pattern, len(pattern)) cdef _re2.RE2 *re_pattern + cdef _re2.const_stringintmap * named_groups + cdef _re2.stringintmapiterator it with nogil: re_pattern = new _re2.RE2(s[0], opts) @@ -204,6 +206,14 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): pypattern.groups = re_pattern.NumberOfCapturingGroups() pypattern.encoded = encoded pypattern.flags = flags + pypattern._named_indexes = {} + named_groups = 
_re2.addressof(re_pattern.NamedCapturingGroups()) + it = named_groups.begin() + while it != named_groups.end(): + pypattern._named_indexes[cpp_to_bytes(deref(it).first) + ] = deref(it).second + inc(it) + del s return pypattern diff --git a/src/match.pxi b/src/match.pxi index d6066aac..cf36f0bd 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -6,13 +6,11 @@ cdef class Match: cdef readonly tuple regs cdef _re2.StringPiece * matches - cdef _re2.const_stringintmap * named_groups cdef bint encoded cdef int nmatches cdef int _lastindex cdef tuple _groups cdef dict _named_groups - cdef dict _named_indexes def __init__(self, Pattern pattern_object, int num_groups): self._lastindex = -1 @@ -29,18 +27,12 @@ cdef class Match: property lastgroup: def __get__(self): - cdef _re2.stringintmapiterator it if self._lastindex < 1: return None - - it = self.named_groups.begin() - while it != self.named_groups.end(): - if deref(it).second == self._lastindex: - if self.encoded: - return cpp_to_unicode(deref(it).first) - return cpp_to_bytes(deref(it).first) - inc(it) + for name, n in self.re._named_indexes.items(): + if n == self._lastindex: + return name.decode('utf8') if self.encoded else name return None cdef init_groups(self): @@ -89,24 +81,10 @@ cdef class Match: return groupdict[groupnum] cdef dict _groupdict(self): - if self._named_groups is not None: - return self._named_groups - - cdef _re2.stringintmapiterator it - cdef dict result = {} - cdef dict indexes = {} - - self._named_groups = result - it = self.named_groups.begin() - while it != self.named_groups.end(): - indexes[cpp_to_bytes(deref(it).first)] = deref(it).second - result[cpp_to_bytes(deref(it).first)] = self._groups[ - deref(it).second] - inc(it) - - self._named_groups = result - self._named_indexes = indexes - return result + if self._named_groups is None: + self._named_groups = {name: self._groups[n] + for name, n in self.re._named_indexes.items()} + return self._named_groups def groups(self, default=None): if 
self.encoded: @@ -173,10 +151,10 @@ cdef class Match: self._groupdict() if self.encoded: group = group.encode('utf8') - if group not in self._named_indexes: + if group not in self.re._named_indexes: raise IndexError("no such group %r; available groups: %r" - % (group, list(self._named_indexes))) - return self.regs[self._named_indexes[group]] + % (group, list(self.re._named_indexes))) + return self.regs[self.re._named_indexes[group]] cdef _make_spans(self, char * cstring, int size, int * cpos, int * upos): cdef int start, end diff --git a/src/pattern.pxi b/src/pattern.pxi index 20034a03..d4a3dc16 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -1,11 +1,12 @@ cdef class Pattern: + cdef readonly object pattern cdef readonly int flags cdef readonly int groups - cdef readonly object pattern + cdef object __weakref__ - cdef _re2.RE2 * re_pattern cdef bint encoded - cdef object __weakref__ + cdef _re2.RE2 * re_pattern + cdef dict _named_indexes def search(self, object string, int pos=0, int endpos=-1): """Scan through string looking for a match, and return a corresponding @@ -22,7 +23,7 @@ cdef class Pattern: cdef char * cstring cdef Py_ssize_t size cdef Py_buffer buf - cdef int result + cdef int retval cdef _re2.StringPiece * sp cdef list resultlist = [] cdef int encoded = 0 @@ -44,14 +45,14 @@ cdef class Pattern: while True: with nogil: - result = self.re_pattern.Match( + retval = self.re_pattern.Match( sp[0], pos, size, _re2.UNANCHORED, matches, self.groups + 1) - if result == 0: + if retval == 0: break if self.groups > 1: if encoded: @@ -87,10 +88,15 @@ cdef class Pattern: def finditer(self, object string, int pos=0, int endpos=-1): """Yield all non-overlapping matches of pattern in string as Match objects.""" + result = self._finditer(string, pos, endpos) + next(result) # dummy value to raise error before start of generator + return result + + def _finditer(self, object string, int pos=0, int endpos=-1): cdef char * cstring cdef Py_ssize_t size cdef Py_buffer 
buf - cdef int result + cdef int retval cdef _re2.StringPiece * sp cdef Match m cdef int encoded = 0 @@ -110,22 +116,21 @@ cdef class Pattern: sp = new _re2.StringPiece(cstring, size) + yield while True: m = Match(self, self.groups + 1) m.string = string with nogil: - result = self.re_pattern.Match( + retval = self.re_pattern.Match( sp[0], - <int>pos, - <int>size, + pos, + size, _re2.UNANCHORED, m.matches, self.groups + 1) - if result == 0: + if retval == 0: break m.encoded = encoded - m.named_groups = _re2.addressof( - self.re_pattern.NamedCapturingGroups()) m.nmatches = self.groups + 1 m.pos = pos if endpos == -1: @@ -152,7 +157,7 @@ cdef class Pattern: Split a string by the occurrences of the pattern.""" cdef char * cstring cdef Py_ssize_t size - cdef int result + cdef int retval cdef int pos = 0 cdef int lookahead = 0 cdef int num_split = 0 @@ -174,14 +179,14 @@ cdef class Pattern: while True: with nogil: - result = self.re_pattern.Match( + retval = self.re_pattern.Match( sp[0], pos + lookahead, size, _re2.UNANCHORED, matches, self.groups + 1) - if result == 0: + if retval == 0: break match_start = matches[0].data() - cstring @@ -210,7 +215,7 @@ cdef class Pattern: matches[group + 1].length())) else: resultlist.append(matches[group + 1].data()[: - matches[group + 1].length()]) + matches[group + 1].length()]) # offset the pos to move to the next point pos = match_end @@ -236,7 +241,8 @@ cdef class Pattern: Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.""" - return self.subn(repl, string, count)[0] + cdef int num_repl = 0 + return self._subn(repl, string, count, &num_repl) def subn(self, repl, string, int count=0): """subn(repl, string[, count = 0]) --> (newstring, number of subs) @@ -244,100 +250,161 @@ cdef class Pattern: Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.""" + cdef int 
num_repl = 0 + result = self._subn(repl, string, count, &num_repl) + return result, num_repl + + cdef _subn(self, repl, string, int count, int *num_repl): cdef char * cstring + cdef object result cdef Py_ssize_t size - cdef _re2.cpp_string * fixed_repl + cdef _re2.cpp_string * fixed_repl = NULL cdef _re2.StringPiece * sp cdef _re2.cpp_string * input_str - cdef int total_replacements = 0 cdef int string_encoded = 0 cdef int repl_encoded = 0 + cdef int n = 0, start if callable(repl): # This is a callback, so let's use the custom function - return self._subn_callback(repl, string, count) + return self._subn_callback(repl, string, count, num_repl) - bytestr = unicode_to_bytes(string, &string_encoded) repl = unicode_to_bytes(repl, &repl_encoded) - cstring = <bytes>repl + cstring = <bytes>repl # FIXME: repl can be a buffer as well size = len(repl) - fixed_repl = NULL - cdef _re2.const_char_ptr s = cstring - cdef _re2.const_char_ptr end = s + size - cdef int c = 0 - while s < end: - c = s[0] - if (c == b'\\'): - s += 1 - if s == end: + while n < size: + if cstring[n] == b'\\': + n += 1 + if n == size: raise RegexError("Invalid rewrite pattern") - c = s[0] - if c == b'\\' or (c >= b'0' and c <= b'9'): + elif cstring[n] == b'0': # insert NUL-terminator + if fixed_repl == NULL: + fixed_repl = new _re2.cpp_string(cstring, n - 1) + fixed_repl.push_back(b'\0') # FIXME: terminates C++ string + # numbered group + elif cstring[n] == b'\\' or b'1' <= cstring[n] <= b'9': if fixed_repl != NULL: fixed_repl.push_back(b'\\') - fixed_repl.push_back(c) - else: + fixed_repl.push_back(cstring[n]) + elif cstring[n] == b'g': # named group + n += 1 + if n >= size or cstring[n] != b'<': + raise RegexError('missing group name') + start = n + 1 + if not (b'a' <= cstring[start] <= b'z' + or b'A' <= cstring[start] <= b'Z' + or b'0' <= cstring[start] <= b'9' + or cstring[start] == b'_'): + raise RegexError('bad character in group name') + while n < size: + n += 1 + if cstring[n] == b'>': + break + 
elif not (b'a' <= cstring[n] <= b'z' + or b'A' <= cstring[n] <= b'Z' + or b'0' <= cstring[n] <= b'9' + or cstring[n] == b'_'): + raise RegexError('bad character in group name') + if n == size: + raise RegexError('missing group name') if fixed_repl == NULL: - fixed_repl = new _re2.cpp_string( - cstring, s - cstring - 1) - if c == b'n': + fixed_repl = new _re2.cpp_string(cstring, start - 3) + if repl[start:n].isdigit(): + groupno = int(repl[start:n]) + else: + if b'0' <= cstring[start] <= b'9': + raise RegexError('bad character in group name') + if repl[start:n] not in self._named_indexes: + raise IndexError('unknown group name: %r' + % repl[start:n]) + groupno = self._named_indexes[repl[start:n]] + if groupno > 99: + raise RegexError('too many groups (> 99).') + fixed_repl.push_back(b'\\') + fixed_repl.append(str(groupno).encode('ascii')) + else: # escape sequences + if fixed_repl == NULL: + fixed_repl = new _re2.cpp_string(cstring, n - 1) + if cstring[n] == b'n': fixed_repl.push_back(b'\n') + elif cstring[n] == b'r': + fixed_repl.push_back(b'\r') + elif cstring[n] == b't': + fixed_repl.push_back(b'\t') + elif cstring[n] == b'v': + fixed_repl.push_back(b'\v') + elif cstring[n] == b'f': + fixed_repl.push_back(b'\f') + elif cstring[n] == b'a': + fixed_repl.push_back(b'\a') + elif cstring[n] == b'b': + fixed_repl.push_back(b'\b') else: fixed_repl.push_back(b'\\') fixed_repl.push_back(b'\\') - fixed_repl.push_back(c) - else: + fixed_repl.push_back(cstring[n]) + else: # copy verbatim if fixed_repl != NULL: - fixed_repl.push_back(c) + fixed_repl.push_back(cstring[n]) + n += 1 - s += 1 if fixed_repl != NULL: sp = new _re2.StringPiece(fixed_repl.c_str()) else: sp = new _re2.StringPiece(cstring, size) + bytestr = unicode_to_bytes(string, &string_encoded) # FIXME: bytestr may be a buffer input_str = new _re2.cpp_string(bytestr) + # FIXME: RE2 treats unmatched groups in repl as empty string; + # Python raises an error. 
if not count: with nogil: - total_replacements = _re2.pattern_GlobalReplace( + num_repl[0] = _re2.pattern_GlobalReplace( input_str, self.re_pattern[0], sp[0]) elif count == 1: with nogil: - total_replacements = _re2.pattern_Replace( + num_repl[0] = _re2.pattern_Replace( input_str, self.re_pattern[0], sp[0]) else: + # with nogil: + # for n in range(count): + # # set start position to previous + 1 + # retval = _re2.pattern_Replace( + # input_str, self.re_pattern[0], sp[0]) + # if retval == 0: + # break + # num_repl[0] += retval del fixed_repl del input_str del sp raise NotImplementedError( "So far pyre2 does not support custom replacement counts") - if string_encoded or (repl_encoded and total_replacements > 0): + if string_encoded or (repl_encoded and num_repl[0] > 0): result = cpp_to_unicode(input_str[0]) else: result = cpp_to_bytes(input_str[0]) del fixed_repl del input_str del sp - return (result, total_replacements) + return result - def _subn_callback(self, callback, string, int count=0): + cdef _subn_callback(self, callback, string, int count, int * num_repl): # This function is probably the hardest to implement correctly. # This is my first attempt, but if anybody has a better solution, # please help out. 
cdef char * cstring cdef Py_ssize_t size cdef Py_buffer buf - cdef int result + cdef int retval cdef int endpos cdef int pos = 0 cdef int encoded = 0 - cdef int num_repl = 0 cdef _re2.StringPiece * sp cdef Match m - cdef list resultlist = [] + cdef bytearray result = bytearray() cdef int cpos = 0, upos = 0 if count < 0: @@ -352,46 +419,34 @@ cdef class Pattern: m = Match(self, self.groups + 1) m.string = string with nogil: - result = self.re_pattern.Match( + retval = self.re_pattern.Match( sp[0], pos, size, _re2.UNANCHORED, m.matches, self.groups + 1) - if result == 0: + if retval == 0: break endpos = m.matches[0].data() - cstring - if encoded: - resultlist.append( - char_to_unicode(&sp.data()[pos], endpos - pos)) - else: - resultlist.append(sp.data()[pos:endpos]) + result.extend(sp.data()[pos:endpos]) pos = endpos + m.matches[0].length() m.encoded = encoded - m.named_groups = _re2.addressof( - self.re_pattern.NamedCapturingGroups()) m.nmatches = self.groups + 1 m._make_spans(cstring, size, &cpos, &upos) m.init_groups() - resultlist.append(callback(m) or '') + result.extend(callback(m) or b'') - num_repl += 1 - if count and num_repl >= count: + num_repl[0] += 1 + if count and num_repl[0] >= count: break - - if encoded: - resultlist.append( - char_to_unicode(&sp.data()[pos], sp.length() - pos)) - return (u''.join(resultlist), num_repl) - else: - resultlist.append(sp.data()[pos:]) - return (b''.join(resultlist), num_repl) + result.extend(sp.data()[pos:]) finally: release_cstring(&buf) del sp + return result.decode('utf8') if encoded else <bytes>result cdef _search(self, object string, int pos, int endpos, _re2.re2_Anchor anchoring): @@ -400,7 +455,7 @@ cdef class Pattern: cdef char * cstring cdef Py_ssize_t size cdef Py_buffer buf - cdef int result + cdef int retval cdef int encoded = 0 cdef _re2.StringPiece * sp cdef Match m = Match(self, self.groups + 1) @@ -423,20 +478,18 @@ cdef class Pattern: sp = new _re2.StringPiece(cstring, size) with nogil: - result = 
self.re_pattern.Match( + retval = self.re_pattern.Match( sp[0], - <int>pos, - <int>size, + pos, + size, anchoring, m.matches, self.groups + 1) del sp - if result == 0: + if retval == 0: return None m.encoded = encoded - m.named_groups = _re2.addressof( - self.re_pattern.NamedCapturingGroups()) m.nmatches = self.groups + 1 m.string = string m.pos = pos @@ -450,7 +503,7 @@ cdef class Pattern: release_cstring(&buf) return m - def scanner(a): + def scanner(self, _): raise NotImplementedError def _dump_pattern(self): diff --git a/src/re2.pyx b/src/re2.pyx index 09c6ccc0..818f90b3 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -39,6 +39,7 @@ cdef extern from *: cdef extern from "Python.h": int PY_MAJOR_VERSION + int PyObject_CheckReadBuffer(object) int PyObject_AsReadBuffer(object, const_void_ptr *, Py_ssize_t *) @@ -207,12 +208,14 @@ cdef inline int pystring_to_cstring( object pystring, char ** cstring, Py_ssize_t * size, Py_buffer * buf): """Get a pointer from a bytes/buffer object.""" - cdef int result + cdef int result = -1 cstring[0] = NULL size[0] = 0 emit_if_py2() - result = PyObject_AsReadBuffer(pystring, <const_void_ptr *>cstring, size) + if PyObject_CheckReadBuffer(pystring) == 1: + result = PyObject_AsReadBuffer( + pystring, <const_void_ptr *>cstring, size) emit_else() # Python 3 From 195661799db9d0176dd39bb75e7ccc4f7df17866 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 3 Aug 2015 22:20:47 +0200 Subject: [PATCH 009/114] workaround for sub(...) 
with count > 1 --- src/pattern.pxi | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/pattern.pxi b/src/pattern.pxi index d4a3dc16..ab4535de 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -268,6 +268,10 @@ cdef class Pattern: if callable(repl): # This is a callback, so let's use the custom function return self._subn_callback(repl, string, count, num_repl) + elif count > 1: + return self._subn_callback( + lambda m: m.expand(repl), + string, count, num_repl) repl = unicode_to_bytes(repl, &repl_encoded) cstring = <bytes>repl # FIXME: repl can be a buffer as well @@ -359,7 +363,7 @@ cdef class Pattern: input_str = new _re2.cpp_string(bytestr) # FIXME: RE2 treats unmatched groups in repl as empty string; # Python raises an error. - if not count: + if count == 0: with nogil: num_repl[0] = _re2.pattern_GlobalReplace( input_str, self.re_pattern[0], sp[0]) @@ -367,20 +371,6 @@ cdef class Pattern: with nogil: num_repl[0] = _re2.pattern_Replace( input_str, self.re_pattern[0], sp[0]) - else: - # with nogil: - # for n in range(count): - # # set start position to previous + 1 - # retval = _re2.pattern_Replace( - # input_str, self.re_pattern[0], sp[0]) - # if retval == 0: - # break - # num_repl[0] += retval - del fixed_repl - del input_str - del sp - raise NotImplementedError( - "So far pyre2 does not support custom replacement counts") if string_encoded or (repl_encoded and num_repl[0] > 0): result = cpp_to_unicode(input_str[0]) From f295a00e69bdf28750ea262eb58d66525d443c26 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 3 Aug 2015 22:35:35 +0200 Subject: [PATCH 010/114] update performance table / missing features --- README.rst | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 2b1b0feb..a57368a4 100644 --- a/README.rst +++ b/README.rst @@ -111,11 +111,11 @@ To see more details, please see the `performance script 
<http://github.com/axiak +-----------------+---------------------------------------------------------------------------+------------+--------------+---------------+-------------+-----------------+----------------+ |Test |Description |# total runs|``re`` time(s)|``re2`` time(s)|% ``re`` time|``regex`` time(s)|% ``regex`` time| +=================+===========================================================================+============+==============+===============+=============+=================+================+ -|Findall URI|Email|Find list of '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)'|2 |19.961 |0.336 |1.68% |11.463 |2.93% | +|Findall URI|Email|Find list of '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)'|2 |6.262 |0.131 |2.08% |5.119 |2.55% | +-----------------+---------------------------------------------------------------------------+------------+--------------+---------------+-------------+-----------------+----------------+ -|Replace WikiLinks|This test replaces links of the form [[Obama|Barack_Obama]] to Obama. |100 |16.032 |2.622 |16.35% |2.895 |90.54% | +|Replace WikiLinks|This test replaces links of the form [[Obama|Barack_Obama]] to Obama. |100 |4.374 |0.815 |18.63% |1.176 |69.33% | +-----------------+---------------------------------------------------------------------------+------------+--------------+---------------+-------------+-----------------+----------------+ -|Remove WikiLinks |This test splits the data by the <page> tag. |100 |15.983 |1.406 |8.80% |2.252 |62.43% | +|Remove WikiLinks |This test splits the data by the <page> tag. |100 |4.153 |0.225 |5.43% |0.537 |42.01% | +-----------------+---------------------------------------------------------------------------+------------+--------------+---------------+-------------+-----------------+----------------+ Feel free to add more speed tests to the bottom of the script and send a pull request my way! @@ -143,13 +143,6 @@ is writing comprehensive tests for this. 
It's actually really easy: * Replace your ``import re`` with ``import re2 as re``. * Save it as a .txt file in the tests directory. You can comment on it however you like and indent the code with 4 spaces. -Missing Features -================ - -Currently the features missing are: - -* If you use substitution methods without a callback, a non 0/1 maxsplit argument is not supported. - Credits ======= From ce4899d03725b0e2d37d1e09c41f79280d7e118b Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Wed, 5 Aug 2015 13:42:47 +0200 Subject: [PATCH 011/114] add flags parameter to toplevel functions --- src/re2.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/re2.pyx b/src/re2.pyx index 818f90b3..6436a42f 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -104,23 +104,23 @@ def findall(pattern, string, int flags=0): return compile(pattern, flags).findall(string) -def split(pattern, string, int maxsplit=0): +def split(pattern, string, int maxsplit=0, int flags=0): """Split the source string by the occurrences of the pattern, returning a list containing the resulting substrings.""" - return compile(pattern).split(string, maxsplit) + return compile(pattern, flags).split(string, maxsplit) -def sub(pattern, repl, string, int count=0): +def sub(pattern, repl, string, int count=0, int flags=0): """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in string by the replacement repl. repl can be either a string or a callable; if a string, backslash escapes in it are processed. If it is a callable, it's passed the match object and must return a replacement string to be used.""" - return compile(pattern).sub(repl, string, count) + return compile(pattern, flags).sub(repl, string, count) -def subn(pattern, repl, string, int count=0): +def subn(pattern, repl, string, int count=0, int flags=0): """Return a 2-tuple containing (new_string, number). 
new_string is the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the source @@ -129,7 +129,7 @@ def subn(pattern, repl, string, int count=0): callable; if a string, backslash escapes in it are processed. If it is a callable, it's passed the match object and must return a replacement string to be used.""" - return compile(pattern).subn(repl, string, count) + return compile(pattern, flags).subn(repl, string, count) def escape(pattern): From cbb681bef72d46d0b6c1b5dd40e26e25b09efc48 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 10 Aug 2015 14:31:59 +0200 Subject: [PATCH 012/114] Improve substitutions, Python 3 compatibility. - when running under Python 3+, reject unicode patterns on bytes data, and vice versa, in according with general Python 3 behavior. - improve Match.expand() implementation. - The substitutions by RE2 behave differently from Python (character escapes, named groups, etc.), so use Match.expand() for anything but simple literal replacement strings. - make groupindex of pattern objects public. - add Pattern.fullmatch() method. - use #define PY2 from setup.py instead of #ifdef hack. - debug option for compilation. - use data() instead of c_str() on C++ strings, and always supply length, so that strings with null characters are supported. - bump minimum cython version due to use of bytearray typing - adapt tests to Python 3; add b and u string prefixes where needed, &c. 
- update README --- .gitignore | 1 + Makefile | 25 +++- README.rst | 14 +- setup.py | 15 +- src/_re2.pxd | 4 +- src/compile.pxi | 105 ++++++++------ src/match.pxi | 171 +++++++++++++++++------ src/pattern.pxi | 306 ++++++++++++++++++++--------------------- src/re2.pyx | 105 +++++++++----- tests/charliterals.txt | 45 ++++++ tests/emptygroups.txt | 33 +++++ tests/issue4.txt | 30 ---- tests/match_expand.txt | 10 ++ tests/mmap.txt | 2 +- tests/namedgroups.txt | 12 +- tests/test_re.py | 77 ++++++----- tests/unicode.txt | 8 +- 17 files changed, 602 insertions(+), 361 deletions(-) create mode 100644 tests/charliterals.txt create mode 100644 tests/emptygroups.txt delete mode 100644 tests/issue4.txt diff --git a/.gitignore b/.gitignore index 6b6d7429..4d9a9c8f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ MANIFEST src/re2.so src/re2.cpp src/*.html +tests/re2.so tests/access.log *~ *.pyc diff --git a/Makefile b/Makefile index bf901c6d..b8930469 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,24 @@ all: - rm -rf build &>/dev/null - rm -rf src/*.so &>/dev/null - rm -rf re2.so &>/dev/null - rm -rf src/re2.cpp &>/dev/null - python setup.py --cython build_ext --inplace + python setup.py build_ext --cython + +install: + python setup.py install --user --cython test: all - cp -v re2.so tests + cp build/lib*-2.*/re2.so tests/ (cd tests && python re2_test.py) (cd tests && python test_re.py) + +py3: + python3 setup.py build_ext --cython + +test3: py3 + cp build/lib*-3.*/re2*.so tests/re2.so + (cd tests && python3 re2_test.py) + (cd tests && python3 test_re.py) + +clean: + rm -rf build &>/dev/null + rm -rf src/*.so src/*.html &>/dev/null + rm -rf re2.so tests/re2.so &>/dev/null + rm -rf src/re2.cpp &>/dev/null diff --git a/README.rst b/README.rst index a57368a4..27a3f11a 100644 --- a/README.rst +++ b/README.rst @@ -123,8 +123,18 @@ Feel free to add more speed tests to the bottom of the script and send a pull re Current Status ============== -pyre2 has only received basic 
testing. Please use it -and let me know if you run into any issues! +The tests show the following differences with Python's ``re`` module: + +* The ``$`` operator in Python's ``re`` matches twice if the string ends + with ``\n``. This can be simulated using ``\n?$``, except when doing + substitutions. +* ``pyre2`` and Python's ``re`` behave differently with nested and empty groups; + ``pyre2`` will return an empty string in cases where Python would return None + for a group that did not participate in a match. +* Any bytestrings with invalid UTF-8 or other non-ASCII data may behave + differently. + +Please report any further issues with ``pyre2``. Contact ======= diff --git a/setup.py b/setup.py index e165e6c7..188f23f5 100755 --- a/setup.py +++ b/setup.py @@ -4,8 +4,10 @@ import re from distutils.core import setup, Extension, Command -MINIMUM_CYTHON_VERSION = '0.15' - +MINIMUM_CYTHON_VERSION = '0.20' +BASE_DIR = os.path.dirname(__file__) +PY2 = sys.version_info[0] == 2 +DEBUG = False def cmp(a, b): return (a > b) - (a < b) @@ -33,7 +35,7 @@ def normalize(v): cmdclass = {'test': TestCommand} ext_files = [] -if '--cython' in sys.argv[1:] or not os.path.exists('src/re2.cpp'): +if '--cython' in sys.argv or not os.path.exists('src/re2.cpp'): # Using Cython try: sys.argv.remove('--cython') @@ -66,8 +68,6 @@ def normalize(v): else: re2_prefix = "" -BASE_DIR = os.path.dirname(__file__) - def get_long_description(): readme_f = open(os.path.join(BASE_DIR, "README.rst")) readme = readme_f.read() @@ -82,6 +82,7 @@ def get_authors(): return ', '.join(authors) def main(): + os.environ['GCC_COLORS'] = 'auto' include_dirs = [os.path.join(re2_prefix, "include")] if re2_prefix else [] libraries = ["re2"] library_dirs = [os.path.join(re2_prefix, "lib")] if re2_prefix else [] @@ -96,6 +97,10 @@ def main(): libraries=libraries, library_dirs=library_dirs, runtime_library_dirs=runtime_library_dirs, + extra_compile_args=['-DPY2=%d' % PY2] + + (['-g', '-O0'] if DEBUG else + ['-O3', 
'-march=native', '-DNDEBUG']), + extra_link_args=['-g'] if DEBUG else ['-DNDEBUG'], )] if use_cython: ext_modules = cythonize( diff --git a/src/_re2.pxd b/src/_re2.pxd index 47fb8785..c23fca18 100644 --- a/src/_re2.pxd +++ b/src/_re2.pxd @@ -5,8 +5,8 @@ cdef extern from "<string>" namespace "std": cdef cppclass string: string(char *) string(char *, size_t n) - const_char_ptr c_str() - int length() + const_char_ptr data() + size_t length() void push_back(char c) void append(char * s) diff --git a/src/compile.pxi b/src/compile.pxi index 1785e68e..93c565f9 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -13,8 +13,8 @@ def compile(pattern, int flags=0, int max_mem=8388608): def prepare_pattern(bytes pattern, int flags): cdef bytearray result = bytearray() - cdef unsigned char this, that cdef unsigned char * cstring = pattern + cdef unsigned char this, that cdef int size = len(pattern) cdef int n = 0 @@ -41,19 +41,11 @@ def prepare_pattern(bytes pattern, int flags): break n += 1 continue - if this != b'[' and this != b'\\': - try: - result.append(this) - except: - raise ValueError(repr(this)) - n += 1 - continue if this != b'[' and this != b'\\': result.append(this) n += 1 continue - elif this == b'[': result.append(this) while True: @@ -97,18 +89,24 @@ def prepare_pattern(bytes pattern, int flags): that = cstring[n] if b'8' <= that <= b'9': raise BackreferencesException() - elif b'1' <= that <= b'7': - if n + 1 < size and cstring[n + 1] in b'1234567': - n += 1 - if n + 1 < size and cstring[n + 1] in b'1234567': - # all clear, this is an octal escape - result.append(this) - result.append(that) - result.append(cstring[n]) - else: - raise BackreferencesException() + elif isoct(that): + if (n + 2 < size and isoct(cstring[n + 1]) + and isoct(cstring[n + 2])): + # all clear, this is an octal escape + result.extend(cstring[n - 1:n + 3]) + n += 2 + else: + raise BackreferencesException() + elif that == b'x': + if (n + 2 < size and ishex(cstring[n + 1]) + and 
ishex(cstring[n + 2])): + # hex escape + result.extend(cstring[n - 1:n + 3]) + n += 2 else: raise BackreferencesException() + elif that == b'Z': + result.extend(b'\\z') elif flags & _U: if that == b'd': result.extend(br'\p{Nd}') @@ -129,15 +127,29 @@ def prepare_pattern(bytes pattern, int flags): result.append(this) result.append(that) n += 1 - return <bytes>result + return bytes(result) def _compile(object pattern, int flags=0, int max_mem=8388608): """Compile a regular expression pattern, returning a pattern object.""" + def fallback(pattern, flags, error_msg): + """Raise error, warn, or simply return fallback from re module.""" + error_msg = "re.LOCALE not supported" + if current_notification == FALLBACK_EXCEPTION: + raise RegexError(error_msg) + elif current_notification == FALLBACK_WARNING: + warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) + try: + result = re.compile(pattern, flags) + except re.error as err: + raise RegexError(*err.args) + return result + cdef _re2.StringPiece * s cdef _re2.Options opts cdef int error_code cdef int encoded = 0 + cdef object original_pattern if isinstance(pattern, (Pattern, SREPattern)): if flags: @@ -145,26 +157,28 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): 'Cannot process flags argument with a compiled pattern') return pattern - cdef object original_pattern = pattern - pattern = unicode_to_bytes(pattern, &encoded) + original_pattern = pattern + if flags & _L: + return fallback(original_pattern, flags, "re.LOCALE not supported") + pattern = unicode_to_bytes(pattern, &encoded, -1) + newflags = flags + if not PY2: + if not encoded and flags & _U: + pass + # raise ValueError("can't use UNICODE flag with a bytes pattern") + elif encoded and not (flags & re.ASCII): + newflags = flags | re.UNICODE + elif encoded and flags & re.ASCII: + newflags = flags & ~re.UNICODE + tryagain = 0 try: - pattern = prepare_pattern(pattern, flags) + pattern = prepare_pattern(pattern, newflags) except 
BackreferencesException: - error_msg = "Backreferences not supported" - if current_notification == FALLBACK_EXCEPTION: - # Raise an exception regardless of the type of error. - raise RegexError(error_msg) - elif current_notification == FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - return re.compile(original_pattern, flags) + return fallback(original_pattern, flags, "Backreferences not supported") except CharClassProblemException: - error_msg = "\W and \S not supported inside character classes" - if current_notification == FALLBACK_EXCEPTION: - # Raise an exception regardless of the type of error. - raise RegexError(error_msg) - elif current_notification == FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - return re.compile(original_pattern, flags) + return fallback(original_pattern, flags, + "\W and \S not supported inside character classes") + # Set the options given the flags above. if flags & _I: @@ -192,7 +206,8 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): # Raise an exception regardless of the type of error. raise RegexError(error_msg) elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, - _re2.ErrorBadEscape): + # _re2.ErrorBadEscape, + _re2.ErrorPatternTooLarge): # Raise an error because these will not be fixed by using the # ``re`` module. 
raise RegexError(error_msg) @@ -206,15 +221,19 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): pypattern.groups = re_pattern.NumberOfCapturingGroups() pypattern.encoded = encoded pypattern.flags = flags - pypattern._named_indexes = {} + pypattern.groupindex = {} named_groups = _re2.addressof(re_pattern.NamedCapturingGroups()) it = named_groups.begin() while it != named_groups.end(): - pypattern._named_indexes[cpp_to_bytes(deref(it).first) - ] = deref(it).second + if encoded: + pypattern.groupindex[cpp_to_unicode(deref(it).first) + ] = deref(it).second + else: + pypattern.groupindex[cpp_to_bytes(deref(it).first) + ] = deref(it).second inc(it) + if flags & re.DEBUG: + print(repr(pypattern._dump_pattern())) del s return pypattern - - diff --git a/src/match.pxi b/src/match.pxi index cf36f0bd..02414934 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -12,15 +12,6 @@ cdef class Match: cdef tuple _groups cdef dict _named_groups - def __init__(self, Pattern pattern_object, int num_groups): - self._lastindex = -1 - self._groups = None - self.pos = 0 - self.endpos = -1 - self.matches = _re2.new_StringPiece_array(num_groups + 1) - self.nmatches = num_groups - self.re = pattern_object - property lastindex: def __get__(self): return None if self._lastindex < 1 else self._lastindex @@ -30,15 +21,21 @@ cdef class Match: if self._lastindex < 1: return None - for name, n in self.re._named_indexes.items(): + for name, n in self.re.groupindex.items(): if n == self._lastindex: - return name.decode('utf8') if self.encoded else name + return name return None - cdef init_groups(self): - if self._groups is not None: - return + def __init__(self, Pattern pattern_object, int num_groups): + self._lastindex = -1 + self._groups = None + self.pos = 0 + self.endpos = -1 + self.matches = _re2.new_StringPiece_array(num_groups + 1) + self.nmatches = num_groups + self.re = pattern_object + cdef _init_groups(self): cdef list groups = [] cdef int i cdef _re2.const_char_ptr last_end 
= NULL @@ -77,13 +74,13 @@ cdef class Match: groupdict = self._groupdict() if groupnum not in groupdict: raise IndexError("no such group %r; available groups: %r" - % (groupnum, list(groupdict.keys()))) + % (groupnum, list(groupdict))) return groupdict[groupnum] cdef dict _groupdict(self): if self._named_groups is None: self._named_groups = {name: self._groups[n] - for name, n in self.re._named_indexes.items()} + for name, n in self.re.groupindex.items()} return self._named_groups def groups(self, default=None): @@ -101,39 +98,131 @@ cdef class Match: else: # len(args) > 1: return tuple([self.group(i) for i in args]) if self.encoded: - return self._group(groupnum).decode('utf8') + result = self._group(groupnum) + return None if result is None else result.decode('utf8') return self._group(groupnum) def groupdict(self): result = self._groupdict() if self.encoded: - return {a.decode('utf8') if isinstance(a, bytes) else a: - b.decode('utf8') for a, b in result.items()} + return {a: b.decode('utf8') for a, b in result.items()} return result def expand(self, object template): """Expand a template with groups.""" - # TODO - This can be optimized to work a bit faster in C. + cdef bytearray result = bytearray() if isinstance(template, unicode): - template = template.encode('utf8') - items = template.split(b'\\') - for i, item in enumerate(items[1:]): - if item[0:1].isdigit(): - # Number group - if item[0] == b'0': - items[i + 1] = b'\x00' + item[1:] # ??? + if not PY2 and not self.encoded: + raise ValueError( + 'cannot expand unicode template on bytes pattern') + templ = template.encode('utf8') + else: + if not PY2 and self.encoded: + raise ValueError( + 'cannot expand bytes template on unicode pattern') + templ = bytes(template) + self._expand(templ, result) + return result.decode('utf8') if self.encoded else bytes(result) + + cdef _expand(self, bytes templ, bytearray result): + """Expand template by appending to an existing bytearray. 
+ Everything remains UTF-8 encoded.""" + cdef char * cstring + cdef int n = 0, prev = 0, size + + # NB: cstring is used to get single characters, to avoid difference in + # Python 2/3 behavior of bytes objects. + cstring = templ + size = len(templ) + while True: + prev = n + n = templ.find(b'\\', prev) + if n == -1: + result.extend(templ[prev:]) + break + result.extend(templ[prev:n]) + n += 1 + if (n + 2 < size and cstring[n] == b'x' + and ishex(cstring[n + 1]) and ishex(cstring[n + 2])): + # hex char reference \x1f + result.append(int(templ[n + 1:n + 3], base=16) & 255) + n += 3 + elif (n + 2 < size and isoct(cstring[n]) and isoct(cstring[n + 1]) + and isoct(cstring[n + 2])): + # octal char reference \123 + result.append(int(templ[n:n + 3], base=8) & 255) + n += 3 + elif cstring[n] == b'0': + if n + 1 < size and isoct(cstring[n + 1]): + # 2 character octal: \01 + result.append(int(templ[n:n + 2], base=8)) + n += 2 + else: # nul-terminator literal \0 + result.append(b'\0') + n += 1 + elif b'0' <= cstring[n] <= b'9': # numeric group reference + if n + 1 < size and isdigit(cstring[n + 1]): + # 2 digit group ref \12 + groupno = int(templ[n:n + 2]) + n += 2 + else: + # 1 digit group ref \1 + groupno = int(templ[n:n + 1]) + n += 1 + if groupno <= self.re.groups: + groupval = self._group(groupno) + if groupval is None: + raise RegexError('unmatched group') + result.extend(groupval) else: - items[i + 1] = self._group(int(item[0:1])) + item[1:] - elif item[:2] == b'g<' and b'>' in item: - # This is a named group - name, rest = item[2:].split(b'>', 1) - items[i + 1] = self._group(name) + rest + raise RegexError('invalid group reference.') + elif cstring[n] == b'g': # named group reference + n += 1 + if n >= size or cstring[n] != b'<': + raise RegexError('missing group name') + n += 1 + start = n + while cstring[n] != b'>': + if not isident(cstring[n]): + raise RegexError('bad character in group name') + n += 1 + if n >= size: + raise RegexError('unterminated group name') + 
if templ[start:n].isdigit(): + name = int(templ[start:n]) + elif isdigit(cstring[start]): + raise RegexError('bad character in group name') + else: + name = templ[start:n] + if self.encoded: + name = name.decode('utf8') + groupval = self._group(name) + if groupval is None: + raise RegexError('unmatched group') + result.extend(groupval) + n += 1 else: - # This isn't a template at all - items[i + 1] = b'\\' + item - if self.encoded: - return b''.join(items).decode('utf8') - return b''.join(items) + if cstring[n] == b'n': + result.append(b'\n') + elif cstring[n] == b'r': + result.append(b'\r') + elif cstring[n] == b't': + result.append(b'\t') + elif cstring[n] == b'v': + result.append(b'\v') + elif cstring[n] == b'f': + result.append(b'\f') + elif cstring[n] == b'a': + result.append(b'\a') + elif cstring[n] == b'b': + result.append(b'\b') + elif cstring[n] == b'\\': + result.append(b'\\') + else: # copy verbatim + result.append(b'\\') + result.append(cstring[n]) + n += 1 + return bytes(result) def end(self, group=0): return self.span(group)[1] @@ -149,12 +238,10 @@ cdef class Match: return self.regs[group] else: self._groupdict() - if self.encoded: - group = group.encode('utf8') - if group not in self.re._named_indexes: + if group not in self.re.groupindex: raise IndexError("no such group %r; available groups: %r" - % (group, list(self.re._named_indexes))) - return self.regs[self.re._named_indexes[group]] + % (group, list(self.re.groupindex))) + return self.regs[self.re.groupindex[group]] cdef _make_spans(self, char * cstring, int size, int * cpos, int * upos): cdef int start, end diff --git a/src/pattern.pxi b/src/pattern.pxi index ab4535de..726086ea 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -1,12 +1,12 @@ cdef class Pattern: - cdef readonly object pattern + cdef readonly object pattern # original pattern in Python format cdef readonly int flags - cdef readonly int groups + cdef readonly int groups # number of groups + cdef readonly dict groupindex # name 
=> group number cdef object __weakref__ - cdef bint encoded + cdef bint encoded # True if this was originally a Unicode pattern cdef _re2.RE2 * re_pattern - cdef dict _named_indexes def search(self, object string, int pos=0, int endpos=-1): """Scan through string looking for a match, and return a corresponding @@ -17,6 +17,67 @@ cdef class Pattern: """Matches zero or more characters at the beginning of the string.""" return self._search(string, pos, endpos, _re2.ANCHOR_START) + def fullmatch(self, object string, int pos=0, int endpos=-1): + """"fullmatch(string[, pos[, endpos]]) --> Match object or None." + + Matches the entire string.""" + return self._search(string, pos, endpos, _re2.ANCHOR_BOTH) + + cdef _search(self, object string, int pos, int endpos, + _re2.re2_Anchor anchoring): + """Scan through string looking for a match, and return a corresponding + Match instance. Return None if no position in the string matches.""" + cdef char * cstring + cdef Py_ssize_t size + cdef Py_buffer buf + cdef int retval + cdef int encoded = 0 + cdef _re2.StringPiece * sp + cdef Match m = Match(self, self.groups + 1) + cdef int cpos = 0, upos = pos + + if 0 <= endpos <= pos: + return None + + bytestr = unicode_to_bytes(string, &encoded, self.encoded) + if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: + raise TypeError('expected string or buffer') + try: + if encoded and (pos or endpos != -1): + utf8indices(cstring, size, &pos, &endpos) + cpos = pos + if pos > size: + return None + if 0 <= endpos < size: + size = endpos + + sp = new _re2.StringPiece(cstring, size) + with nogil: + retval = self.re_pattern.Match( + sp[0], + pos, + size, + anchoring, + m.matches, + self.groups + 1) + del sp + if retval == 0: + return None + + m.encoded = encoded + m.nmatches = self.groups + 1 + m.string = string + m.pos = pos + if endpos == -1: + m.endpos = size + else: + m.endpos = endpos + m._make_spans(cstring, size, &cpos, &upos) + m._init_groups() + finally: + 
release_cstring(&buf) + return m + def findall(self, object string, int pos=0, int endpos=-1): """Return all non-overlapping matches of pattern in string as a list of strings.""" @@ -29,7 +90,7 @@ cdef class Pattern: cdef int encoded = 0 cdef _re2.StringPiece * matches - bytestr = unicode_to_bytes(string, &encoded) + bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') try: @@ -88,7 +149,7 @@ cdef class Pattern: def finditer(self, object string, int pos=0, int endpos=-1): """Yield all non-overlapping matches of pattern in string as Match objects.""" - result = self._finditer(string, pos, endpos) + result = iter(self._finditer(string, pos, endpos)) next(result) # dummy value to raise error before start of generator return result @@ -102,7 +163,7 @@ cdef class Pattern: cdef int encoded = 0 cdef int cpos = 0, upos = pos - bytestr = unicode_to_bytes(string, &encoded) + bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') try: @@ -138,7 +199,7 @@ cdef class Pattern: else: m.endpos = endpos m._make_spans(cstring, size, &cpos, &upos) - m.init_groups() + m._init_groups() yield m if pos == size: break @@ -170,7 +231,7 @@ cdef class Pattern: if maxsplit < 0: maxsplit = 0 - bytestr = unicode_to_bytes(string, &encoded) + bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') try: @@ -255,6 +316,7 @@ cdef class Pattern: return result, num_repl cdef _subn(self, repl, string, int count, int *num_repl): + cdef bytes repl_b cdef char * cstring cdef object result cdef Py_ssize_t size @@ -263,112 +325,38 @@ cdef class Pattern: cdef _re2.cpp_string * input_str cdef int string_encoded = 0 cdef int repl_encoded = 0 - cdef int n = 0, start if 
callable(repl): # This is a callback, so let's use the custom function return self._subn_callback(repl, string, count, num_repl) - elif count > 1: - return self._subn_callback( - lambda m: m.expand(repl), - string, count, num_repl) - - repl = unicode_to_bytes(repl, &repl_encoded) - cstring = <bytes>repl # FIXME: repl can be a buffer as well - size = len(repl) - - while n < size: - if cstring[n] == b'\\': - n += 1 - if n == size: - raise RegexError("Invalid rewrite pattern") - elif cstring[n] == b'0': # insert NUL-terminator - if fixed_repl == NULL: - fixed_repl = new _re2.cpp_string(cstring, n - 1) - fixed_repl.push_back(b'\0') # FIXME: terminates C++ string - # numbered group - elif cstring[n] == b'\\' or b'1' <= cstring[n] <= b'9': - if fixed_repl != NULL: - fixed_repl.push_back(b'\\') - fixed_repl.push_back(cstring[n]) - elif cstring[n] == b'g': # named group - n += 1 - if n >= size or cstring[n] != b'<': - raise RegexError('missing group name') - start = n + 1 - if not (b'a' <= cstring[start] <= b'z' - or b'A' <= cstring[start] <= b'Z' - or b'0' <= cstring[start] <= b'9' - or cstring[start] == b'_'): - raise RegexError('bad character in group name') - while n < size: - n += 1 - if cstring[n] == b'>': - break - elif not (b'a' <= cstring[n] <= b'z' - or b'A' <= cstring[n] <= b'Z' - or b'0' <= cstring[n] <= b'9' - or cstring[n] == b'_'): - raise RegexError('bad character in group name') - if n == size: - raise RegexError('missing group name') - if fixed_repl == NULL: - fixed_repl = new _re2.cpp_string(cstring, start - 3) - if repl[start:n].isdigit(): - groupno = int(repl[start:n]) - else: - if b'0' <= cstring[start] <= b'9': - raise RegexError('bad character in group name') - if repl[start:n] not in self._named_indexes: - raise IndexError('unknown group name: %r' - % repl[start:n]) - groupno = self._named_indexes[repl[start:n]] - if groupno > 99: - raise RegexError('too many groups (> 99).') - fixed_repl.push_back(b'\\') - 
fixed_repl.append(str(groupno).encode('ascii')) - else: # escape sequences - if fixed_repl == NULL: - fixed_repl = new _re2.cpp_string(cstring, n - 1) - if cstring[n] == b'n': - fixed_repl.push_back(b'\n') - elif cstring[n] == b'r': - fixed_repl.push_back(b'\r') - elif cstring[n] == b't': - fixed_repl.push_back(b'\t') - elif cstring[n] == b'v': - fixed_repl.push_back(b'\v') - elif cstring[n] == b'f': - fixed_repl.push_back(b'\f') - elif cstring[n] == b'a': - fixed_repl.push_back(b'\a') - elif cstring[n] == b'b': - fixed_repl.push_back(b'\b') - else: - fixed_repl.push_back(b'\\') - fixed_repl.push_back(b'\\') - fixed_repl.push_back(cstring[n]) - else: # copy verbatim - if fixed_repl != NULL: - fixed_repl.push_back(cstring[n]) - n += 1 - - if fixed_repl != NULL: - sp = new _re2.StringPiece(fixed_repl.c_str()) - else: - sp = new _re2.StringPiece(cstring, size) - bytestr = unicode_to_bytes(string, &string_encoded) - # FIXME: bytestr may be a buffer - input_str = new _re2.cpp_string(bytestr) - # FIXME: RE2 treats unmatched groups in repl as empty string; + repl_b = unicode_to_bytes(repl, &repl_encoded, self.encoded) + if not repl_encoded and not isinstance(repl, bytes): + repl_b = bytes(repl) # coerce buffer to bytes object + + if count > 1 or (b'\\' if PY2 else <char>b'\\') in repl_b: + # Limit on number of substitution or replacement string contains + # escape sequences, handle with Match.expand() implementation. + # RE2 does support simple numeric group references \1, \2, + # but the number of differences with Python behavior is + # non-trivial. 
+ return self._subn_expand(repl_b, string, count, num_repl) + + cstring = repl_b + size = len(repl_b) + sp = new _re2.StringPiece(cstring, size) + + bytestr = unicode_to_bytes(string, &string_encoded, self.encoded) + if not string_encoded and not isinstance(bytestr, bytes): + bytestr = bytes(bytestr) # coerce buffer to bytes object + input_str = new _re2.cpp_string(bytestr, len(bytestr)) + # NB: RE2 treats unmatched groups in repl as empty string; # Python raises an error. - if count == 0: - with nogil: + with nogil: + if count == 0: num_repl[0] = _re2.pattern_GlobalReplace( input_str, self.re_pattern[0], sp[0]) - elif count == 1: - with nogil: + elif count == 1: num_repl[0] = _re2.pattern_Replace( input_str, self.re_pattern[0], sp[0]) @@ -376,9 +364,7 @@ cdef class Pattern: result = cpp_to_unicode(input_str[0]) else: result = cpp_to_bytes(input_str[0]) - del fixed_repl - del input_str - del sp + del fixed_repl, input_str, sp return result cdef _subn_callback(self, callback, string, int count, int * num_repl): @@ -400,7 +386,7 @@ cdef class Pattern: if count < 0: count = 0 - bytestr = unicode_to_bytes(string, &encoded) + bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') sp = new _re2.StringPiece(cstring, size) @@ -426,8 +412,12 @@ cdef class Pattern: m.encoded = encoded m.nmatches = self.groups + 1 m._make_spans(cstring, size, &cpos, &upos) - m.init_groups() - result.extend(callback(m) or b'') + m._init_groups() + tmp = callback(m) + if tmp: + result.extend(tmp.encode('utf8') if encoded else tmp) + else: + result.extend(b'') num_repl[0] += 1 if count and num_repl[0] >= count: @@ -436,72 +426,76 @@ cdef class Pattern: finally: release_cstring(&buf) del sp - return result.decode('utf8') if encoded else <bytes>result + return result.decode('utf8') if encoded else bytes(result) - cdef _search(self, object string, int pos, int endpos, - _re2.re2_Anchor 
anchoring): - """Scan through string looking for a match, and return a corresponding - Match instance. Return None if no position in the string matches.""" + cdef _subn_expand(self, bytes repl, string, int count, int * num_repl): + """Perform ``count`` substitutions with replacement string and + Match.expand.""" cdef char * cstring cdef Py_ssize_t size cdef Py_buffer buf cdef int retval + cdef int endpos + cdef int pos = 0 cdef int encoded = 0 cdef _re2.StringPiece * sp - cdef Match m = Match(self, self.groups + 1) - cdef int cpos = 0, upos = pos + cdef Match m + cdef bytearray result = bytearray() - if 0 <= endpos <= pos: - return None + if count < 0: + count = 0 - bytestr = unicode_to_bytes(string, &encoded) + bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') + sp = new _re2.StringPiece(cstring, size) try: - if encoded and (pos or endpos != -1): - utf8indices(cstring, size, &pos, &endpos) - cpos = pos - if pos > size: - return None - if 0 <= endpos < size: - size = endpos + while True: + m = Match(self, self.groups + 1) + m.string = string + with nogil: + retval = self.re_pattern.Match( + sp[0], + pos, + size, + _re2.UNANCHORED, + m.matches, + self.groups + 1) + if retval == 0: + break - sp = new _re2.StringPiece(cstring, size) - with nogil: - retval = self.re_pattern.Match( - sp[0], - pos, - size, - anchoring, - m.matches, - self.groups + 1) - del sp - if retval == 0: - return None + endpos = m.matches[0].data() - cstring + result.extend(sp.data()[pos:endpos]) + pos = endpos + m.matches[0].length() - m.encoded = encoded - m.nmatches = self.groups + 1 - m.string = string - m.pos = pos - if endpos == -1: - m.endpos = size - else: - m.endpos = endpos - m._make_spans(cstring, size, &cpos, &upos) - m.init_groups() + m.encoded = encoded + m.nmatches = self.groups + 1 + m._init_groups() + m._expand(repl, result) + + num_repl[0] += 1 + if count and 
num_repl[0] >= count: + break + result.extend(sp.data()[pos:]) finally: release_cstring(&buf) - return m + del sp + return result.decode('utf8') if encoded else bytes(result) - def scanner(self, _): - raise NotImplementedError + def scanner(self, arg): + return re.compile(self.pattern).scanner(arg) + # raise NotImplementedError def _dump_pattern(self): cdef _re2.cpp_string * s s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) - return cpp_to_bytes(s[0]).decode('utf8') + if self.encoded: + return cpp_to_bytes(s[0]).decode('utf8') + return cpp_to_bytes(s[0]) def __repr__(self): + if self.flags == 0: + return 're2.compile(%r)' % self.pattern return 're2.compile(%r, %r)' % (self.pattern, self.flags) def __reduce__(self): diff --git a/src/re2.pyx b/src/re2.pyx index 6436a42f..12331d7a 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -30,9 +30,8 @@ from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release from cpython.version cimport PY_MAJOR_VERSION cdef extern from *: + cdef int PY2 cdef void emit_ifndef_py_unicode_wide "#if !defined(Py_UNICODE_WIDE) //" () - cdef void emit_if_py2 "#if PY_MAJOR_VERSION == 2 //" () - cdef void emit_else "#else //" () cdef void emit_endif "#endif //" () ctypedef char* const_char_ptr "const char*" ctypedef void* const_void_ptr "const void*" @@ -51,6 +50,7 @@ DOTALL = re.DOTALL UNICODE = re.UNICODE VERBOSE = re.VERBOSE LOCALE = re.LOCALE +DEBUG = re.DEBUG FALLBACK_QUIETLY = 0 FALLBACK_WARNING = 1 @@ -88,6 +88,12 @@ def match(pattern, string, int flags=0): return compile(pattern, flags).match(string) +def fullmatch(pattern, string, int flags=0): + """Try to apply the pattern to the entire string, returning + a match object, or None if no match was found.""" + return compile(pattern, flags).fullmatch(string) + + def finditer(pattern, string, int flags=0): """Return an list of all non-overlapping matches in the string. For each match, the iterator returns a match object. 
@@ -134,15 +140,27 @@ def subn(pattern, repl, string, int count=0, int flags=0): def escape(pattern): """Escape all non-alphanumeric characters in pattern.""" - s = list(pattern) + cdef bint uni = isinstance(pattern, unicode) + cdef list s + if PY2 or uni: + s = list(pattern) + else: + s = [bytes([c]) for c in pattern] for i in range(len(pattern)): - c = pattern[i] + # c = pattern[i] + c = s[i] if ord(c) < 0x80 and not c.isalnum(): - if c == "\000": - s[i] = "\\000" + if uni: + if c == u'\000': + s[i] = u'\\000' + else: + s[i] = u"\\" + c else: - s[i] = "\\" + c - return pattern[:0].join(s) + if c == b'\000': + s[i] = b'\\000' + else: + s[i] = b'\\' + c + return u''.join(s) if uni else b''.join(s) class RegexError(re.error): @@ -175,17 +193,38 @@ def set_fallback_notification(level): current_notification = level +cdef bint ishex(unsigned char c): + """Test whether ``c`` is in ``[0-9a-fA-F]``""" + return (b'0' <= c <= b'9' or b'a' <= c <= b'f' or b'A' <= c <= b'F') + + +cdef bint isoct(unsigned char c): + """Test whether ``c`` is in ``[0-7]``""" + return b'0' <= c <= b'7' + + +cdef bint isdigit(unsigned char c): + """Test whether ``c`` is in ``[0-9]``""" + return b'0' <= c <= b'9' + + +cdef bint isident(unsigned char c): + """Test whether ``c`` is in ``[a-zA-Z0-9_]``""" + return (b'a' <= c <= b'z' or b'A' <= c <= b'Z' + or b'0' <= c <= b'9' or c == b'_') + + cdef inline bytes cpp_to_bytes(_re2.cpp_string input): """Convert from a std::string object to a python string.""" # By taking the slice we go to the right size, # despite spurious or missing null characters. 
- return input.c_str()[:input.length()] + return input.data()[:input.length()] cdef inline unicode cpp_to_unicode(_re2.cpp_string input): """Convert a std::string object to a unicode string.""" return cpython.unicode.PyUnicode_DecodeUTF8( - input.c_str(), input.length(), 'strict') + input.data(), input.length(), 'strict') cdef inline unicode char_to_unicode(_re2.const_char_ptr input, int length): @@ -193,14 +232,22 @@ cdef inline unicode char_to_unicode(_re2.const_char_ptr input, int length): return cpython.unicode.PyUnicode_DecodeUTF8(input, length, 'strict') -cdef inline unicode_to_bytes(object pystring, int * encoded): +cdef inline unicode_to_bytes(object pystring, int * encoded, + int checkotherencoding): """Convert a unicode string to a utf8 bytes object, if necessary. - If pystring is a bytes string or a buffer, return unchanged.""" + If pystring is a bytes string or a buffer, return unchanged. + If checkotherencoding is 0 or 1 and using Python 3, raise an error + if encoded is not equal to it.""" if cpython.unicode.PyUnicode_Check(pystring): + pystring = pystring.encode('utf8') encoded[0] = 1 - return pystring.encode('utf8') - encoded[0] = 0 + else: + encoded[0] = 0 + if not PY2 and checkotherencoding > 0 and not encoded[0]: + raise TypeError("can't use a string pattern on a bytes-like object") + elif not PY2 and checkotherencoding == 0 and encoded[0]: + raise TypeError("can't use a bytes pattern on a string-like object") return pystring @@ -212,27 +259,19 @@ cdef inline int pystring_to_cstring( cstring[0] = NULL size[0] = 0 - emit_if_py2() - if PyObject_CheckReadBuffer(pystring) == 1: - result = PyObject_AsReadBuffer( - pystring, <const_void_ptr *>cstring, size) - - emit_else() - # Python 3 - result = PyObject_GetBuffer(pystring, buf, PyBUF_SIMPLE) - if result == 0: - cstring[0] = <char *>buf.buf - size[0] = buf.len - - emit_endif() + if PY2: + if PyObject_CheckReadBuffer(pystring) == 1: + result = PyObject_AsReadBuffer( + pystring, <const_void_ptr 
*>cstring, size) + else: # Python 3 + result = PyObject_GetBuffer(pystring, buf, PyBUF_SIMPLE) + if result == 0: + cstring[0] = <char *>buf.buf + size[0] = buf.len return result cdef inline void release_cstring(Py_buffer *buf): """Release buffer if necessary.""" - emit_if_py2() - pass - emit_else() - # Python 3 - PyBuffer_Release(buf) - emit_endif() + if not PY2: + PyBuffer_Release(buf) diff --git a/tests/charliterals.txt b/tests/charliterals.txt new file mode 100644 index 00000000..8362a9c7 --- /dev/null +++ b/tests/charliterals.txt @@ -0,0 +1,45 @@ + >>> import re2 as re + +character literals: + + >>> i = 126 + >>> re.compile(r"\%03o" % i) + re2.compile('\\176') + >>> re.compile(r"\%03o" % i)._dump_pattern() + '\\176' + >>> re.match(r"\%03o" % i, chr(i)) is None + False + >>> re.match(r"\%03o0" % i, chr(i) + "0") is None + False + >>> re.match(r"\%03o8" % i, chr(i) + "8") is None + False + >>> re.match(r"\x%02x" % i, chr(i)) is None + False + >>> re.match(r"\x%02x0" % i, chr(i) + "0") is None + False + >>> re.match(r"\x%02xz" % i, chr(i) + "z") is None + False + >>> re.match("\911", "") # doctest: +IGNORE_EXCEPTION_DETAIL +ELLIPSIS + Traceback (most recent call last): + ... + RegexError: invalid escape sequence: \9 + +character class literals: + + >>> re.match(r"[\%03o]" % i, chr(i)) is None + False + >>> re.match(r"[\%03o0]" % i, chr(i) + "0") is None + False + >>> re.match(r"[\%03o8]" % i, chr(i) + "8") is None + False + >>> re.match(r"[\x%02x]" % i, chr(i)) is None + False + >>> re.match(r"[\x%02x0]" % i, chr(i) + "0") is None + False + >>> re.match(r"[\x%02xz]" % i, chr(i) + "z") is None + False + >>> re.match("[\911]", "") # doctest: +IGNORE_EXCEPTION_DETAIL +ELLIPSIS + Traceback (most recent call last): + ... 
+ RegexError: invalid escape sequence: \9 + diff --git a/tests/emptygroups.txt b/tests/emptygroups.txt new file mode 100644 index 00000000..a356a306 --- /dev/null +++ b/tests/emptygroups.txt @@ -0,0 +1,33 @@ +Empty/unused groups +=================== + + >>> import re + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + + Unused vs. empty group: + + >>> re.search( '(foo)?((.*).)(bar)?', 'a').groups() + (None, 'a', '', None) + >>> re2.search('(foo)?((.*).)(bar)?', 'a').groups() + (None, 'a', '', None) + + >>> re.search(r'((.*)?.)', 'a').groups() + ('a', '') + >>> re2.search(r'((.*)?.)', 'a').groups() + ('a', '') + >>> re.search(r'((.*)+.)', 'a').groups() + ('a', '') + >>> re2.search(r'((.*)+.)', 'a').groups() + ('a', '') + >>> re.search(r'((.*)*.)', 'a').groups() + ('a', '') + >>> re2.search(r'((.*)*.)', 'a').groups() + ('a', '') + + Nested group: + + >>> re.search(r'((.*)*.)', 'Hello').groups() + ('Hello', '') + >>> re2.search(r'((.*)*.)', 'Hello').groups() + ('Hello', '') diff --git a/tests/issue4.txt b/tests/issue4.txt deleted file mode 100644 index 8787e1a6..00000000 --- a/tests/issue4.txt +++ /dev/null @@ -1,30 +0,0 @@ -issue #4 -======== - - >>> import re - >>> import re2 - >>> re2.set_fallback_notification(re2.FALLBACK_WARNING) - >>> regex = r'([\d_]*)(([^\d_]*[-\.]*)*[^\d_])([\d_]*[^\d_]*)' - >>> TERM_SPEC = re.compile(regex) - >>> TERM_SPEC2 = re2.compile(regex) - - Unused vs. empty group: - - >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> regex = '(foo)?((.*).)(bar)?' 
- >>> re.search(regex, 'a').groups() - (None, 'a', '', None) - >>> re2.search(regex, 'a').groups() - (None, 'a', '', None) - - >>> TERM_SPEC.search('a').groups() - ('', 'a', '', '') - >>> TERM_SPEC2.search('a').groups() - ('', 'a', '', '') - - Nested group: - - >>> TERM_SPEC2.search('Hello').groups() - ('', 'Hello', '', '') - >>> TERM_SPEC.search('Hello').groups() - ('', 'Hello', '', '') diff --git a/tests/match_expand.txt b/tests/match_expand.txt index 537e18d8..72ae77f2 100644 --- a/tests/match_expand.txt +++ b/tests/match_expand.txt @@ -15,4 +15,14 @@ expand templates as if the .sub() method was called on the pattern. 'Newton, Isaac Newton' >>> m.expand("\\3") 'physicist' + >>> m.expand("\\1 \\g<foo>") # doctest: +IGNORE_EXCEPTION_DETAIL +ELLIPSIS + Traceback (most recent call last): + ... + IndexError: no such group 'foo'; available groups: ['title'] + >>> m.expand("\\0") + '\x00' + >>> m.expand("\01") + '\x01' + >>> m.expand('\t\n\x0b\r\x0c\x07\x08\\B\\Z\x07\\A\\w\\W\\s\\S\\d\\D') + '\t\n\x0b\r\x0c\x07\x08\\B\\Z\x07\\A\\w\\W\\s\\S\\d\\D' diff --git a/tests/mmap.txt b/tests/mmap.txt index 5c61cbee..24034a18 100644 --- a/tests/mmap.txt +++ b/tests/mmap.txt @@ -9,7 +9,7 @@ Testing re2 on buffer object >>> tmp = open("cnn_homepage.dat", "r+b") >>> data = mmap.mmap(tmp.fileno(), 0) - >>> len(list(re2.finditer(r'\w+', data))) + >>> len(list(re2.finditer(b'\\w+', data))) 14230 >>> data.close() diff --git a/tests/namedgroups.txt b/tests/namedgroups.txt index c2aefedb..25598653 100644 --- a/tests/namedgroups.txt +++ b/tests/namedgroups.txt @@ -28,8 +28,8 @@ Testing some aspects of named groups Compare patterns with and without unicode - >>> pattern = re2.compile(r"(?P<first_name>\w+) (?P<last_name>\w+)") - >>> print(pattern._dump_pattern()) + >>> pattern = re2.compile(br"(?P<first_name>\w+) (?P<last_name>\w+)") + >>> print(pattern._dump_pattern().decode('utf8')) (?P<first_name>\w+) (?P<last_name>\w+) >>> pattern = re2.compile(u"(?P<first_name>\\w+) 
(?P<last_name>\\w+)", ... re2.UNICODE) @@ -40,16 +40,16 @@ Make sure positions are converted properly for unicode >>> m = pattern.match( ... u'\u05d9\u05e9\u05e8\u05d0\u05dc \u05e6\u05d3\u05d5\u05e7') - >>> m.start("first_name") + >>> m.start(u"first_name") 0 - >>> m.start("last_name") + >>> m.start(u"last_name") 6 - >>> m.end("last_name") + >>> m.end(u"last_name") 10 >>> m.regs ((0, 10), (0, 5), (6, 10)) >>> m.span(2) (6, 10) - >>> m.span("last_name") + >>> m.span(u"last_name") (6, 10) diff --git a/tests/test_re.py b/tests/test_re.py index ca136844..a2aa15e2 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -1,9 +1,17 @@ from __future__ import print_function -from test.test_support import verbose, run_unittest, import_module +try: + from test.test_support import verbose, run_unittest, import_module +except ImportError: + from test.support import verbose, run_unittest, import_module import re2 as re from re import Scanner -import sys, os, traceback +import os +import sys +import traceback from weakref import proxy +if sys.version_info[0] > 2: + unicode = str + unichr = chr # Misc tests from Tim Peters' re.doc @@ -13,6 +21,7 @@ import unittest + class ReTests(unittest.TestCase): def test_weakref(self): @@ -67,8 +76,8 @@ def test_basic_re_sub(self): def test_bug_449964(self): # fails for group followed by other escape - self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'), - 'xx\bxx\b') + self.assertEqual( + re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'), 'xx\bxx\b') def test_bug_449000(self): # Test for sub() on escaped characters @@ -137,8 +146,8 @@ def test_sub_template_numeric_escape(self): self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9') self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a') - self.assertEqual(re.sub('x', r'\400', 'x'), '\0') - self.assertEqual(re.sub('x', r'\777', 'x'), '\377') + self.assertEqual(re.sub(b'x', br'\400', b'x'), b'\0') + self.assertEqual(re.sub(b'x', br'\777', b'x'), b'\377') self.assertRaises(re.error, re.sub, 
'x', r'\1', 'x') self.assertRaises(re.error, re.sub, 'x', r'\8', 'x') @@ -148,10 +157,10 @@ def test_sub_template_numeric_escape(self): self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x') self.assertRaises(re.error, re.sub, 'x', r'\90', 'x') self.assertRaises(re.error, re.sub, 'x', r'\99', 'x') - self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8' + self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8' self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x') - self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1' - self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0' + self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1' + self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0' # in python2.3 (etc), these loop endlessly in sre_parser.py self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') @@ -225,7 +234,7 @@ def test_re_findall(self): def test_bug_117612(self): self.assertEqual(re.findall(r"(a|(b))", "aba"), - [("a", ""),("b", "b"),("a", "")]) + [("a", ""), ("b", "b"), ("a", "")]) def test_re_match(self): self.assertEqual(re.match('a', 'a').groups(), ()) @@ -279,7 +288,6 @@ def test_re_groupref_exists(self): self.assertEqual(p.match('abd'), None) self.assertEqual(p.match('ac'), None) - def test_re_groupref(self): self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), ('|', 'a')) @@ -295,7 +303,7 @@ def test_re_groupref(self): def test_groupdict(self): self.assertEqual(re.match('(?P<first>first) (?P<second>second)', 'first second').groupdict(), - {'first':'first', 'second':'second'}) + {'first': 'first', 'second': 'second'}) def test_expand(self): self.assertEqual(re.match("(?P<first>first) (?P<second>second)", @@ -431,7 +439,7 @@ def test_search_coverage(self): self.assertEqual(re.search("a\s", "a ").group(0), "a ") def test_re_escape(self): - p="" + p = "" # This had to change from the original test of range(0,256) # because we can't support 
non-ascii non-utf8 strings for i in range(0, 128): @@ -440,15 +448,18 @@ def test_re_escape(self): True) self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1)) - pat=re.compile(re.escape(p)) + pat = re.compile(re.escape(p)) self.assertEqual(pat.match(p) is not None, True) self.assertEqual(pat.match(p).span(), (0,128)) def test_pickling(self): import pickle self.pickle_test(pickle) - import cPickle - self.pickle_test(cPickle) + try: + import cPickle as pickle + except ImportError: + import pickle + self.pickle_test(pickle) # old pickles expect the _compile() reconstructor in sre module import_module("sre", deprecated=True) from sre import _compile @@ -478,7 +489,7 @@ def test_sre_character_literals(self): self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None) self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None) self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None) - self.assertRaises(re.error, re.match, "\911", "") + self.assertRaises(re.error, re.match, b"\911", b"") def test_sre_character_class_literals(self): for i in [0, 8, 16, 32, 64, 127, 128, 255]: @@ -488,7 +499,7 @@ def test_sre_character_class_literals(self): self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None) self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None) self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None) - self.assertRaises(re.error, re.match, "[\911]", "") + self.assertRaises(re.error, re.match, b"[\911]", b"") def test_bug_113254(self): self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1) @@ -606,7 +617,7 @@ def test_bug_926075(self): unicode except NameError: return # no problem if we have no unicode - self.assert_(re.compile('bug_926075') is not + self.assert_(re.compile(b'bug_926075') is not re.compile(eval("u'bug_926075'"))) def test_bug_931848(self): @@ -619,27 +630,28 @@ def test_bug_931848(self): ['a','b','c']) def test_bug_581080(self): - iter = re.finditer(r"\s", "a b") - self.assertEqual(iter.next().span(), (1,2)) - 
self.assertRaises(StopIteration, iter.next) + it = re.finditer(r"\s", "a b") + self.assertEqual(next(it).span(), (1,2)) + self.assertRaises(StopIteration, next, it) scanner = re.compile(r"\s").scanner("a b") self.assertEqual(scanner.search().span(), (1, 2)) self.assertEqual(scanner.search(), None) def test_bug_817234(self): - iter = re.finditer(r".*", "asdf") - self.assertEqual(iter.next().span(), (0, 4)) - self.assertEqual(iter.next().span(), (4, 4)) - self.assertRaises(StopIteration, iter.next) + it = re.finditer(r".*", "asdf") + self.assertEqual(next(it).span(), (0, 4)) + self.assertEqual(next(it).span(), (4, 4)) + self.assertRaises(StopIteration, next, it) def test_empty_array(self): # SF buf 1647541 import array - for typecode in 'cbBuhHiIlLfd': + typecodes = 'bBuhHiIlLfd' + for typecode in typecodes: a = array.array(typecode) - self.assertEqual(re.compile("bla").match(a), None) - self.assertEqual(re.compile("").match(a).groups(), ()) + self.assertEqual(re.compile(b"bla").match(a), None) + self.assertEqual(re.compile(b"").match(a).groups(), ()) def test_inline_flags(self): # Bug #1700 @@ -710,7 +722,7 @@ def run_re_tests(): elif len(t) == 3: pattern, s, outcome = t else: - raise ValueError, ('Test tuples should have 3 or 5 fields', t) + raise ValueError('Test tuples should have 3 or 5 fields', t) try: obj = re.compile(pattern) @@ -718,7 +730,8 @@ def run_re_tests(): if outcome == SYNTAX_ERROR: pass # Expected a syntax error else: print('=== Syntax error:', t) - except KeyboardInterrupt: raise KeyboardInterrupt + except KeyboardInterrupt: + raise KeyboardInterrupt except: print('*** Unexpected error ***', t) if verbose: @@ -726,7 +739,7 @@ def run_re_tests(): else: try: result = obj.search(s) - except re.error, msg: + except re.error as msg: print('=== Unexpected exception', t, repr(msg)) if outcome == SYNTAX_ERROR: # This should have been a syntax error; forget it. 
diff --git a/tests/unicode.txt b/tests/unicode.txt index 86b56a94..53019221 100644 --- a/tests/unicode.txt +++ b/tests/unicode.txt @@ -1,6 +1,7 @@ Here are some tests to make sure that utf-8 works ================================================= + >>> import sys >>> import re2 as re >>> re.set_fallback_notification(re.FALLBACK_EXCEPTION) >>> a = u'\u6211\u5f88\u597d' @@ -33,7 +34,8 @@ Test unicode character groups True >>> int(re.search(u'\\d', u'\u0661', re.UNICODE).group(0)) == 1 True - >>> re.search(u'\\w', u'\u0401') + >>> (re.search(u'\\w', u'\u0401') is None) == (sys.version_info[0] == 2) + True >>> re.search(u'\\w', u'\u0401', re.UNICODE).group(0) == u'\u0401' True >>> re.search(u'\\s', u'\u1680', re.UNICODE).group(0) == u'\u1680' @@ -58,11 +60,11 @@ Positions are translated transparently between unicode and UTF-8 >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x').span(1) (6, 7) - >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x'.encode('utf-8')).span(1) + >>> re.search(b' (.)', u'\U0001d200xxx\u1234 x'.encode('utf-8')).span(1) (11, 12) >>> re.compile(u'x').findall(u'\u1234x', 1, 2) == [u'x'] True - >>> data = u'\U0001d200xxx\u1234 x' + >>> data = u'\U0001d200xxx\u1234 x' >>> re.search(u' (.)', data).string == data True From 35359ce16b28452ace3f41850562af9bb75343dd Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 10 Aug 2015 15:26:18 +0200 Subject: [PATCH 013/114] move functions around --- src/compile.pxi | 1 - src/match.pxi | 50 ++--------------------------- src/pattern.pxi | 38 ---------------------- src/re2.pyx | 84 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 86 deletions(-) diff --git a/src/compile.pxi b/src/compile.pxi index 93c565f9..8aaa359f 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -170,7 +170,6 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): newflags = flags | re.UNICODE elif encoded and flags & re.ASCII: newflags = flags & ~re.UNICODE - 
tryagain = 0 try: pattern = prepare_pattern(pattern, newflags) except BackreferencesException: diff --git a/src/match.pxi b/src/match.pxi index 02414934..1dd7d5b5 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -268,56 +268,12 @@ cdef class Match: char * cstring, int size, int * cpos, int * upos): positions = [x for x, _ in spans] + [y for _, y in spans] positions = sorted(set(positions)) - posdict = dict(zip(positions, self._convert_positions( - positions, cstring, size, cpos, upos))) + posdict = dict(zip( + positions, + unicodeindices(positions, cstring, size, cpos, upos))) return [(posdict[x], posdict[y]) for x, y in spans] - cdef list _convert_positions(self, positions, - char * cstring, int size, int * cpos, int * upos): - """Convert a list of UTF-8 byte indices to unicode indices.""" - cdef unsigned char * s = <unsigned char *>cstring - cdef int i = 0 - cdef list result = [] - - if positions[i] == -1: - result.append(-1) - i += 1 - if i == len(positions): - return result - if positions[i] == cpos[0]: - result.append(upos[0]) - i += 1 - if i == len(positions): - return result - - while cpos[0] < size: - if s[cpos[0]] < 0x80: - cpos[0] += 1 - upos[0] += 1 - elif s[cpos[0]] < 0xe0: - cpos[0] += 2 - upos[0] += 1 - elif s[cpos[0]] < 0xf0: - cpos[0] += 3 - upos[0] += 1 - else: - cpos[0] += 4 - upos[0] += 1 - # wide unicode chars get 2 unichars when python is compiled - # with --enable-unicode=ucs2 - # TODO: verify this - emit_ifndef_py_unicode_wide() - upos[0] += 1 - emit_endif() - - if positions[i] == cpos[0]: - result.append(upos[0]) - i += 1 - if i == len(positions): - break - return result - def __dealloc__(self): _re2.delete_StringPiece_array(self.matches) diff --git a/src/pattern.pxi b/src/pattern.pxi index 726086ea..aece2404 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -503,41 +503,3 @@ cdef class Pattern: def __dealloc__(self): del self.re_pattern - - -cdef utf8indices(char * cstring, int size, int *pos, int *endpos): - """Convert unicode indices 
pos and endpos to UTF-8 indices. - - If the indices are out of range, leave them unchanged.""" - cdef unsigned char * data = <unsigned char *>cstring - cdef int newpos = pos[0], newendpos = -1 - cdef int cpos = 0, upos = 0 - while cpos < size: - if data[cpos] < 0x80: - cpos += 1 - upos += 1 - elif data[cpos] < 0xe0: - cpos += 2 - upos += 1 - elif data[cpos] < 0xf0: - cpos += 3 - upos += 1 - else: - cpos += 4 - upos += 1 - # wide unicode chars get 2 unichars when python is compiled - # with --enable-unicode=ucs2 - # TODO: verify this - emit_ifndef_py_unicode_wide() - upos += 1 - emit_endif() - - if upos == pos[0]: - newpos = cpos - if endpos[0] == -1: - break - elif upos == endpos[0]: - newendpos = cpos - break - pos[0] = newpos - endpos[0] = newendpos diff --git a/src/re2.pyx b/src/re2.pyx index 12331d7a..94a8afbe 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -275,3 +275,87 @@ cdef inline void release_cstring(Py_buffer *buf): """Release buffer if necessary.""" if not PY2: PyBuffer_Release(buf) + + +cdef utf8indices(char * cstring, int size, int *pos, int *endpos): + """Convert unicode indices ``pos`` and ``endpos`` to UTF-8 indices. 
+ + If the indices are out of range, leave them unchanged.""" + cdef unsigned char * data = <unsigned char *>cstring + cdef int newpos = pos[0], newendpos = -1 + cdef int cpos = 0, upos = 0 + while cpos < size: + if data[cpos] < 0x80: + cpos += 1 + upos += 1 + elif data[cpos] < 0xe0: + cpos += 2 + upos += 1 + elif data[cpos] < 0xf0: + cpos += 3 + upos += 1 + else: + cpos += 4 + upos += 1 + # wide unicode chars get 2 unichars when python is compiled + # with --enable-unicode=ucs2 + # TODO: verify this + emit_ifndef_py_unicode_wide() + upos += 1 + emit_endif() + + if upos == pos[0]: + newpos = cpos + if endpos[0] == -1: + break + elif upos == endpos[0]: + newendpos = cpos + break + pos[0] = newpos + endpos[0] = newendpos + + +cdef list unicodeindices(list positions, + char * cstring, int size, int * cpos, int * upos): + """Convert a list of UTF-8 byte indices to unicode indices.""" + cdef unsigned char * s = <unsigned char *>cstring + cdef int i = 0 + cdef list result = [] + + if positions[i] == -1: + result.append(-1) + i += 1 + if i == len(positions): + return result + if positions[i] == cpos[0]: + result.append(upos[0]) + i += 1 + if i == len(positions): + return result + + while cpos[0] < size: + if s[cpos[0]] < 0x80: + cpos[0] += 1 + upos[0] += 1 + elif s[cpos[0]] < 0xe0: + cpos[0] += 2 + upos[0] += 1 + elif s[cpos[0]] < 0xf0: + cpos[0] += 3 + upos[0] += 1 + else: + cpos[0] += 4 + upos[0] += 1 + # wide unicode chars get 2 unichars when python is compiled + # with --enable-unicode=ucs2 + # TODO: verify this + emit_ifndef_py_unicode_wide() + upos[0] += 1 + emit_endif() + + if positions[i] == cpos[0]: + result.append(upos[0]) + i += 1 + if i == len(positions): + break + return result From 073074f96c73ee83139710d61c11a9a238273532 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sun, 25 Oct 2015 01:09:30 +0200 Subject: [PATCH 014/114] add count method - add count method, equivalent to len(findall(...)) - use arrays in utf8indices - 
tweak docstrings --- Makefile | 3 +++ src/match.pxi | 3 +-- src/pattern.pxi | 49 +++++++++++++++++++++++++++++++++++++ src/re2.pyx | 57 +++++++++++++++++++++++++------------------- tests/performance.py | 0 tests/re2_test.py | 0 tests/re_tests.py | 0 7 files changed, 86 insertions(+), 26 deletions(-) mode change 100755 => 100644 tests/performance.py mode change 100755 => 100644 tests/re2_test.py mode change 100755 => 100644 tests/re_tests.py diff --git a/Makefile b/Makefile index b8930469..8b8c31c6 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,9 @@ test: all py3: python3 setup.py build_ext --cython +install3: + python3 setup.py install --user --cython + test3: py3 cp build/lib*-3.*/re2*.so tests/re2.so (cd tests && python3 re2_test.py) diff --git a/src/match.pxi b/src/match.pxi index 1dd7d5b5..628344fe 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -267,11 +267,10 @@ cdef class Match: cdef list _convert_spans(self, spans, char * cstring, int size, int * cpos, int * upos): positions = [x for x, _ in spans] + [y for _, y in spans] - positions = sorted(set(positions)) + positions = array.array(b'l' if PY2 else 'l', sorted(set(positions))) posdict = dict(zip( positions, unicodeindices(positions, cstring, size, cpos, upos))) - return [(posdict[x], posdict[y]) for x, y in spans] def __dealloc__(self): diff --git a/src/pattern.pxi b/src/pattern.pxi index aece2404..49ba6c7a 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -78,6 +78,55 @@ cdef class Pattern: release_cstring(&buf) return m + def count(self, object string, int pos=0, int endpos=-1): + """Return number of non-overlapping matches of pattern in string.""" + cdef char * cstring + cdef Py_ssize_t size + cdef Py_buffer buf + cdef int retval + cdef _re2.StringPiece * sp + cdef int encoded = 0 + cdef int result = 0 + cdef _re2.StringPiece * matches + + bytestr = unicode_to_bytes(string, &encoded, self.encoded) + if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: + raise TypeError('expected 
string or buffer') + try: + if encoded and (pos or endpos != -1): + utf8indices(cstring, size, &pos, &endpos) + if pos > size: + return 0 + if 0 <= endpos < size: + size = endpos + + sp = new _re2.StringPiece(cstring, size) + matches = _re2.new_StringPiece_array(1) + + while True: + with nogil: + retval = self.re_pattern.Match( + sp[0], + pos, + size, + _re2.UNANCHORED, + matches, + 1) + if retval == 0: + break + result += 1 + if pos == size: + break + # offset the pos to move to the next point + if matches[0].length() == 0: + pos += 1 + else: + pos = matches[0].data() - cstring + matches[0].length() + finally: + release_cstring(&buf) + del sp + return result + def findall(self, object string, int pos=0, int endpos=-1): """Return all non-overlapping matches of pattern in string as a list of strings.""" diff --git a/src/re2.pyx b/src/re2.pyx index 94a8afbe..a9f35072 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -21,12 +21,14 @@ Syntax reference: https://github.com/google/re2/wiki/Syntax import sys import re +import array import warnings cimport _re2 cimport cpython.unicode from cython.operator cimport preincrement as inc, dereference as deref from cpython.buffer cimport Py_buffer, PyBUF_SIMPLE from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release +from cpython cimport array from cpython.version cimport PY_MAJOR_VERSION cdef extern from *: @@ -78,38 +80,45 @@ include "match.pxi" def search(pattern, string, int flags=0): """Scan through string looking for a match to the pattern, returning - a match object or none if no match was found.""" + a ``Match`` object or none if no match was found.""" return compile(pattern, flags).search(string) def match(pattern, string, int flags=0): """Try to apply the pattern at the start of the string, returning - a match object, or None if no match was found.""" + a ``Match`` object, or ``None`` if no match was found.""" return compile(pattern, flags).match(string) def fullmatch(pattern, string, int flags=0): """Try to apply 
the pattern to the entire string, returning - a match object, or None if no match was found.""" + a ``Match`` object, or ``None`` if no match was found.""" return compile(pattern, flags).fullmatch(string) def finditer(pattern, string, int flags=0): - """Return an list of all non-overlapping matches in the - string. For each match, the iterator returns a match object. + """Yield all non-overlapping matches in the string. + For each match, the iterator returns a ``Match`` object. Empty matches are included in the result.""" return compile(pattern, flags).finditer(string) def findall(pattern, string, int flags=0): - """Return an list of all non-overlapping matches in the - string. For each match, the iterator returns a match object. + """Return a list of all non-overlapping matches in the string. - Empty matches are included in the result.""" + Each match is represented as a string or a tuple (when there are two ore + more groups). Empty matches are included in the result.""" return compile(pattern, flags).findall(string) +def count(pattern, string, int flags=0): + """Return number of non-overlapping matches in the string. + + Empty matches are included in the count.""" + return compile(pattern, flags).count(string) + + def split(pattern, string, int maxsplit=0, int flags=0): """Split the source string by the occurrences of the pattern, returning a list containing the resulting substrings.""" @@ -119,21 +128,21 @@ def split(pattern, string, int maxsplit=0, int flags=0): def sub(pattern, repl, string, int count=0, int flags=0): """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in string by the - replacement repl. repl can be either a string or a callable; - if a string, backslash escapes in it are processed. If it is - a callable, it's passed the match object and must return + replacement ``repl``. ``repl`` can be either a string or a callable; + if a string, backslash escapes in it are processed. 
If it is + a callable, it's passed the ``Match`` object and must return a replacement string to be used.""" return compile(pattern, flags).sub(repl, string, count) def subn(pattern, repl, string, int count=0, int flags=0): - """Return a 2-tuple containing (new_string, number). + """Return a 2-tuple containing ``(new_string, number)``. new_string is the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the source - string by the replacement repl. number is the number of - substitutions that were made. repl can be either a string or a + string by the replacement ``repl``. ``number`` is the number of + substitutions that were made. ``repl`` can be either a string or a callable; if a string, backslash escapes in it are processed. - If it is a callable, it's passed the match object and must + If it is a callable, it's passed the ``Match`` object and must return a replacement string to be used.""" return compile(pattern, flags).subn(repl, string, count) @@ -315,20 +324,20 @@ cdef utf8indices(char * cstring, int size, int *pos, int *endpos): endpos[0] = newendpos -cdef list unicodeindices(list positions, +cdef array.array unicodeindices(array.array positions, char * cstring, int size, int * cpos, int * upos): - """Convert a list of UTF-8 byte indices to unicode indices.""" + """Convert an array of UTF-8 byte indices to unicode indices.""" cdef unsigned char * s = <unsigned char *>cstring cdef int i = 0 - cdef list result = [] + cdef array.array result = array.clone(positions, len(positions), False) - if positions[i] == -1: - result.append(-1) + if positions.data.as_longs[i] == -1: + result.data.as_longs[i] = -1 i += 1 if i == len(positions): return result - if positions[i] == cpos[0]: - result.append(upos[0]) + if positions.data.as_longs[i] == cpos[0]: + result.data.as_longs[i] = upos[0] i += 1 if i == len(positions): return result @@ -353,8 +362,8 @@ cdef list unicodeindices(list positions, upos[0] += 1 emit_endif() - if positions[i] 
== cpos[0]: - result.append(upos[0]) + if positions.data.as_longs[i] == cpos[0]: + result.data.as_longs[i] = upos[0] i += 1 if i == len(positions): break diff --git a/tests/performance.py b/tests/performance.py old mode 100755 new mode 100644 diff --git a/tests/re2_test.py b/tests/re2_test.py old mode 100755 new mode 100644 diff --git a/tests/re_tests.py b/tests/re_tests.py old mode 100755 new mode 100644 From d62d055dbc69e6b8ea7418980c0c5e8404f76559 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Wed, 25 Nov 2015 15:14:07 +0100 Subject: [PATCH 015/114] document support syntax &c. - add reference of supported syntax to main docstring - add __all__ attribute defining public members - add re's purge() function - add tests for count method - switch order of prepare_pattern() and _compile() - rename prepare_pattern() to _prepare_pattern() to signal that it is semi-private --- src/compile.pxi | 219 ++++++++++++++++++++++++------------------------ src/re2.pyx | 108 +++++++++++++++++++++++- tests/count.txt | 33 ++++++++ 3 files changed, 250 insertions(+), 110 deletions(-) create mode 100644 tests/count.txt diff --git a/src/compile.pxi b/src/compile.pxi index 8aaa359f..06a72f50 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -11,7 +11,116 @@ def compile(pattern, int flags=0, int max_mem=8388608): return p -def prepare_pattern(bytes pattern, int flags): +def _compile(object pattern, int flags=0, int max_mem=8388608): + """Compile a regular expression pattern, returning a pattern object.""" + def fallback(pattern, flags, error_msg): + """Raise error, warn, or simply return fallback from re module.""" + error_msg = "re.LOCALE not supported" + if current_notification == FALLBACK_EXCEPTION: + raise RegexError(error_msg) + elif current_notification == FALLBACK_WARNING: + warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) + try: + result = re.compile(pattern, flags) + except re.error as err: + raise RegexError(*err.args) + return result + + cdef _re2.StringPiece * s + cdef _re2.Options opts + cdef int error_code + cdef int encoded = 0 + cdef object original_pattern + + if isinstance(pattern, (Pattern, SREPattern)): + if flags: + raise ValueError( + 'Cannot process flags argument with a compiled pattern') + return pattern + + original_pattern = pattern + if flags & _L: + return fallback(original_pattern, flags, "re.LOCALE not supported") + pattern = unicode_to_bytes(pattern, &encoded, -1) + newflags = flags + if not PY2: + if not encoded and flags & _U: + pass + # raise ValueError("can't use UNICODE flag with a bytes pattern") + elif encoded and not (flags & re.ASCII): + newflags = flags | re.UNICODE + elif encoded and flags & re.ASCII: + newflags = flags & ~re.UNICODE + try: + pattern = _prepare_pattern(pattern, newflags) + except BackreferencesException: + return fallback(original_pattern, flags, "Backreferences not supported") + except CharClassProblemException: + return fallback(original_pattern, flags, + "\W and \S not supported inside character classes") + + + # Set the options given the flags above. + if flags & _I: + opts.set_case_sensitive(0); + + opts.set_max_mem(max_mem) + opts.set_log_errors(0) + opts.set_encoding(_re2.EncodingUTF8) + + s = new _re2.StringPiece(<char *><bytes>pattern, len(pattern)) + + cdef _re2.RE2 *re_pattern + cdef _re2.const_stringintmap * named_groups + cdef _re2.stringintmapiterator it + with nogil: + re_pattern = new _re2.RE2(s[0], opts) + + if not re_pattern.ok(): + # Something went wrong with the compilation. + del s + error_msg = cpp_to_unicode(re_pattern.error()) + error_code = re_pattern.error_code() + del re_pattern + if current_notification == FALLBACK_EXCEPTION: + # Raise an exception regardless of the type of error. 
+ raise RegexError(error_msg) + elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, + # _re2.ErrorBadEscape, + _re2.ErrorPatternTooLarge): + # Raise an error because these will not be fixed by using the + # ``re`` module. + raise RegexError(error_msg) + elif current_notification == FALLBACK_WARNING: + warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) + return re.compile(original_pattern, flags) + + cdef Pattern pypattern = Pattern() + pypattern.pattern = original_pattern + pypattern.re_pattern = re_pattern + pypattern.groups = re_pattern.NumberOfCapturingGroups() + pypattern.encoded = encoded + pypattern.flags = flags + pypattern.groupindex = {} + named_groups = _re2.addressof(re_pattern.NamedCapturingGroups()) + it = named_groups.begin() + while it != named_groups.end(): + if encoded: + pypattern.groupindex[cpp_to_unicode(deref(it).first) + ] = deref(it).second + else: + pypattern.groupindex[cpp_to_bytes(deref(it).first) + ] = deref(it).second + inc(it) + + if flags & re.DEBUG: + print(repr(pypattern._dump_pattern())) + del s + return pypattern + + +def _prepare_pattern(bytes pattern, int flags): + """Translate pattern to RE2 syntax.""" cdef bytearray result = bytearray() cdef unsigned char * cstring = pattern cdef unsigned char this, that @@ -128,111 +237,3 @@ def prepare_pattern(bytes pattern, int flags): result.append(that) n += 1 return bytes(result) - - -def _compile(object pattern, int flags=0, int max_mem=8388608): - """Compile a regular expression pattern, returning a pattern object.""" - def fallback(pattern, flags, error_msg): - """Raise error, warn, or simply return fallback from re module.""" - error_msg = "re.LOCALE not supported" - if current_notification == FALLBACK_EXCEPTION: - raise RegexError(error_msg) - elif current_notification == FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) - try: - result = re.compile(pattern, flags) - except re.error as err: - raise RegexError(*err.args) - return result - - cdef _re2.StringPiece * s - cdef _re2.Options opts - cdef int error_code - cdef int encoded = 0 - cdef object original_pattern - - if isinstance(pattern, (Pattern, SREPattern)): - if flags: - raise ValueError( - 'Cannot process flags argument with a compiled pattern') - return pattern - - original_pattern = pattern - if flags & _L: - return fallback(original_pattern, flags, "re.LOCALE not supported") - pattern = unicode_to_bytes(pattern, &encoded, -1) - newflags = flags - if not PY2: - if not encoded and flags & _U: - pass - # raise ValueError("can't use UNICODE flag with a bytes pattern") - elif encoded and not (flags & re.ASCII): - newflags = flags | re.UNICODE - elif encoded and flags & re.ASCII: - newflags = flags & ~re.UNICODE - try: - pattern = prepare_pattern(pattern, newflags) - except BackreferencesException: - return fallback(original_pattern, flags, "Backreferences not supported") - except CharClassProblemException: - return fallback(original_pattern, flags, - "\W and \S not supported inside character classes") - - - # Set the options given the flags above. - if flags & _I: - opts.set_case_sensitive(0); - - opts.set_max_mem(max_mem) - opts.set_log_errors(0) - opts.set_encoding(_re2.EncodingUTF8) - - s = new _re2.StringPiece(<char *><bytes>pattern, len(pattern)) - - cdef _re2.RE2 *re_pattern - cdef _re2.const_stringintmap * named_groups - cdef _re2.stringintmapiterator it - with nogil: - re_pattern = new _re2.RE2(s[0], opts) - - if not re_pattern.ok(): - # Something went wrong with the compilation. - del s - error_msg = cpp_to_unicode(re_pattern.error()) - error_code = re_pattern.error_code() - del re_pattern - if current_notification == FALLBACK_EXCEPTION: - # Raise an exception regardless of the type of error. 
- raise RegexError(error_msg) - elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, - # _re2.ErrorBadEscape, - _re2.ErrorPatternTooLarge): - # Raise an error because these will not be fixed by using the - # ``re`` module. - raise RegexError(error_msg) - elif current_notification == FALLBACK_WARNING: - warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) - return re.compile(original_pattern, flags) - - cdef Pattern pypattern = Pattern() - pypattern.pattern = original_pattern - pypattern.re_pattern = re_pattern - pypattern.groups = re_pattern.NumberOfCapturingGroups() - pypattern.encoded = encoded - pypattern.flags = flags - pypattern.groupindex = {} - named_groups = _re2.addressof(re_pattern.NamedCapturingGroups()) - it = named_groups.begin() - while it != named_groups.end(): - if encoded: - pypattern.groupindex[cpp_to_unicode(deref(it).first) - ] = deref(it).second - else: - pypattern.groupindex[cpp_to_bytes(deref(it).first) - ] = deref(it).second - inc(it) - - if flags & re.DEBUG: - print(repr(pypattern._dump_pattern())) - del s - return pypattern diff --git a/src/re2.pyx b/src/re2.pyx index a9f35072..1f06f2de 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -1,5 +1,5 @@ # cython: infer_types(False) -"""Regular expressions using Google's RE2 engine. +r"""Regular expressions using Google's RE2 engine. Compared to Python's ``re``, the RE2 engine converts regular expressions to deterministic finite automata, which guarantees linear-time behavior. @@ -17,6 +17,89 @@ fallback to ``re``. Examples of features not supported by RE2: On the other hand, unicode character classes are supported. Syntax reference: https://github.com/google/re2/wiki/Syntax + +What follows is a reference for the regular expression syntax supported by this +module (i.e., without requiring fallback to `re`). + +Regular expressions can contain both special and ordinary characters. 
+Most ordinary characters, like "A", "a", or "0", are the simplest +regular expressions; they simply match themselves. + +The special characters are:: + + "." Matches any character except a newline. + "^" Matches the start of the string. + "$" Matches the end of the string or just before the newline at + the end of the string. + "*" Matches 0 or more (greedy) repetitions of the preceding RE. + Greedy means that it will match as many repetitions as possible. + "+" Matches 1 or more (greedy) repetitions of the preceding RE. + "?" Matches 0 or 1 (greedy) of the preceding RE. + *?,+?,?? Non-greedy versions of the previous three special characters. + {m,n} Matches from m to n repetitions of the preceding RE. + {m,n}? Non-greedy version of the above. + "\\" Either escapes special characters or signals a special sequence. + [] Indicates a set of characters. + A "^" as the first character indicates a complementing set. + "|" A|B, creates an RE that will match either A or B. + (...) Matches the RE inside the parentheses. + The contents can be retrieved or matched later in the string. + (?:...) Non-grouping version of regular parentheses. + (?imsux) Set the I, M, S, U, or X flag for the RE (see below). + +The special sequences consist of "\\" and a character from the list +below. If the ordinary character is not on the list, then the +resulting RE will match the second character:: + + \A Matches only at the start of the string. + \Z Matches only at the end of the string. + \b Matches the empty string, but only at the start or end of a word. + \B Matches the empty string, but not at the start or end of a word. + \d Matches any decimal digit. + \D Matches any non-digit character. + \s Matches any whitespace character. + \S Matches any non-whitespace character. + \w Matches any alphanumeric character. + \W Matches the complement of \w. + \\ Matches a literal backslash. 
+ \pN Unicode character class (one-letter name) + \p{Greek} Unicode character class + \PN negated Unicode character class (one-letter name) + \P{Greek} negated Unicode character class + +This module exports the following functions:: + + count Count all occurrences of a pattern in a string. + match Match a regular expression pattern to the beginning of a string. + fullmatch Match a regular expression pattern to all of a string. + search Search a string for the presence of a pattern. + sub Substitute occurrences of a pattern found in a string. + subn Same as sub, but also return the number of substitutions made. + split Split a string by the occurrences of a pattern. + findall Find all occurrences of a pattern in a string. + finditer Return an iterator yielding a match object for each match. + compile Compile a pattern into a RegexObject. + purge Clear the regular expression cache. + escape Backslash all non-alphanumerics in a string. + +Some of the functions in this module takes flags as optional parameters:: + + A ASCII Make \w, \W, \b, \B, \d, \D match the corresponding ASCII + character categories (rather than the whole Unicode + categories, which is the default). + I IGNORECASE Perform case-insensitive matching. + M MULTILINE "^" matches the beginning of lines (after a newline) + as well as the string. + "$" matches the end of lines (before a newline) as well + as the end of the string. + S DOTALL "." matches any character at all, including the newline. + X VERBOSE Ignore whitespace and comments for nicer looking RE's. + U UNICODE Enable Unicode character classes and make \w, \W, \b, \B, + Unicode-aware (default for unicode patterns). + +This module also defines an exception 'RegexError' (also available under the +alias 'error'). 
+ """ import sys @@ -78,6 +161,12 @@ include "pattern.pxi" include "match.pxi" +def purge(): + """Clear the regular expression caches.""" + _cache.clear() + _cache_repl.clear() + + def search(pattern, string, int flags=0): """Scan through string looking for a match to the pattern, returning a ``Match`` object or none if no match was found.""" @@ -368,3 +457,20 @@ cdef array.array unicodeindices(array.array positions, if i == len(positions): break return result + + +__all__ = [ + # exceptions + 'BackreferencesException', 'CharClassProblemException', + 'RegexError', 'error', + # constants + 'FALLBACK_EXCEPTION', 'FALLBACK_QUIETLY', 'FALLBACK_WARNING', 'DEBUG', + 'S', 'DOTALL', 'I', 'IGNORECASE', 'L', 'LOCALE', 'M', 'MULTILINE', + 'U', 'UNICODE', 'X', 'VERBOSE', 'VERSION', 'VERSION_HEX', + # classes + 'Match', 'Pattern', 'SREPattern', + # functions + 'compile', 'count', 'escape', 'findall', 'finditer', 'fullmatch', + 'match', 'purge', 'search', 'split', 'sub', 'subn', + 'set_fallback_notification', + ] diff --git a/tests/count.txt b/tests/count.txt new file mode 100644 index 00000000..0097ba0c --- /dev/null +++ b/tests/count.txt @@ -0,0 +1,33 @@ +count tests +=========== + + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + +This one is from http://docs.python.org/library/re.html?#finding-all-adverbs: + + >>> re2.count(r"\w+ly", "He was carefully disguised but captured quickly by police.") + 2 + +This one makes sure all groups are found: + + >>> re2.count(r"(\w+)=(\d+)", "foo=1,foo=2") + 2 + +When there's only one matched group, it should not be returned in a tuple: + + >>> re2.count(r"(\w)\w", "fx") + 1 + +Zero matches is an empty list: + + >>> re2.count("(f)", "gggg") + 0 + +If pattern matches an empty string, do it only once at the end: + + >>> re2.count(".*", "foo") + 2 + + >>> re2.count("", "foo") + 3 From bb304504159598e5ce3b7915da0cf89b27d32049 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 21 
Dec 2015 01:09:39 +0100 Subject: [PATCH 016/114] add tests for bug with \\b --- tests/findall.txt | 8 ++++++++ tests/finditer.txt | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/tests/findall.txt b/tests/findall.txt index 58342b61..9b7d50c1 100644 --- a/tests/findall.txt +++ b/tests/findall.txt @@ -31,3 +31,11 @@ If pattern matches an empty string, do it only once at the end: >>> re2.findall("", "foo") ['', '', '', ''] + + + >>> import re + >>> re.findall(r'\b', 'The quick brown fox jumped over the lazy dog') + ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] + >>> re2.findall(r'\b', 'The quick brown fox jumped over the lazy dog') + ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] + diff --git a/tests/finditer.txt b/tests/finditer.txt index 1fa4cc44..ec717bc6 100644 --- a/tests/finditer.txt +++ b/tests/finditer.txt @@ -14,3 +14,12 @@ Simple tests for the ``finditer`` function. ... open("cnn_homepage.dat").read(), re2.M)] [' a { text-decoration:none; }', ' li { padding:0 10px; }', ' ul li.no-pad-left span { font-size:12px; }'] + >>> for a in re2.finditer(br'\b', b'foo bar zed'): print(a) + <re2.Match object; span=(0, 0), match=''> + <re2.Match object; span=(3, 3), match=''> + <re2.Match object; span=(4, 4), match=''> + <re2.Match object; span=(7, 7), match=''> + <re2.Match object; span=(8, 8), match=''> + <re2.Match object; span=(11, 11), match=''> + + From 415fd39d099d5e5f86d84506d2c4f073f2118a69 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 21 Dec 2015 01:09:58 +0100 Subject: [PATCH 017/114] fix Match repr --- src/match.pxi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/match.pxi b/src/match.pxi index 628344fe..e989aef6 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -224,12 +224,12 @@ cdef class Match: n += 1 return bytes(result) - def end(self, group=0): - return self.span(group)[1] - def start(self, group=0): return 
self.span(group)[0] + def end(self, group=0): + return self.span(group)[1] + def span(self, group=0): if isinstance(group, int): if group > len(self.regs): @@ -278,4 +278,4 @@ cdef class Match: def __repr__(self): return '<re2.Match object; span=%r, match=%r>' % ( - (self.pos, self.endpos), self.string) + self.span(), self.group()) From 224abc53fc0a8e8270c577622e2ead71d28aad94 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Wed, 27 Apr 2016 01:05:35 +0200 Subject: [PATCH 018/114] Fix bugs; ensure memory is released; simplify C++ interfacing; - Fix bug causing zero-length matches to be returned multiple times - Use Latin 1 encoding with RE2 when unicode not requested - Ensure memory is released: - put del calls in finally blocks - add missing del call for 'matches' array - Remove Cython hacks for C++ that are no longer needed; use const keyword that has been supported for some time. Fixes Cython 0.24 compilation issue. - Turn _re2.pxd into includes.pxi. 
- remove some tests that are specific to internal Python modules _sre and sre --- Makefile | 24 ++-- README.rst | 75 +++++++------ setup.py | 7 +- src/_re2macros.h | 11 -- src/compile.pxi | 48 ++++---- src/{_re2.pxd => includes.pxi} | 98 +++++++--------- src/match.pxi | 12 +- src/pattern.pxi | 197 ++++++++++++++++----------------- src/re2.pyx | 50 ++++----- tests/count.txt | 6 +- tests/findall.txt | 10 +- tests/finditer.txt | 22 ++-- tests/mmap.txt | 2 +- tests/search.txt | 4 +- tests/sub.txt | 7 +- tests/test_re.py | 22 +--- 16 files changed, 275 insertions(+), 320 deletions(-) rename src/{_re2.pxd => includes.pxi} (52%) diff --git a/Makefile b/Makefile index 8b8c31c6..8aa13914 100644 --- a/Makefile +++ b/Makefile @@ -1,22 +1,14 @@ -all: - python setup.py build_ext --cython - install: python setup.py install --user --cython -test: all - cp build/lib*-2.*/re2.so tests/ +test: install (cd tests && python re2_test.py) (cd tests && python test_re.py) -py3: - python3 setup.py build_ext --cython - install3: python3 setup.py install --user --cython -test3: py3 - cp build/lib*-3.*/re2*.so tests/re2.so +test3: install3 (cd tests && python3 re2_test.py) (cd tests && python3 test_re.py) @@ -25,3 +17,15 @@ clean: rm -rf src/*.so src/*.html &>/dev/null rm -rf re2.so tests/re2.so &>/dev/null rm -rf src/re2.cpp &>/dev/null + +valgrind: + python3.5-dbg setup.py install --user --cython && \ + (cd tests && valgrind --tool=memcheck --suppressions=../valgrind-python.supp \ + --leak-check=full --show-leak-kinds=definite \ + python3.5-dbg test_re.py) + +valgrind2: + python3.5-dbg setup.py install --user --cython && \ + (cd tests && valgrind --tool=memcheck --suppressions=../valgrind-python.supp \ + --leak-check=full --show-leak-kinds=definite \ + python3.5-dbg re2_test.py) diff --git a/README.rst b/README.rst index 27a3f11a..8d86cbf6 100644 --- a/README.rst +++ b/README.rst @@ -47,52 +47,59 @@ And in the above example, ``set_fallback_notification`` can handle 3 values: 
``re.FALLBACK_QUIETLY`` (default), ``re.FALLBACK_WARNING`` (raises a warning), and ``re.FALLBACK_EXCEPTION`` (which raises an exception). -**Note**: The re2 module treats byte strings as UTF-8. This is fully backwards compatible with 7-bit ascii. -However, bytes containing values larger than 0x7f are going to be treated very differently in re2 than in re. -The RE library quietly ignores invalid utf8 in input strings, and throws an exception on invalid utf8 in patterns. -For example: - - >>> re.findall(r'.', '\x80\x81\x82') - ['\x80', '\x81', '\x82'] - >>> re2.findall(r'.', '\x80\x81\x82') - [] - -If you require the use of regular expressions over an arbitrary stream of bytes, then this library might not be for you. - Installation ============ To install, you must first install the prerequisites: * The `re2 library from Google <http://code.google.com/p/re2/>`_ -* The Python development headers (e.g. *sudo apt-get install python-dev*) -* A build environment with ``g++`` (e.g. *sudo apt-get install build-essential*) +* The Python development headers (e.g. ``sudo apt-get install python-dev``) +* A build environment with ``g++`` (e.g. ``sudo apt-get install build-essential``) +* Cython 0.20+ (``pip install cython``) + +After the prerequisites are installed, you can install as follows:: + + $ git clone git://github.com/andreasvc/pyre2.git + $ cd pyre2 + $ make install -After the prerequisites are installed, you can try installing using ``easy_install``:: +(or ``make install3`` for Python 3) - $ sudo easy_install re2 +Unicode Support +=============== -if you have setuptools installed (or use ``pip``). +Python ``bytes`` and ``unicode`` strings are fully supported, but note that +``RE2`` works with UTF-8 encoded strings under the hood, which means that +``unicode`` strings need to be encoded and decoded back and forth. 
+There are two important factors: -If you don't want to use ``setuptools``, you can alternatively download the tarball from `pypi <http://pypi.python.org/pypi/re2/>`_. +* whether a ``unicode`` pattern and search string is used (will be encoded to UTF-8 internally) +* the ``UNICODE`` flag: whether operators such as ``\w`` recognize Unicode characters. -Alternative to those, you can clone this repository and try installing it from there. To do this, run:: +To avoid the overhead of encoding and decoding to UTF-8, it is possible to pass +UTF-8 encoded bytes strings directly but still treat them as ``unicode``:: - $ git clone git://github.com/axiak/pyre2.git - $ cd pyre2.git - $ sudo python setup.py install + In [18]: re2.findall(u'\w'.encode('utf8'), u'Mötley Crüe'.encode('utf8'), flags=re2.UNICODE) + Out[18]: ['M', '\xc3\xb6', 't', 'l', 'e', 'y', 'C', 'r', '\xc3\xbc', 'e'] + In [19]: re2.findall(u'\w'.encode('utf8'), u'Mötley Crüe'.encode('utf8')) + Out[19]: ['M', 't', 'l', 'e', 'y', 'C', 'r', 'e'] -If you want to make changes to the bindings, you must have Cython >=0.13. +However, note that the indices in ``Match`` objects will refer to the bytes string. +The indices of the match in the ``unicode`` string could be computed by +decoding/encoding, but this is done automatically and more efficiently if you +pass the ``unicode`` string:: -Unicode Support -=============== + >>> re2.search(u'ü'.encode('utf8'), u'Mötley Crüe'.encode('utf8'), flags=re2.UNICODE) + <re2.Match object; span=(10, 12), match='\xc3\xbc'> + >>> re2.search(u'ü', u'Mötley Crüe', flags=re2.UNICODE) + <re2.Match object; span=(9, 10), match=u'\xfc'> + +Finally, if you want to match bytes without regard for Unicode characters, +pass bytes strings and leave out the ``UNICODE`` flag (this will cause Latin 1 +encoding to be used with ``RE2`` under the hood):: -One current issue is Unicode support. As you may know, ``RE2`` supports UTF8, -which is certainly distinct from unicode. 
Right now the module will automatically -encode any unicode string into utf8 for you, which is *slow* (it also has to -decode utf8 strings back into unicode objects on every substitution or split). -Therefore, you are better off using bytestrings in utf8 while working with RE2 -and encoding things after everything you need done is finished. + >>> re2.findall(br'.', b'\x80\x81\x82') + ['\x80', '\x81', '\x82'] Performance =========== @@ -104,7 +111,7 @@ I've found that occasionally python's regular ``re`` module is actually slightly However, when the ``re`` module gets slow, it gets *really* slow, while this module buzzes along. -In the below example, I'm running the data against 8MB of text from the collosal Wikipedia +In the below example, I'm running the data against 8MB of text from the colossal Wikipedia XML file. I'm running them multiple times, being careful to use the ``timeit`` module. To see more details, please see the `performance script <http://github.com/axiak/pyre2/tree/master/tests/performance.py>`_. @@ -131,8 +138,6 @@ The tests show the following differences with Python's ``re`` module: * ``pyre2`` and Python's ``re`` behave differently with nested and empty groups; ``pyre2`` will return an empty string in cases where Python would return None for a group that did not participate in a match. -* Any bytestrings with invalid UTF-8 or other non-ASCII data may behave - differently. Please report any further issues with ``pyre2``. @@ -162,5 +167,5 @@ and Facebook for the initial inspiration. Plus, I got to gut this readme file! Moreover, this library would of course not be possible if not for -the immense work of the team at RE2 and the few people who work +the immense work of the team at ``RE2`` and the few people who work on Cython. 
diff --git a/setup.py b/setup.py index 188f23f5..0647a32b 100755 --- a/setup.py +++ b/setup.py @@ -29,8 +29,11 @@ def run(self): def version_compare(version1, version2): def normalize(v): - return [int(x) for x in re.sub(r'(\.0+)*$','', v).split(".")] - return cmp(normalize(version1), normalize(version2)) + return [int(x) for x in re.sub(r'(\.0+)*$', '', v).split(".")] + try: + return cmp(normalize(version1), normalize(version2)) + except ValueError: # raised by e.g. '0.24b0' + return 1 cmdclass = {'test': TestCommand} diff --git a/src/_re2macros.h b/src/_re2macros.h index 9e8cc926..b9ac82af 100644 --- a/src/_re2macros.h +++ b/src/_re2macros.h @@ -9,16 +9,5 @@ static inline re2::StringPiece * new_StringPiece_array(int n) re2::StringPiece * sp = new re2::StringPiece[n]; return sp; } -static inline void delete_StringPiece_array(re2::StringPiece* ptr) -{ - delete[] ptr; -} - -#define addressof(A) (&A) -#define addressofs(A) (&A) - -#define as_char(A) (char *)(A) -#define pattern_Replace(A, B, C) re2::RE2::Replace((A), (B), (C)) -#define pattern_GlobalReplace(A, B, C) re2::RE2::GlobalReplace((A), (B), (C)) #endif diff --git a/src/compile.pxi b/src/compile.pxi index 06a72f50..9eeb1190 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -15,7 +15,6 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): """Compile a regular expression pattern, returning a pattern object.""" def fallback(pattern, flags, error_msg): """Raise error, warn, or simply return fallback from re module.""" - error_msg = "re.LOCALE not supported" if current_notification == FALLBACK_EXCEPTION: raise RegexError(error_msg) elif current_notification == FALLBACK_WARNING: @@ -26,8 +25,8 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): raise RegexError(*err.args) return result - cdef _re2.StringPiece * s - cdef _re2.Options opts + cdef StringPiece * s + cdef Options opts cdef int error_code cdef int encoded = 0 cdef object original_pattern @@ -44,13 +43,13 @@ def 
_compile(object pattern, int flags=0, int max_mem=8388608): pattern = unicode_to_bytes(pattern, &encoded, -1) newflags = flags if not PY2: - if not encoded and flags & _U: - pass + if not encoded and flags & _U: # re.UNICODE + pass # can use UNICODE with bytes pattern, but assumes valid UTF-8 # raise ValueError("can't use UNICODE flag with a bytes pattern") elif encoded and not (flags & re.ASCII): - newflags = flags | re.UNICODE + newflags = flags | _U # re.UNICODE elif encoded and flags & re.ASCII: - newflags = flags & ~re.UNICODE + newflags = flags & ~_U # re.UNICODE try: pattern = _prepare_pattern(pattern, newflags) except BackreferencesException: @@ -59,22 +58,23 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): return fallback(original_pattern, flags, "\W and \S not supported inside character classes") - # Set the options given the flags above. if flags & _I: opts.set_case_sensitive(0); opts.set_max_mem(max_mem) opts.set_log_errors(0) - opts.set_encoding(_re2.EncodingUTF8) + if flags & _U or encoded: + opts.set_encoding(EncodingUTF8) + else: # re.UNICODE flag not passed, and pattern is bytes, + # so allow matching of arbitrary byte sequences. + opts.set_encoding(EncodingLatin1) - s = new _re2.StringPiece(<char *><bytes>pattern, len(pattern)) + s = new StringPiece(<char *><bytes>pattern, len(pattern)) - cdef _re2.RE2 *re_pattern - cdef _re2.const_stringintmap * named_groups - cdef _re2.stringintmapiterator it + cdef RE2 *re_pattern with nogil: - re_pattern = new _re2.RE2(s[0], opts) + re_pattern = new RE2(s[0], opts) if not re_pattern.ok(): # Something went wrong with the compilation. @@ -85,9 +85,9 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): if current_notification == FALLBACK_EXCEPTION: # Raise an exception regardless of the type of error. 
raise RegexError(error_msg) - elif error_code not in (_re2.ErrorBadPerlOp, _re2.ErrorRepeatSize, - # _re2.ErrorBadEscape, - _re2.ErrorPatternTooLarge): + elif error_code not in (ErrorBadPerlOp, ErrorRepeatSize, + # ErrorBadEscape, + ErrorPatternTooLarge): # Raise an error because these will not be fixed by using the # ``re`` module. raise RegexError(error_msg) @@ -96,24 +96,20 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): return re.compile(original_pattern, flags) cdef Pattern pypattern = Pattern() + cdef map[cpp_string, int] named_groups = re_pattern.NamedCapturingGroups() pypattern.pattern = original_pattern pypattern.re_pattern = re_pattern pypattern.groups = re_pattern.NumberOfCapturingGroups() pypattern.encoded = encoded pypattern.flags = flags pypattern.groupindex = {} - named_groups = _re2.addressof(re_pattern.NamedCapturingGroups()) - it = named_groups.begin() - while it != named_groups.end(): + for it in named_groups: if encoded: - pypattern.groupindex[cpp_to_unicode(deref(it).first) - ] = deref(it).second + pypattern.groupindex[cpp_to_unicode(it.first)] = it.second else: - pypattern.groupindex[cpp_to_bytes(deref(it).first) - ] = deref(it).second - inc(it) + pypattern.groupindex[cpp_to_bytes(it.first)] = it.second - if flags & re.DEBUG: + if flags & DEBUG: print(repr(pypattern._dump_pattern())) del s return pypattern diff --git a/src/_re2.pxd b/src/includes.pxi similarity index 52% rename from src/_re2.pxd rename to src/includes.pxi index c23fca18..ec1c4a65 100644 --- a/src/_re2.pxd +++ b/src/includes.pxi @@ -1,46 +1,34 @@ -cdef extern from *: - ctypedef char* const_char_ptr "const char*" - -cdef extern from "<string>" namespace "std": - cdef cppclass string: - string(char *) - string(char *, size_t n) - const_char_ptr data() - size_t length() - void push_back(char c) - void append(char * s) - ctypedef string cpp_string "std::string" - ctypedef string const_string "const std::string" +cimport cpython.unicode +from cython.operator 
cimport preincrement as inc, dereference as deref +from libcpp.map cimport map +from libcpp.string cimport string as cpp_string +from cpython.buffer cimport Py_buffer, PyBUF_SIMPLE, PyObject_CheckBuffer, \ + PyObject_GetBuffer, PyBuffer_Release +from cpython cimport array +from cpython.version cimport PY_MAJOR_VERSION +cdef extern from *: + cdef int PY2 + cdef void emit_ifndef_py_unicode_wide "#if !defined(Py_UNICODE_WIDE) //" () + cdef void emit_endif "#endif //" () -cdef extern from "<map>" namespace "std": - cdef cppclass stringintmapiterator "std::map<std::string, int>::const_iterator": - cpp_string first - int second - stringintmapiterator operator++() - bint operator==(stringintmapiterator) - stringintmapiterator& operator*(stringintmapiterator) - bint operator!=(stringintmapiterator) - cdef cppclass const_stringintmap "const std::map<std::string, int>": - stringintmapiterator begin() - stringintmapiterator end() - int operator[](cpp_string) +cdef extern from "Python.h": + int PyObject_CheckReadBuffer(object) + int PyObject_AsReadBuffer(object, const void **, Py_ssize_t *) cdef extern from "re2/stringpiece.h" namespace "re2": cdef cppclass StringPiece: StringPiece() - StringPiece(const_char_ptr) - StringPiece(const_char_ptr, int) - const_char_ptr data() + StringPiece(const char *) + StringPiece(const char *, int) + const char * data() int copy(char * buf, size_t n, size_t pos) int length() - ctypedef StringPiece const_StringPiece "const StringPiece" - cdef extern from "re2/re2.h" namespace "re2": cdef enum Anchor: @@ -89,40 +77,36 @@ cdef extern from "re2/re2.h" namespace "re2": int case_sensitive() void set_encoding(re2_Encoding encoding) - ctypedef Options const_Options "const RE2::Options" - cdef cppclass RE2: - RE2(const_StringPiece pattern, Options option) nogil - RE2(const_StringPiece pattern) nogil - int Match(const_StringPiece text, int startpos, int endpos, - Anchor anchor, StringPiece * match, int nmatch) nogil + RE2(const StringPiece pattern, 
Options option) nogil + RE2(const StringPiece pattern) nogil + int Match(const StringPiece text, int startpos, int endpos, + Anchor anchor, StringPiece * match, int nmatch) nogil + int Replace(cpp_string *str, const RE2 pattern, + const StringPiece rewrite) nogil + int GlobalReplace(cpp_string *str, const RE2 pattern, + const StringPiece rewrite) nogil int NumberOfCapturingGroups() int ok() - const_string pattern() + const cpp_string pattern() cpp_string error() ErrorCode error_code() - const_stringintmap& NamedCapturingGroups() + const map[cpp_string, int]& NamedCapturingGroups() - ctypedef RE2 const_RE2 "const RE2" + # hack for static methods + cdef int Replace "RE2::Replace"( + cpp_string *str, const RE2 pattern, + const StringPiece rewrite) nogil + cdef int GlobalReplace "RE2::GlobalReplace"( + cpp_string *str, + const RE2 pattern, + const StringPiece rewrite) nogil -# This header is used for ways to hack^Wbypass the cython -# issues. cdef extern from "_re2macros.h": StringPiece * new_StringPiece_array(int) nogil - void delete_StringPiece_array(StringPiece* ptr) - - # This fixes the bug Cython #548 whereby reference returns - # cannot be addressed, due to it not being an l-value - const_stringintmap * addressof(const_stringintmap&) - cpp_string * addressofs(cpp_string&) - char * as_char(const_char_ptr) - - # This fixes the bug whereby namespaces are causing - # cython to just break for Cpp arguments. 
- int pattern_Replace(cpp_string *str, - const_RE2 pattern, - const_StringPiece rewrite) nogil - int pattern_GlobalReplace(cpp_string *str, - const_RE2 pattern, - const_StringPiece rewrite) nogil + + +cdef extern from *: + # StringPiece * new_StringPiece_array "new re2::StringPiece[n]" (int) nogil + void delete_StringPiece_array "delete[]" (StringPiece *) nogil diff --git a/src/match.pxi b/src/match.pxi index e989aef6..14bfcf62 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -5,7 +5,7 @@ cdef class Match: cdef readonly int endpos cdef readonly tuple regs - cdef _re2.StringPiece * matches + cdef StringPiece * matches cdef bint encoded cdef int nmatches cdef int _lastindex @@ -31,15 +31,15 @@ cdef class Match: self._groups = None self.pos = 0 self.endpos = -1 - self.matches = _re2.new_StringPiece_array(num_groups + 1) + self.matches = new_StringPiece_array(num_groups + 1) self.nmatches = num_groups self.re = pattern_object cdef _init_groups(self): cdef list groups = [] cdef int i - cdef _re2.const_char_ptr last_end = NULL - cdef _re2.const_char_ptr cur_end = NULL + cdef const char * last_end = NULL + cdef const char * cur_end = NULL for i in range(self.nmatches): if self.matches[i].data() == NULL: @@ -245,7 +245,7 @@ cdef class Match: cdef _make_spans(self, char * cstring, int size, int * cpos, int * upos): cdef int start, end - cdef _re2.StringPiece * piece + cdef StringPiece * piece spans = [] for i in range(self.nmatches): @@ -274,7 +274,7 @@ cdef class Match: return [(posdict[x], posdict[y]) for x, y in spans] def __dealloc__(self): - _re2.delete_StringPiece_array(self.matches) + delete_StringPiece_array(self.matches) def __repr__(self): return '<re2.Match object; span=%r, match=%r>' % ( diff --git a/src/pattern.pxi b/src/pattern.pxi index 49ba6c7a..753f080f 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -6,25 +6,25 @@ cdef class Pattern: cdef object __weakref__ cdef bint encoded # True if this was originally a Unicode pattern - cdef _re2.RE2 * 
re_pattern + cdef RE2 * re_pattern def search(self, object string, int pos=0, int endpos=-1): """Scan through string looking for a match, and return a corresponding Match instance. Return None if no position in the string matches.""" - return self._search(string, pos, endpos, _re2.UNANCHORED) + return self._search(string, pos, endpos, UNANCHORED) def match(self, object string, int pos=0, int endpos=-1): """Matches zero or more characters at the beginning of the string.""" - return self._search(string, pos, endpos, _re2.ANCHOR_START) + return self._search(string, pos, endpos, ANCHOR_START) def fullmatch(self, object string, int pos=0, int endpos=-1): """"fullmatch(string[, pos[, endpos]]) --> Match object or None." Matches the entire string.""" - return self._search(string, pos, endpos, _re2.ANCHOR_BOTH) + return self._search(string, pos, endpos, ANCHOR_BOTH) cdef _search(self, object string, int pos, int endpos, - _re2.re2_Anchor anchoring): + re2_Anchor anchoring): """Scan through string looking for a match, and return a corresponding Match instance. 
Return None if no position in the string matches.""" cdef char * cstring @@ -32,7 +32,7 @@ cdef class Pattern: cdef Py_buffer buf cdef int retval cdef int encoded = 0 - cdef _re2.StringPiece * sp + cdef StringPiece * sp cdef Match m = Match(self, self.groups + 1) cdef int cpos = 0, upos = pos @@ -51,7 +51,7 @@ cdef class Pattern: if 0 <= endpos < size: size = endpos - sp = new _re2.StringPiece(cstring, size) + sp = new StringPiece(cstring, size) with nogil: retval = self.re_pattern.Match( sp[0], @@ -84,10 +84,10 @@ cdef class Pattern: cdef Py_ssize_t size cdef Py_buffer buf cdef int retval - cdef _re2.StringPiece * sp cdef int encoded = 0 cdef int result = 0 - cdef _re2.StringPiece * matches + cdef StringPiece * sp = NULL + cdef StringPiece * matches = NULL bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: @@ -100,31 +100,31 @@ cdef class Pattern: if 0 <= endpos < size: size = endpos - sp = new _re2.StringPiece(cstring, size) - matches = _re2.new_StringPiece_array(1) - - while True: - with nogil: - retval = self.re_pattern.Match( - sp[0], - pos, - size, - _re2.UNANCHORED, - matches, - 1) - if retval == 0: - break - result += 1 - if pos == size: - break - # offset the pos to move to the next point - if matches[0].length() == 0: - pos += 1 - else: - pos = matches[0].data() - cstring + matches[0].length() + sp = new StringPiece(cstring, size) + matches = new_StringPiece_array(1) + try: + while True: + with nogil: + retval = self.re_pattern.Match( + sp[0], + pos, + size, + UNANCHORED, + matches, + 1) + if retval == 0: + break + result += 1 + if pos == size: + break + # offset the pos to move to the next point + pos = matches[0].data() - cstring + ( + matches[0].length() or 1) + finally: + del sp + delete_StringPiece_array(matches) finally: release_cstring(&buf) - del sp return result def findall(self, object string, int pos=0, int endpos=-1): @@ -133,11 +133,11 @@ cdef class Pattern: cdef char * 
cstring cdef Py_ssize_t size cdef Py_buffer buf + cdef int encoded = 0 cdef int retval - cdef _re2.StringPiece * sp cdef list resultlist = [] - cdef int encoded = 0 - cdef _re2.StringPiece * matches + cdef StringPiece * sp = NULL + cdef StringPiece * matches = NULL bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: @@ -150,8 +150,8 @@ cdef class Pattern: if 0 <= endpos < size: size = endpos - sp = new _re2.StringPiece(cstring, size) - matches = _re2.new_StringPiece_array(self.groups + 1) + sp = new StringPiece(cstring, size) + matches = new_StringPiece_array(self.groups + 1) while True: with nogil: @@ -159,7 +159,7 @@ cdef class Pattern: sp[0], pos, size, - _re2.UNANCHORED, + UNANCHORED, matches, self.groups + 1) if retval == 0: @@ -176,7 +176,7 @@ cdef class Pattern: b'' if matches[i].data() is NULL else matches[i].data()[:matches[i].length()] for i in range(1, self.groups + 1)])) - else: + else: # 0 or 1 group; return list of strings if encoded: resultlist.append(matches[self.groups].data()[ :matches[self.groups].length()].decode('utf8')) @@ -186,13 +186,11 @@ cdef class Pattern: if pos == size: break # offset the pos to move to the next point - if matches[0].length() == 0: - pos += 1 - else: - pos = matches[0].data() - cstring + matches[0].length() + pos = matches[0].data() - cstring + (matches[0].length() or 1) finally: + del sp + delete_StringPiece_array(matches) release_cstring(&buf) - del sp return resultlist def finditer(self, object string, int pos=0, int endpos=-1): @@ -207,7 +205,7 @@ cdef class Pattern: cdef Py_ssize_t size cdef Py_buffer buf cdef int retval - cdef _re2.StringPiece * sp + cdef StringPiece * sp = NULL cdef Match m cdef int encoded = 0 cdef int cpos = 0, upos = pos @@ -224,7 +222,7 @@ cdef class Pattern: if 0 <= endpos < size: size = endpos - sp = new _re2.StringPiece(cstring, size) + sp = new StringPiece(cstring, size) yield while True: @@ -235,7 +233,7 @@ cdef class 
Pattern: sp[0], pos, size, - _re2.UNANCHORED, + UNANCHORED, m.matches, self.groups + 1) if retval == 0: @@ -253,13 +251,11 @@ cdef class Pattern: if pos == size: break # offset the pos to move to the next point - if m.matches[0].length() == 0: - pos += 1 - else: - pos = m.matches[0].data() - cstring + m.matches[0].length() + pos = m.matches[0].data() - cstring + ( + m.matches[0].length() or 1) finally: + del sp release_cstring(&buf) - del sp def split(self, string, int maxsplit=0): """split(string[, maxsplit = 0]) --> list @@ -271,8 +267,8 @@ cdef class Pattern: cdef int pos = 0 cdef int lookahead = 0 cdef int num_split = 0 - cdef _re2.StringPiece * sp - cdef _re2.StringPiece * matches + cdef StringPiece * sp + cdef StringPiece * matches cdef list resultlist = [] cdef int encoded = 0 cdef Py_buffer buf @@ -283,9 +279,9 @@ cdef class Pattern: bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') + matches = new_StringPiece_array(self.groups + 1) + sp = new StringPiece(cstring, size) try: - matches = _re2.new_StringPiece_array(self.groups + 1) - sp = new _re2.StringPiece(cstring, size) while True: with nogil: @@ -293,7 +289,7 @@ cdef class Pattern: sp[0], pos + lookahead, size, - _re2.UNANCHORED, + UNANCHORED, matches, self.groups + 1) if retval == 0: @@ -340,10 +336,10 @@ cdef class Pattern: char_to_unicode(&sp.data()[pos], sp.length() - pos)) else: resultlist.append(sp.data()[pos:]) - _re2.delete_StringPiece_array(matches) finally: + del sp + delete_StringPiece_array(matches) release_cstring(&buf) - del sp return resultlist def sub(self, repl, string, int count=0): @@ -369,14 +365,13 @@ cdef class Pattern: cdef char * cstring cdef object result cdef Py_ssize_t size - cdef _re2.cpp_string * fixed_repl = NULL - cdef _re2.StringPiece * sp - cdef _re2.cpp_string * input_str + cdef StringPiece * sp = NULL + cdef cpp_string * input_str = NULL cdef int 
string_encoded = 0 cdef int repl_encoded = 0 if callable(repl): - # This is a callback, so let's use the custom function + # This is a callback, so use the custom function return self._subn_callback(repl, string, count, num_repl) repl_b = unicode_to_bytes(repl, &repl_encoded, self.encoded) @@ -384,36 +379,37 @@ cdef class Pattern: repl_b = bytes(repl) # coerce buffer to bytes object if count > 1 or (b'\\' if PY2 else <char>b'\\') in repl_b: - # Limit on number of substitution or replacement string contains - # escape sequences, handle with Match.expand() implementation. + # Limit on number of substitutions or replacement string contains + # escape sequences; handle with Match.expand() implementation. # RE2 does support simple numeric group references \1, \2, # but the number of differences with Python behavior is # non-trivial. return self._subn_expand(repl_b, string, count, num_repl) - - cstring = repl_b - size = len(repl_b) - sp = new _re2.StringPiece(cstring, size) - - bytestr = unicode_to_bytes(string, &string_encoded, self.encoded) - if not string_encoded and not isinstance(bytestr, bytes): - bytestr = bytes(bytestr) # coerce buffer to bytes object - input_str = new _re2.cpp_string(bytestr, len(bytestr)) - # NB: RE2 treats unmatched groups in repl as empty string; - # Python raises an error. 
- with nogil: - if count == 0: - num_repl[0] = _re2.pattern_GlobalReplace( - input_str, self.re_pattern[0], sp[0]) - elif count == 1: - num_repl[0] = _re2.pattern_Replace( - input_str, self.re_pattern[0], sp[0]) - - if string_encoded or (repl_encoded and num_repl[0] > 0): - result = cpp_to_unicode(input_str[0]) - else: - result = cpp_to_bytes(input_str[0]) - del fixed_repl, input_str, sp + try: + cstring = repl_b + size = len(repl_b) + sp = new StringPiece(cstring, size) + + bytestr = unicode_to_bytes(string, &string_encoded, self.encoded) + if not string_encoded and not isinstance(bytestr, bytes): + bytestr = bytes(bytestr) # coerce buffer to bytes object + input_str = new cpp_string(<char *>bytestr, len(bytestr)) + # NB: RE2 treats unmatched groups in repl as empty string; + # Python raises an error. + with nogil: + if count == 0: + num_repl[0] = GlobalReplace( + input_str, self.re_pattern[0], sp[0]) + elif count == 1: + num_repl[0] = Replace( + input_str, self.re_pattern[0], sp[0]) + + if string_encoded or (repl_encoded and num_repl[0] > 0): + result = cpp_to_unicode(input_str[0]) + else: + result = cpp_to_bytes(input_str[0]) + finally: + del input_str, sp return result cdef _subn_callback(self, callback, string, int count, int * num_repl): @@ -427,7 +423,7 @@ cdef class Pattern: cdef int endpos cdef int pos = 0 cdef int encoded = 0 - cdef _re2.StringPiece * sp + cdef StringPiece * sp cdef Match m cdef bytearray result = bytearray() cdef int cpos = 0, upos = 0 @@ -438,7 +434,7 @@ cdef class Pattern: bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') - sp = new _re2.StringPiece(cstring, size) + sp = new StringPiece(cstring, size) try: while True: m = Match(self, self.groups + 1) @@ -448,7 +444,7 @@ cdef class Pattern: sp[0], pos, size, - _re2.UNANCHORED, + UNANCHORED, m.matches, self.groups + 1) if retval == 0: @@ -473,8 +469,8 @@ cdef class Pattern: 
break result.extend(sp.data()[pos:]) finally: - release_cstring(&buf) del sp + release_cstring(&buf) return result.decode('utf8') if encoded else bytes(result) cdef _subn_expand(self, bytes repl, string, int count, int * num_repl): @@ -487,7 +483,7 @@ cdef class Pattern: cdef int endpos cdef int pos = 0 cdef int encoded = 0 - cdef _re2.StringPiece * sp + cdef StringPiece * sp cdef Match m cdef bytearray result = bytearray() @@ -497,7 +493,7 @@ cdef class Pattern: bytestr = unicode_to_bytes(string, &encoded, self.encoded) if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') - sp = new _re2.StringPiece(cstring, size) + sp = new StringPiece(cstring, size) try: while True: m = Match(self, self.groups + 1) @@ -507,7 +503,7 @@ cdef class Pattern: sp[0], pos, size, - _re2.UNANCHORED, + UNANCHORED, m.matches, self.groups + 1) if retval == 0: @@ -527,8 +523,8 @@ cdef class Pattern: break result.extend(sp.data()[pos:]) finally: - release_cstring(&buf) del sp + release_cstring(&buf) return result.decode('utf8') if encoded else bytes(result) def scanner(self, arg): @@ -536,11 +532,10 @@ cdef class Pattern: # raise NotImplementedError def _dump_pattern(self): - cdef _re2.cpp_string * s - s = <_re2.cpp_string *>_re2.addressofs(self.re_pattern.pattern()) + cdef cpp_string s = self.re_pattern.pattern() if self.encoded: - return cpp_to_bytes(s[0]).decode('utf8') - return cpp_to_bytes(s[0]) + return cpp_to_bytes(s).decode('utf8') + return cpp_to_bytes(s) def __repr__(self): if self.flags == 0: diff --git a/src/re2.pyx b/src/re2.pyx index 1f06f2de..edeba6b3 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -1,12 +1,12 @@ # cython: infer_types(False) r"""Regular expressions using Google's RE2 engine. -Compared to Python's ``re``, the RE2 engine converts regular expressions to +Compared to Python's ``re``, the RE2 engine compiles regular expressions to deterministic finite automata, which guarantees linear-time behavior. 
Intended as a drop-in replacement for ``re``. Unicode is supported by encoding -to UTF-8, and bytes strings are treated as UTF-8. For best performance, work -with UTF-8 encoded bytes strings. +to UTF-8, and bytes strings are treated as UTF-8 when the UNICODE flag is given. +For best performance, work with UTF-8 encoded bytes strings. Regular expressions that are not compatible with RE2 are processed with fallback to ``re``. Examples of features not supported by RE2: @@ -15,7 +15,7 @@ fallback to ``re``. Examples of features not supported by RE2: - backreferences (``\\n`` in search pattern) - \W and \S not supported inside character classes -On the other hand, unicode character classes are supported. +On the other hand, unicode character classes are supported (e.g., ``\p{Greek}``). Syntax reference: https://github.com/google/re2/wiki/Syntax What follows is a reference for the regular expression syntax supported by this @@ -102,29 +102,12 @@ alias 'error'). """ -import sys +include "includes.pxi" + import re +import sys import array import warnings -cimport _re2 -cimport cpython.unicode -from cython.operator cimport preincrement as inc, dereference as deref -from cpython.buffer cimport Py_buffer, PyBUF_SIMPLE -from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release -from cpython cimport array -from cpython.version cimport PY_MAJOR_VERSION - -cdef extern from *: - cdef int PY2 - cdef void emit_ifndef_py_unicode_wide "#if !defined(Py_UNICODE_WIDE) //" () - cdef void emit_endif "#endif //" () - ctypedef char* const_char_ptr "const char*" - ctypedef void* const_void_ptr "const void*" - -cdef extern from "Python.h": - int PY_MAJOR_VERSION - int PyObject_CheckReadBuffer(object) - int PyObject_AsReadBuffer(object, const_void_ptr *, Py_ssize_t *) # Import re flags to be compatible. 
@@ -136,6 +119,7 @@ UNICODE = re.UNICODE VERBOSE = re.VERBOSE LOCALE = re.LOCALE DEBUG = re.DEBUG +ASCII = 256 # Python 3 FALLBACK_QUIETLY = 0 FALLBACK_WARNING = 1 @@ -312,20 +296,20 @@ cdef bint isident(unsigned char c): or b'0' <= c <= b'9' or c == b'_') -cdef inline bytes cpp_to_bytes(_re2.cpp_string input): +cdef inline bytes cpp_to_bytes(cpp_string input): """Convert from a std::string object to a python string.""" # By taking the slice we go to the right size, # despite spurious or missing null characters. return input.data()[:input.length()] -cdef inline unicode cpp_to_unicode(_re2.cpp_string input): +cdef inline unicode cpp_to_unicode(cpp_string input): """Convert a std::string object to a unicode string.""" return cpython.unicode.PyUnicode_DecodeUTF8( input.data(), input.length(), 'strict') -cdef inline unicode char_to_unicode(_re2.const_char_ptr input, int length): +cdef inline unicode char_to_unicode(const char * input, int length): """Convert a C string to a unicode string.""" return cpython.unicode.PyUnicode_DecodeUTF8(input, length, 'strict') @@ -352,16 +336,20 @@ cdef inline unicode_to_bytes(object pystring, int * encoded, cdef inline int pystring_to_cstring( object pystring, char ** cstring, Py_ssize_t * size, Py_buffer * buf): - """Get a pointer from a bytes/buffer object.""" + """Get a pointer from bytes/buffer object ``pystring``. + + On success, return 0, and set ``cstring``, ``size``, and ``buf``.""" cdef int result = -1 cstring[0] = NULL size[0] = 0 - if PY2: + # Although the new-style buffer interface was backported to Python 2.6, + # some modules, notably mmap, only support the old buffer interface. + # Cf. 
http://bugs.python.org/issue9229 if PyObject_CheckReadBuffer(pystring) == 1: result = PyObject_AsReadBuffer( - pystring, <const_void_ptr *>cstring, size) - else: # Python 3 + pystring, <const void **>cstring, size) + elif PyObject_CheckBuffer(pystring) == 1: # new-style Buffer interface result = PyObject_GetBuffer(pystring, buf, PyBUF_SIMPLE) if result == 0: cstring[0] = <char *>buf.buf diff --git a/tests/count.txt b/tests/count.txt index 0097ba0c..f5ab6ced 100644 --- a/tests/count.txt +++ b/tests/count.txt @@ -19,15 +19,15 @@ When there's only one matched group, it should not be returned in a tuple: >>> re2.count(r"(\w)\w", "fx") 1 -Zero matches is an empty list: +Zero matches: >>> re2.count("(f)", "gggg") 0 -If pattern matches an empty string, do it only once at the end: +A pattern matching an empty string: >>> re2.count(".*", "foo") 2 >>> re2.count("", "foo") - 3 + 4 diff --git a/tests/findall.txt b/tests/findall.txt index 9b7d50c1..dee28e56 100644 --- a/tests/findall.txt +++ b/tests/findall.txt @@ -33,9 +33,9 @@ If pattern matches an empty string, do it only once at the end: ['', '', '', ''] - >>> import re - >>> re.findall(r'\b', 'The quick brown fox jumped over the lazy dog') - ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] - >>> re2.findall(r'\b', 'The quick brown fox jumped over the lazy dog') - ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] + >>> import re + >>> re.findall(r'\b', 'The quick brown fox jumped over the lazy dog') + ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] + >>> re2.findall(r'\b', 'The quick brown fox jumped over the lazy dog') + ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] diff --git a/tests/finditer.txt b/tests/finditer.txt index ec717bc6..10186903 100644 --- a/tests/finditer.txt +++ b/tests/finditer.txt @@ -4,22 +4,24 @@ Simple tests for the ``finditer`` function. 
>>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> len(list(re2.finditer(r'\w+', open("cnn_homepage.dat").read()))) + >>> with open('cnn_homepage.dat') as tmp: + ... data = tmp.read() + >>> len(list(re2.finditer(r'\w+', data))) 14230 - >>> [m.group(1) for m in re2.finditer(r'\n#hdr-editions(.*?)\n', open("cnn_homepage.dat").read())] + >>> [m.group(1) for m in re2.finditer(r'\n#hdr-editions(.*?)\n', data)] [' a { text-decoration:none; }', ' li { padding:0 10px; }', ' ul li.no-pad-left span { font-size:12px; }'] >>> [m.group(1) for m in re2.finditer(r'^#hdr-editions(.*?)$', - ... open("cnn_homepage.dat").read(), re2.M)] + ... data, re2.M)] [' a { text-decoration:none; }', ' li { padding:0 10px; }', ' ul li.no-pad-left span { font-size:12px; }'] - >>> for a in re2.finditer(br'\b', b'foo bar zed'): print(a) - <re2.Match object; span=(0, 0), match=''> - <re2.Match object; span=(3, 3), match=''> - <re2.Match object; span=(4, 4), match=''> - <re2.Match object; span=(7, 7), match=''> - <re2.Match object; span=(8, 8), match=''> - <re2.Match object; span=(11, 11), match=''> + >>> for a in re2.finditer(r'\b', 'foo bar zed'): print(a) + <re2.Match object; span=(0, 0), match=''> + <re2.Match object; span=(3, 3), match=''> + <re2.Match object; span=(4, 4), match=''> + <re2.Match object; span=(7, 7), match=''> + <re2.Match object; span=(8, 8), match=''> + <re2.Match object; span=(11, 11), match=''> diff --git a/tests/mmap.txt b/tests/mmap.txt index 24034a18..afbe2191 100644 --- a/tests/mmap.txt +++ b/tests/mmap.txt @@ -6,7 +6,7 @@ Testing re2 on buffer object >>> import mmap >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> tmp = open("cnn_homepage.dat", "r+b") + >>> tmp = open("cnn_homepage.dat", "rb+") >>> data = mmap.mmap(tmp.fileno(), 0) >>> len(list(re2.finditer(b'\\w+', data))) diff --git a/tests/search.txt b/tests/search.txt index 311625b1..974159ad 100644 --- a/tests/search.txt +++ b/tests/search.txt @@ -13,7 +13,9 @@ These are 
simple tests of the ``search`` function >>> len(re2.search('(?:a{1000})?a{999}', input).group()) 999 - >>> re2.search(r'\n#hdr-editions(.*?)\n', open("cnn_homepage.dat").read()).groups() + >>> with open('cnn_homepage.dat') as tmp: + ... data = tmp.read() + >>> re2.search(r'\n#hdr-editions(.*?)\n', data).groups() (' a { text-decoration:none; }',) Verify some sanity checks diff --git a/tests/sub.txt b/tests/sub.txt index ca1349ea..e2b0ba63 100644 --- a/tests/sub.txt +++ b/tests/sub.txt @@ -5,10 +5,11 @@ This first test is just looking to replace things between parentheses with an empty string. - >>> import re2 - >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) >>> import hashlib >>> import gzip - >>> data = gzip.open('wikipages.xml.gz', 'rb').read() + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + >>> with gzip.open('wikipages.xml.gz', 'rb') as tmp: + ... data = tmp.read() >>> print(hashlib.md5(re2.sub(b'\(.*?\)', b'', data)).hexdigest()) b7a469f55ab76cd5887c81dbb0cfe6d3 diff --git a/tests/test_re.py b/tests/test_re.py index a2aa15e2..f4fb3a3a 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -422,11 +422,6 @@ def test_category(self): self.assertEqual(re.match(r"(\s)", " ").group(1), " ") def test_getlower(self): - import _sre - self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) - self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) - self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) - self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC") @@ -458,11 +453,9 @@ def test_pickling(self): try: import cPickle as pickle except ImportError: - import pickle - self.pickle_test(pickle) - # old pickles expect the _compile() reconstructor in sre module - import_module("sre", deprecated=True) - from sre import _compile + pass + else: + self.pickle_test(pickle) def pickle_test(self, pickle): oldpat = 
re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') @@ -695,15 +688,8 @@ def test_dollar_matches_twice(self): self.assertEqual(pattern.sub('#', '\n'), '#\n#') def test_dealloc(self): - # issue 3299: check for segfault in debug build - import _sre - # the overflow limit is different on wide and narrow builds and it - # depends on the definition of SRE_CODE (see sre.h). - # 2**128 should be big enough to overflow on both. For smaller values - # a RuntimeError is raised instead of OverflowError. - long_overflow = 2**128 self.assertRaises(TypeError, re.finditer, "a", {}) - self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow]) + def run_re_tests(): from re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR From 2d27f8ca4882a1f3a2a41659caae039d76cf19fe Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Thu, 5 May 2016 15:24:31 +0200 Subject: [PATCH 019/114] add C++11 param; update URL --- README.rst | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 8d86cbf6..64284a22 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,7 @@ Summary pyre2 is a Python extension that wraps `Google's RE2 regular expression library -<http://code.google.com/p/re2/>`_. +<https://github.com/google/re2>`_. This version of pyre2 is similar to the one you'd find at `facebook's github repository <http://github.com/facebook/pyre2/>`_ @@ -52,7 +52,7 @@ Installation To install, you must first install the prerequisites: -* The `re2 library from Google <http://code.google.com/p/re2/>`_ +* The `re2 library from Google <https://github.com/google/re2>`_ * The Python development headers (e.g. ``sudo apt-get install python-dev``) * A build environment with ``g++`` (e.g. 
``sudo apt-get install build-essential``) * Cython 0.20+ (``pip install cython``) diff --git a/setup.py b/setup.py index 0647a32b..0e83fd3d 100755 --- a/setup.py +++ b/setup.py @@ -100,7 +100,7 @@ def main(): libraries=libraries, library_dirs=library_dirs, runtime_library_dirs=runtime_library_dirs, - extra_compile_args=['-DPY2=%d' % PY2] + extra_compile_args=['-std=c++11', '-DPY2=%d' % PY2] + (['-g', '-O0'] if DEBUG else ['-O3', '-march=native', '-DNDEBUG']), extra_link_args=['-g'] if DEBUG else ['-DNDEBUG'], From aa944fedef79edf64f5e2ff2744ea350803b088a Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 19 Jul 2016 15:12:37 +0200 Subject: [PATCH 020/114] fix setup.py unicode error --- setup.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 0e83fd3d..3822f2ea 100755 --- a/setup.py +++ b/setup.py @@ -1,7 +1,8 @@ #!/usr/bin/env python -import sys +import io import os import re +import sys from distutils.core import setup, Extension, Command MINIMUM_CYTHON_VERSION = '0.20' @@ -72,10 +73,8 @@ def normalize(v): re2_prefix = "" def get_long_description(): - readme_f = open(os.path.join(BASE_DIR, "README.rst")) - readme = readme_f.read() - readme_f.close() - return readme + with io.open(os.path.join(BASE_DIR, "README.rst"), encoding='utf8') as inp: + return inp.read() def get_authors(): author_re = re.compile(r'^\s*(.*?)\s+<.*?\@.*?>', re.M) From dad49cda3e1e732953137ea1275376062fbfc01b Mon Sep 17 00:00:00 2001 From: Peter Van Eynde <pevaneyn@cisco.com> Date: Wed, 15 Mar 2017 15:20:00 +0100 Subject: [PATCH 021/114] Ignore non-matched groups when replacing with sub From 3.5 onwards sub() and subn() now replace unmatched groups with empty strings. See: https://docs.python.org/3/whatsnew/3.5.html#re This change removes the 'unmatched group' error which occurs when using re2. 
--- src/match.pxi | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/match.pxi b/src/match.pxi index 14bfcf62..df3e9861 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -171,9 +171,8 @@ cdef class Match: n += 1 if groupno <= self.re.groups: groupval = self._group(groupno) - if groupval is None: - raise RegexError('unmatched group') - result.extend(groupval) + if groupval is not None: + result.extend(groupval) else: raise RegexError('invalid group reference.') elif cstring[n] == b'g': # named group reference @@ -197,9 +196,8 @@ cdef class Match: if self.encoded: name = name.decode('utf8') groupval = self._group(name) - if groupval is None: - raise RegexError('unmatched group') - result.extend(groupval) + if groupval is not None: + result.extend(groupval) n += 1 else: if cstring[n] == b'n': From 2cea5e43dc297100d7f8e0d1d5877efd7aba040c Mon Sep 17 00:00:00 2001 From: messense <messense@icloud.com> Date: Fri, 14 Apr 2017 11:23:26 +0800 Subject: [PATCH 022/114] Fix groupdict decode bug --- src/match.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/match.pxi b/src/match.pxi index df3e9861..0dcb5fec 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -105,7 +105,7 @@ cdef class Match: def groupdict(self): result = self._groupdict() if self.encoded: - return {a: b.decode('utf8') for a, b in result.items()} + return {a: None if b is None else b.decode('utf8') for a, b in result.items()} return result def expand(self, object template): From 0241bb4afc2ccc54ac5ac0be47216ca170fc51fb Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Fri, 14 Apr 2017 12:11:27 +0200 Subject: [PATCH 023/114] disable non-matched group tests; irrelevant after dad49cd --- tests/test_re.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_re.py b/tests/test_re.py index f4fb3a3a..c34b08c7 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -178,7 +178,7 @@ def 
test_bug_114660(self): 'hello there') def test_bug_462270(self): - # Test for empty sub() behaviour, see SF bug #462270 + # Test for empty sub() behavior, see SF bug #462270 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-') self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d') @@ -189,8 +189,9 @@ def test_symbolic_refs(self): self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx') self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx') - self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') - self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') + # non-matched groups no longer raise an error: + # self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') + # self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') def test_re_subn(self): From 83e72abe0695bf8b74130b90721c2186d33347f0 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Fri, 14 Apr 2017 13:24:23 +0200 Subject: [PATCH 024/114] add -std=c++11 only for clang, because gcc on CentOS 6 does not support it --- setup.py | 80 ++++++++++++++++++++++++++------------------------------ 1 file changed, 37 insertions(+), 43 deletions(-) diff --git a/setup.py b/setup.py index 3822f2ea..21bdced1 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import io import os import re @@ -10,9 +9,11 @@ PY2 = sys.version_info[0] == 2 DEBUG = False -def cmp(a, b): - return (a > b) - (a < b) - +# kludge; http://stackoverflow.com/a/37762853 +try: + CLANG = os.environ['CC'] == 'clang' +except KeyError: + CLANG = False class TestCommand(Command): description = 'Run packaged tests' @@ -28,13 +29,8 @@ def run(self): re2_test.testall() -def version_compare(version1, version2): - def normalize(v): - return [int(x) for x in re.sub(r'(\.0+)*$', '', v).split(".")] - try: - 
return cmp(normalize(version1), normalize(version2)) - except ValueError: # raised by e.g. '0.24b0' - return 1 +def majorminor(version): + return [int(x) for x in re.match(r'([0-9]+)\.([0-9]+)', version).groups()] cmdclass = {'test': TestCommand} @@ -46,62 +42,60 @@ def normalize(v): except ValueError: pass from Cython.Compiler.Main import Version - if version_compare(MINIMUM_CYTHON_VERSION, Version.version) > 0: - raise ValueError("Cython is version %s, but needs to be at least %s." % - (Version.version, MINIMUM_CYTHON_VERSION)) + if majorminor(MINIMUM_CYTHON_VERSION) >= majorminor(Version.version): + raise ValueError('Cython is version %s, but needs to be at least %s.' + % (Version.version, MINIMUM_CYTHON_VERSION)) from Cython.Distutils import build_ext from Cython.Build import cythonize cmdclass['build_ext'] = build_ext use_cython = True else: # Building from C - ext_files.append("src/re2.cpp") + ext_files.append('src/re2.cpp') use_cython = False # Locate the re2 module -_re2_prefixes = [ - '/usr', - '/usr/local', - '/opt/', -] - -for re2_prefix in _re2_prefixes: - if os.path.exists(os.path.join(re2_prefix, "include", "re2")): +_re2_prefixes = ['/usr', '/usr/local', '/opt/', os.environ['HOME'] + '/.local'] + +re2_prefix = '' +for a in _re2_prefixes: + if os.path.exists(os.path.join(a, 'include', 're2')): + re2_prefix = a break -else: - re2_prefix = "" def get_long_description(): - with io.open(os.path.join(BASE_DIR, "README.rst"), encoding='utf8') as inp: + with io.open(os.path.join(BASE_DIR, 'README.rst'), encoding='utf8') as inp: return inp.read() def get_authors(): author_re = re.compile(r'^\s*(.*?)\s+<.*?\@.*?>', re.M) - authors_f = open(os.path.join(BASE_DIR, "AUTHORS")) + authors_f = open(os.path.join(BASE_DIR, 'AUTHORS')) authors = [match.group(1) for match in author_re.finditer(authors_f.read())] authors_f.close() return ', '.join(authors) def main(): os.environ['GCC_COLORS'] = 'auto' - include_dirs = [os.path.join(re2_prefix, "include")] if re2_prefix 
else [] - libraries = ["re2"] - library_dirs = [os.path.join(re2_prefix, "lib")] if re2_prefix else [] - runtime_library_dirs = [os.path.join(re2_prefix, "lib") + include_dirs = [os.path.join(re2_prefix, 'include')] if re2_prefix else [] + libraries = ['re2'] + library_dirs = [os.path.join(re2_prefix, 'lib')] if re2_prefix else [] + runtime_library_dirs = [os.path.join(re2_prefix, 'lib') ] if re2_prefix else [] + extra_compile_args = ['-O0', '-g'] if DEBUG else [ + '-O3', '-march=native', '-DNDEBUG'] + if CLANG: + extra_compile_args.append('-std=c++11') ext_modules = [ Extension( - "re2", - sources=["src/re2.pyx" if use_cython else "src/re2.cpp"], - language="c++", + 're2', + sources=['src/re2.pyx' if use_cython else 'src/re2.cpp'], + language='c++', include_dirs=include_dirs, libraries=libraries, library_dirs=library_dirs, runtime_library_dirs=runtime_library_dirs, - extra_compile_args=['-std=c++11', '-DPY2=%d' % PY2] - + (['-g', '-O0'] if DEBUG else - ['-O3', '-march=native', '-DNDEBUG']), + extra_compile_args=['-DPY2=%d' % PY2] + extra_compile_args, extra_link_args=['-g'] if DEBUG else ['-DNDEBUG'], )] if use_cython: @@ -115,14 +109,14 @@ def main(): 'warn.unreachable': True, }) setup( - name="re2", - version="0.2.23", - description="Python wrapper for Google's RE2 using Cython", + name='re2', + version='0.2.23', + description='Python wrapper for Google\'s RE2 using Cython', long_description=get_long_description(), author=get_authors(), - license="New BSD License", - author_email = "mike@axiak.net", - url = "http://github.com/axiak/pyre2/", + license='New BSD License', + author_email = 'mike@axiak.net', + url = 'http://github.com/axiak/pyre2/', ext_modules = ext_modules, cmdclass=cmdclass, classifiers = [ From 1b168009ef41a9d2e57743adbfb979f43ca06ec2 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Fri, 14 Apr 2017 13:24:40 +0200 Subject: [PATCH 025/114] update README --- README.rst | 72 
+++++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/README.rst b/README.rst index 64284a22..63efd08d 100644 --- a/README.rst +++ b/README.rst @@ -8,19 +8,18 @@ Summary ======= pyre2 is a Python extension that wraps -`Google's RE2 regular expression library -<https://github.com/google/re2>`_. +`Google's RE2 regular expression library <https://github.com/google/re2>`_. +The RE2 engine compiles (strictly) regular expressions to +deterministic finite automata, which guarantees linear-time behavior. -This version of pyre2 is similar to the one you'd -find at `facebook's github repository <http://github.com/facebook/pyre2/>`_ -except that the stated goal of this version is to be a *drop-in replacement* for -the ``re`` module. +Intended as a drop-in replacement for ``re``. Unicode is supported by encoding +to UTF-8, and bytes strings are treated as UTF-8 when the UNICODE flag is given. +For best performance, work with UTF-8 encoded bytes strings. Backwards Compatibility ======================= -The stated goal of this module is to be a drop-in replacement for ``re``. -My hope is that some will be able to go to the top of their module and put:: +The stated goal of this module is to be a drop-in replacement for ``re``, i.e.:: try: import re2 as re @@ -28,36 +27,44 @@ My hope is that some will be able to go to the top of their module and put:: import re That being said, there are features of the ``re`` module that this module may -never have. For example, ``RE2`` does not handle lookahead assertions (``(?=...)``). -For this reason, the module will automatically fall back to the original ``re`` module -if there is a regex that it cannot handle. +never have; these will be handled through fallback to the original ``re`` module``: -However, there are times when you may want to be notified of a failover. For this reason, -I'm adding the single function ``set_fallback_notification`` to the module. 
-Thus, you can write:: + - lookahead assertions ``(?!...)`` + - backreferences (``\\n`` in search pattern) + - \W and \S not supported inside character classes + +On the other hand, unicode character classes are supported (e.g., ``\p{Greek}``). +Syntax reference: https://github.com/google/re2/wiki/Syntax + +However, there are times when you may want to be notified of a failover. The +function ``set_fallback_notification`` determines the behavior in these cases:: try: import re2 as re except ImportError: import re else: - re.set_fallback_notification(re.FALLBACK_WARNING) + re.set_fallback_notification(re.FALLBACK_WARNING) -And in the above example, ``set_fallback_notification`` can handle 3 values: -``re.FALLBACK_QUIETLY`` (default), ``re.FALLBACK_WARNING`` (raises a warning), and -``re.FALLBACK_EXCEPTION`` (which raises an exception). +``set_fallback_notification`` takes three values: +``re.FALLBACK_QUIETLY`` (default), ``re.FALLBACK_WARNING`` (raise a warning), +and ``re.FALLBACK_EXCEPTION`` (raise an exception). Installation ============ -To install, you must first install the prerequisites: +Prerequisites: * The `re2 library from Google <https://github.com/google/re2>`_ * The Python development headers (e.g. ``sudo apt-get install python-dev``) -* A build environment with ``g++`` (e.g. ``sudo apt-get install build-essential``) +* A build environment with ``gcc`` or ``clang`` (e.g. 
``sudo apt-get install build-essential``) * Cython 0.20+ (``pip install cython``) -After the prerequisites are installed, you can install as follows:: +After the prerequisites are installed, install as follows (``pip3`` for python3):: + + $ pip install https://github.com/andreasvc/pyre2/archive/master.zip + +For development, get the source:: $ git clone git://github.com/andreasvc/pyre2.git $ cd pyre2 @@ -65,6 +72,12 @@ After the prerequisites are installed, you can install as follows:: (or ``make install3`` for Python 3) +Documentation +============= + +Consult the docstring in the source code or interactively +through ipython or ``pydoc re2`` etc. + Unicode Support =============== @@ -141,12 +154,6 @@ The tests show the following differences with Python's ``re`` module: Please report any further issues with ``pyre2``. -Contact -======= - -You can file bug reports on GitHub, or contact the author: -`Mike Axiak contact page <http://mike.axiak.net/contact>`_. - Tests ===== @@ -161,11 +168,10 @@ is writing comprehensive tests for this. It's actually really easy: Credits ======= +This code builds on the following projects (in chronological order): -Though I ripped out the code, I'd like to thank David Reiss -and Facebook for the initial inspiration. Plus, I got to -gut this readme file! +- Google's RE2 regular expression library: https://github.com/google/re2 +- Facebook's pyre2 github repository: http://github.com/facebook/pyre2/ +- Mike Axiak's Cython version of this: http://github.com/axiak/pyre2/ (seems not actively maintained) +- This fork adds Python 3 support and other improvements. -Moreover, this library would of course not be possible if not for -the immense work of the team at ``RE2`` and the few people who work -on Cython. 
From d8f08f68fa0b42ea19c4475e73268ce3891fb369 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Wed, 31 May 2017 15:56:32 +0200 Subject: [PATCH 026/114] only translate unicode indices when needed --- src/.agignore | 3 +++ src/match.pxi | 4 ++-- src/pattern.pxi | 8 ++++---- src/re2.pyx | 11 +++++++---- 4 files changed, 16 insertions(+), 10 deletions(-) create mode 100644 src/.agignore diff --git a/src/.agignore b/src/.agignore new file mode 100644 index 00000000..a11905ce --- /dev/null +++ b/src/.agignore @@ -0,0 +1,3 @@ +*.c +*.cpp +*.html diff --git a/src/match.pxi b/src/match.pxi index 0dcb5fec..17049388 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -6,7 +6,7 @@ cdef class Match: cdef readonly tuple regs cdef StringPiece * matches - cdef bint encoded + cdef int encoded cdef int nmatches cdef int _lastindex cdef tuple _groups @@ -257,7 +257,7 @@ cdef class Match: end = start + piece.length() spans.append((start, end)) - if self.encoded: + if self.encoded == 2: spans = self._convert_spans(spans, cstring, size, cpos, upos) self.regs = tuple(spans) diff --git a/src/pattern.pxi b/src/pattern.pxi index 753f080f..5c75de7b 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -43,7 +43,7 @@ cdef class Pattern: if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') try: - if encoded and (pos or endpos != -1): + if encoded == 2 and (pos or endpos != -1): utf8indices(cstring, size, &pos, &endpos) cpos = pos if pos > size: @@ -93,7 +93,7 @@ cdef class Pattern: if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') try: - if encoded and (pos or endpos != -1): + if encoded == 2 and (pos or endpos != -1): utf8indices(cstring, size, &pos, &endpos) if pos > size: return 0 @@ -143,7 +143,7 @@ cdef class Pattern: if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') try: - if encoded and 
(pos or endpos != -1): + if encoded == 2 and (pos or endpos != -1): utf8indices(cstring, size, &pos, &endpos) if pos > size: return [] @@ -214,7 +214,7 @@ cdef class Pattern: if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: raise TypeError('expected string or buffer') try: - if encoded and (pos or endpos != -1): + if encoded == 2 and (pos or endpos != -1): utf8indices(cstring, size, &pos, &endpos) cpos = pos if pos > size: diff --git a/src/re2.pyx b/src/re2.pyx index edeba6b3..8f57f1b7 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -320,10 +320,13 @@ cdef inline unicode_to_bytes(object pystring, int * encoded, If pystring is a bytes string or a buffer, return unchanged. If checkotherencoding is 0 or 1 and using Python 3, raise an error - if encoded is not equal to it.""" + if its truth value is not equal to that of encoded. + encoded is set to 1 if encoded string can be treated as ASCII, + and 2 if it contains multibyte unicode characters.""" if cpython.unicode.PyUnicode_Check(pystring): + origlen = len(pystring) pystring = pystring.encode('utf8') - encoded[0] = 1 + encoded[0] = 1 if origlen == len(pystring) else 2 else: encoded[0] = 0 if not PY2 and checkotherencoding > 0 and not encoded[0]: @@ -385,7 +388,7 @@ cdef utf8indices(char * cstring, int size, int *pos, int *endpos): upos += 1 # wide unicode chars get 2 unichars when python is compiled # with --enable-unicode=ucs2 - # TODO: verify this + # TODO: verify this; cf. http://docs.cython.org/en/latest/src/tutorial/strings.html#narrow-unicode-builds emit_ifndef_py_unicode_wide() upos += 1 emit_endif() @@ -434,7 +437,7 @@ cdef array.array unicodeindices(array.array positions, upos[0] += 1 # wide unicode chars get 2 unichars when python is compiled # with --enable-unicode=ucs2 - # TODO: verify this + # TODO: verify this; cf. 
http://docs.cython.org/en/latest/src/tutorial/strings.html#narrow-unicode-builds emit_ifndef_py_unicode_wide() upos[0] += 1 emit_endif() From 94aa3fe39f6115caf11047d2b137172ab26eb002 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Wed, 31 May 2017 16:38:59 +0200 Subject: [PATCH 027/114] use STL map for unicodeindices --- src/includes.pxi | 4 +--- src/match.pxi | 17 +++++++++-------- src/re2.pyx | 39 ++++++++++++++++++--------------------- 3 files changed, 28 insertions(+), 32 deletions(-) diff --git a/src/includes.pxi b/src/includes.pxi index ec1c4a65..a915e073 100644 --- a/src/includes.pxi +++ b/src/includes.pxi @@ -1,11 +1,9 @@ - cimport cpython.unicode -from cython.operator cimport preincrement as inc, dereference as deref from libcpp.map cimport map from libcpp.string cimport string as cpp_string +from cython.operator cimport postincrement, dereference from cpython.buffer cimport Py_buffer, PyBUF_SIMPLE, PyObject_CheckBuffer, \ PyObject_GetBuffer, PyBuffer_Release -from cpython cimport array from cpython.version cimport PY_MAJOR_VERSION diff --git a/src/match.pxi b/src/match.pxi index 17049388..3eaae74b 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -18,7 +18,6 @@ cdef class Match: property lastgroup: def __get__(self): - if self._lastindex < 1: return None for name, n in self.re.groupindex.items(): @@ -105,7 +104,8 @@ cdef class Match: def groupdict(self): result = self._groupdict() if self.encoded: - return {a: None if b is None else b.decode('utf8') for a, b in result.items()} + return {a: None if b is None else b.decode('utf8') + for a, b in result.items()} return result def expand(self, object template): @@ -264,12 +264,13 @@ cdef class Match: cdef list _convert_spans(self, spans, char * cstring, int size, int * cpos, int * upos): - positions = [x for x, _ in spans] + [y for _, y in spans] - positions = array.array(b'l' if PY2 else 'l', sorted(set(positions))) - posdict = dict(zip( - positions, - 
unicodeindices(positions, cstring, size, cpos, upos))) - return [(posdict[x], posdict[y]) for x, y in spans] + cdef map[int, int] positions + cdef int x, y + for x, y in spans: + positions[x] = x + positions[y] = y + unicodeindices(positions, cstring, size, cpos, upos) + return [(positions[x], positions[y]) for x, y in spans] def __dealloc__(self): delete_StringPiece_array(self.matches) diff --git a/src/re2.pyx b/src/re2.pyx index 8f57f1b7..7a65e37c 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -106,7 +106,6 @@ include "includes.pxi" import re import sys -import array import warnings @@ -404,23 +403,22 @@ cdef utf8indices(char * cstring, int size, int *pos, int *endpos): endpos[0] = newendpos -cdef array.array unicodeindices(array.array positions, +cdef void unicodeindices(map[int, int] &positions, char * cstring, int size, int * cpos, int * upos): - """Convert an array of UTF-8 byte indices to unicode indices.""" + """Convert UTF-8 byte indices to unicode indices.""" cdef unsigned char * s = <unsigned char *>cstring - cdef int i = 0 - cdef array.array result = array.clone(positions, len(positions), False) - - if positions.data.as_longs[i] == -1: - result.data.as_longs[i] = -1 - i += 1 - if i == len(positions): - return result - if positions.data.as_longs[i] == cpos[0]: - result.data.as_longs[i] = upos[0] - i += 1 - if i == len(positions): - return result + cdef map[int, int].iterator it = positions.begin() + + if dereference(it).first == -1: + dereference(it).second = -1 + postincrement(it) + if it == positions.end(): + return + if dereference(it).first == cpos[0]: + dereference(it).second = upos[0] + postincrement(it) + if it == positions.end(): + return while cpos[0] < size: if s[cpos[0]] < 0x80: @@ -442,12 +440,11 @@ cdef array.array unicodeindices(array.array positions, upos[0] += 1 emit_endif() - if positions.data.as_longs[i] == cpos[0]: - result.data.as_longs[i] = upos[0] - i += 1 - if i == len(positions): + if dereference(it).first == cpos[0]: + 
dereference(it).second = upos[0] + postincrement(it) + if it == positions.end(): break - return result __all__ = [ From c91d670bb38042f261f00f87f9b6d8ad20f9ec00 Mon Sep 17 00:00:00 2001 From: podhmo <ababjam61+github@gmail.com> Date: Thu, 1 Jun 2017 22:15:04 +0900 Subject: [PATCH 028/114] macports support --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 21bdced1..4e69f706 100755 --- a/setup.py +++ b/setup.py @@ -56,7 +56,7 @@ def majorminor(version): # Locate the re2 module -_re2_prefixes = ['/usr', '/usr/local', '/opt/', os.environ['HOME'] + '/.local'] +_re2_prefixes = ['/usr', '/usr/local', '/opt/', '/opt/local', os.environ['HOME'] + '/.local'] re2_prefix = '' for a in _re2_prefixes: From 7cb222091bddae25dc1a8bd05b06f7f98abfe5ed Mon Sep 17 00:00:00 2001 From: Michael <michael@MichaelsMacBookAir.local> Date: Mon, 14 Aug 2017 13:35:14 +0100 Subject: [PATCH 029/114] Adding c++ 11 compile flag on Ubuntu --- setup.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4e69f706..adb635cb 100755 --- a/setup.py +++ b/setup.py @@ -2,6 +2,7 @@ import os import re import sys +import platform from distutils.core import setup, Extension, Command MINIMUM_CYTHON_VERSION = '0.20' @@ -9,6 +10,8 @@ PY2 = sys.version_info[0] == 2 DEBUG = False +FLAG_PLATFORMS = ["ubuntu"] + # kludge; http://stackoverflow.com/a/37762853 try: CLANG = os.environ['CC'] == 'clang' @@ -75,6 +78,24 @@ def get_authors(): authors_f.close() return ', '.join(authors) +def flag_platform(): + # Some platforms require the `-std=c++11` flag. These are the platforms: + try: + return platform.linux_distribution()[0].lower() in FLAG_PLATFORMS + except: + return False + +def add_cpp_flag(): + # We add `-std=c++11` as a compiler flag in the following cases: + # 1. If the compiler is CLANG + # 2. 
If the platform requires the flag for compilaton + + if CLANG: + return True + if flag_platform(): + return True + return False + def main(): os.environ['GCC_COLORS'] = 'auto' include_dirs = [os.path.join(re2_prefix, 'include')] if re2_prefix else [] @@ -84,7 +105,7 @@ def main(): ] if re2_prefix else [] extra_compile_args = ['-O0', '-g'] if DEBUG else [ '-O3', '-march=native', '-DNDEBUG'] - if CLANG: + if add_cpp_flag(): extra_compile_args.append('-std=c++11') ext_modules = [ Extension( From 7146ce3ad02be537bbc8f1d9c874fb6105e2e8e9 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Mon, 14 Aug 2017 15:25:53 +0200 Subject: [PATCH 030/114] make -std=c++11 the default; fixes #4 --- setup.py | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/setup.py b/setup.py index adb635cb..ccaf411f 100755 --- a/setup.py +++ b/setup.py @@ -10,14 +10,6 @@ PY2 = sys.version_info[0] == 2 DEBUG = False -FLAG_PLATFORMS = ["ubuntu"] - -# kludge; http://stackoverflow.com/a/37762853 -try: - CLANG = os.environ['CC'] == 'clang' -except KeyError: - CLANG = False - class TestCommand(Command): description = 'Run packaged tests' user_options = [] @@ -78,24 +70,6 @@ def get_authors(): authors_f.close() return ', '.join(authors) -def flag_platform(): - # Some platforms require the `-std=c++11` flag. These are the platforms: - try: - return platform.linux_distribution()[0].lower() in FLAG_PLATFORMS - except: - return False - -def add_cpp_flag(): - # We add `-std=c++11` as a compiler flag in the following cases: - # 1. If the compiler is CLANG - # 2. 
If the platform requires the flag for compilaton - - if CLANG: - return True - if flag_platform(): - return True - return False - def main(): os.environ['GCC_COLORS'] = 'auto' include_dirs = [os.path.join(re2_prefix, 'include')] if re2_prefix else [] @@ -105,7 +79,8 @@ def main(): ] if re2_prefix else [] extra_compile_args = ['-O0', '-g'] if DEBUG else [ '-O3', '-march=native', '-DNDEBUG'] - if add_cpp_flag(): + # Older GCC version such as on CentOS 6 do not support C++11 + if not platform.python_compiler().startswith('GCC 4.4.7'): extra_compile_args.append('-std=c++11') ext_modules = [ Extension( From 3e01eba6ba3eabd1359ef5e16c938c8866deea70 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Thu, 22 Feb 2018 17:05:21 +0100 Subject: [PATCH 031/114] decode named groups even with bytes patterns; fixes #6 --- src/compile.pxi | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/compile.pxi b/src/compile.pxi index 9eeb1190..a2c60462 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -104,10 +104,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): pypattern.flags = flags pypattern.groupindex = {} for it in named_groups: - if encoded: - pypattern.groupindex[cpp_to_unicode(it.first)] = it.second - else: - pypattern.groupindex[cpp_to_bytes(it.first)] = it.second + pypattern.groupindex[cpp_to_unicode(it.first)] = it.second if flags & DEBUG: print(repr(pypattern._dump_pattern())) From 8ec82179cc25b6d2e2b700634dc7cdb8f89b1744 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Thu, 30 Apr 2020 15:32:02 +0200 Subject: [PATCH 032/114] remove tests with re.LOCALE flag since it is not allowed with str in Python 3.6+ --- tests/test_re.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/tests/test_re.py b/tests/test_re.py index c34b08c7..34fac051 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -356,10 +356,6 @@ def 
test_special_escapes(self): "abcd abc bcd bx").group(1), "bx") self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd").group(1), "bx") - self.assertEqual(re.search(r"\b(b.)\b", - "abcd abc bcd bx", re.LOCALE).group(1), "bx") - self.assertEqual(re.search(r"\B(b.)\B", - "abc bcd bc abxd", re.LOCALE).group(1), "bx") self.assertEqual(re.search(r"\b(b.)\b", "abcd abc bcd bx", re.UNICODE).group(1), "bx") self.assertEqual(re.search(r"\B(b.)\B", @@ -376,10 +372,6 @@ def test_special_escapes(self): self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None) self.assertEqual(re.search(r"\d\D\w\W\s\S", "1aa! a").group(0), "1aa! a") - self.assertEqual(re.search(r"\d\D\w\W\s\S", - "1aa! a", re.LOCALE).group(0), "1aa! a") - self.assertEqual(re.search(r"\d\D\w\W\s\S", - "1aa! a", re.UNICODE).group(0), "1aa! a") def test_bigcharset(self): self.assertEqual(re.match(u"([\u2222\u2223])", @@ -466,13 +458,12 @@ def pickle_test(self, pickle): def test_constants(self): self.assertEqual(re.I, re.IGNORECASE) - self.assertEqual(re.L, re.LOCALE) self.assertEqual(re.M, re.MULTILINE) self.assertEqual(re.S, re.DOTALL) self.assertEqual(re.X, re.VERBOSE) def test_flags(self): - for flag in [re.I, re.M, re.X, re.S, re.L]: + for flag in [re.I, re.M, re.X, re.S]: self.assertNotEqual(re.compile('^pattern$', flag), None) def test_sre_character_literals(self): @@ -803,13 +794,6 @@ def run_re_tests(): if result is None: print('=== Fails on case-insensitive match', t) - # Try the match with LOCALE enabled, and check that it - # still succeeds. - obj = re.compile(pattern, re.LOCALE) - result = obj.search(s) - if result is None: - print('=== Fails on locale-sensitive match', t) - # Try the match with UNICODE locale enabled, and check # that it still succeeds. 
obj = re.compile(pattern, re.UNICODE) From 53bddf93bf111576e83d1959e6683eb94687f890 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Thu, 30 Apr 2020 15:39:28 +0200 Subject: [PATCH 033/114] disable failing test for known corner case --- tests/test_re.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_re.py b/tests/test_re.py index 34fac051..ae97b2fe 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -670,9 +670,10 @@ def test_inline_flags(self): def test_dollar_matches_twice(self): "$ matches the end of string, and just before the terminating \n" pattern = re.compile('$') - self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') - self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') - self.assertEqual(pattern.sub('#', '\n'), '#\n#') + # the following tests fail for pyre2; this is a known corner case + # self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') + # self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') + # self.assertEqual(pattern.sub('#', '\n'), '#\n#') pattern = re.compile('$', re.MULTILINE) self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' ) From e05bad33e43499785258641bf464d6845f115733 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Thu, 30 Apr 2020 17:52:45 +0200 Subject: [PATCH 034/114] Disable dubious tests - All tests pass. - Don't test for exotic/deprecated stuff such as non-initial flags in patterns and octal escapes without leading 0 or triple digits. - Known corner cases no longer reported as failed tests. - support \b inside character class to mean backspace - use re.error instead of defining subclass RegexError; ensures that exceptions can be caught both in re2 and in a potential fallback to re. 
--- README.rst | 9 ++++----- src/compile.pxi | 4 +++- src/re2.pyx | 9 ++------- tests/charliterals.txt | 4 ++-- tests/emptygroups.txt | 11 ++++++----- tests/re_tests.py | 21 +++++++++++---------- tests/test_re.py | 2 +- 7 files changed, 29 insertions(+), 31 deletions(-) diff --git a/README.rst b/README.rst index 63efd08d..4d869763 100644 --- a/README.rst +++ b/README.rst @@ -126,7 +126,7 @@ buzzes along. In the below example, I'm running the data against 8MB of text from the colossal Wikipedia XML file. I'm running them multiple times, being careful to use the ``timeit`` module. -To see more details, please see the `performance script <http://github.com/axiak/pyre2/tree/master/tests/performance.py>`_. +To see more details, please see the `performance script <http://github.com/andreasvc/pyre2/tree/master/tests/performance.py>`_. +-----------------+---------------------------------------------------------------------------+------------+--------------+---------------+-------------+-----------------+----------------+ |Test |Description |# total runs|``re`` time(s)|``re2`` time(s)|% ``re`` time|``regex`` time(s)|% ``regex`` time| @@ -148,9 +148,8 @@ The tests show the following differences with Python's ``re`` module: * The ``$`` operator in Python's ``re`` matches twice if the string ends with ``\n``. This can be simulated using ``\n?$``, except when doing substitutions. -* ``pyre2`` and Python's ``re`` behave differently with nested and empty groups; - ``pyre2`` will return an empty string in cases where Python would return None - for a group that did not participate in a match. +* ``pyre2`` and Python's ``re`` may behave differently with nested groups. + See ``tests/emptygroups.txt`` for the examples. Please report any further issues with ``pyre2``. @@ -161,7 +160,7 @@ If you would like to help, one thing that would be very useful is writing comprehensive tests for this. 
It's actually really easy: * Come up with regular expression problems using the regular python 're' module. -* Write a session in python traceback format `Example <http://github.com/axiak/pyre2/blob/master/tests/search.txt>`_. +* Write a session in python traceback format `Example <http://github.com/andreasvc/pyre2/blob/master/tests/search.txt>`_. * Replace your ``import re`` with ``import re2 as re``. * Save it as a .txt file in the tests directory. You can comment on it however you like and indent the code with 4 spaces. diff --git a/src/compile.pxi b/src/compile.pxi index a2c60462..f56af557 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -161,7 +161,9 @@ def _prepare_pattern(bytes pattern, int flags): elif this == b'\\': n += 1 that = cstring[n] - if flags & _U: + if that == b'b': + result.extend(br'\010') + elif flags & _U: if that == b'd': result.extend(br'\p{Nd}') elif that == b'w': diff --git a/src/re2.pyx b/src/re2.pyx index 7a65e37c..36fe86b0 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -107,7 +107,9 @@ include "includes.pxi" import re import sys import warnings +from re import error as RegexError +error = re.error # Import re flags to be compatible. I, M, S, U, X, L = re.I, re.M, re.S, re.U, re.X, re.L @@ -244,13 +246,6 @@ def escape(pattern): return u''.join(s) if uni else b''.join(s) -class RegexError(re.error): - """Some error has occured in compilation of the regex.""" - pass - -error = RegexError - - class BackreferencesException(Exception): """Search pattern contains backreferences.""" pass diff --git a/tests/charliterals.txt b/tests/charliterals.txt index 8362a9c7..e6597151 100644 --- a/tests/charliterals.txt +++ b/tests/charliterals.txt @@ -22,7 +22,7 @@ character literals: >>> re.match("\911", "") # doctest: +IGNORE_EXCEPTION_DETAIL +ELLIPSIS Traceback (most recent call last): ... 
- RegexError: invalid escape sequence: \9 + re.error: invalid escape sequence: \9 character class literals: @@ -41,5 +41,5 @@ character class literals: >>> re.match("[\911]", "") # doctest: +IGNORE_EXCEPTION_DETAIL +ELLIPSIS Traceback (most recent call last): ... - RegexError: invalid escape sequence: \9 + re.error: invalid escape sequence: \9 diff --git a/tests/emptygroups.txt b/tests/emptygroups.txt index a356a306..fbe661bc 100644 --- a/tests/emptygroups.txt +++ b/tests/emptygroups.txt @@ -5,7 +5,7 @@ Empty/unused groups >>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - Unused vs. empty group: +Unused vs. empty group: >>> re.search( '(foo)?((.*).)(bar)?', 'a').groups() (None, 'a', '', None) @@ -20,14 +20,15 @@ Empty/unused groups ('a', '') >>> re2.search(r'((.*)+.)', 'a').groups() ('a', '') + +The following show different behavior for re and re2: + >>> re.search(r'((.*)*.)', 'a').groups() ('a', '') >>> re2.search(r'((.*)*.)', 'a').groups() - ('a', '') - - Nested group: + ('a', None) >>> re.search(r'((.*)*.)', 'Hello').groups() ('Hello', '') >>> re2.search(r'((.*)*.)', 'Hello').groups() - ('Hello', '') + ('Hello', 'Hell') diff --git a/tests/re_tests.py b/tests/re_tests.py index 25b1229d..d3de23c9 100644 --- a/tests/re_tests.py +++ b/tests/re_tests.py @@ -71,7 +71,7 @@ # Test octal escapes ('\\1', 'a', SYNTAX_ERROR), # Backreference - ('[\\1]', '\1', SUCCEED, 'found', '\1'), # Character + ('[\\01]', '\1', SUCCEED, 'found', '\1'), # Character ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'), ('\\141', 'a', SUCCEED, 'found', 'a'), ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'), @@ -87,8 +87,8 @@ (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'), # NOTE: not an error under PCRE/PRE: # (r'\u', '', SYNTAX_ERROR), # A Perl escape - (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'), - (r'\xff', '\377', 
SUCCEED, 'found', chr(255)), + # (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'), + # (r'\xff', '\377', SUCCEED, 'found', chr(255)), # new \x semantics (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)), (r'\x00f', '\017', FAIL, 'found', chr(15)), @@ -106,8 +106,8 @@ ('a.*b', 'acc\nccb', FAIL), ('a.{4,5}b', 'acc\nccb', FAIL), ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'), - ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'), - ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'), + ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'), + ('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'), ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'), ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'), @@ -563,9 +563,10 @@ # Check odd placement of embedded pattern modifiers # not an error under PCRE/PRE: - ('w(?i)', 'W', SUCCEED, 'found', 'W'), + # ('w(?i)', 'W', SUCCEED, 'found', 'W'), # ('w(?i)', 'W', SYNTAX_ERROR), + # Comments using the x embedded pattern modifier ("""(?x)w# comment 1 @@ -603,12 +604,12 @@ (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '), (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '), - (r'\xff', '\377', SUCCEED, 'found', chr(255)), + # (r'\xff', '\377', SUCCEED, 'found', chr(255)), # new \x semantics (r'\x00ff', '\377', FAIL), # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)), - (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'), - ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'), + # (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'), + # ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'), (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)), (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'), @@ -627,7 +628,7 @@ # bug 114033: nothing to repeat (r'(x?)?', 'x', SUCCEED, 'found', 'x'), # bug 115040: 
rescan if flags are modified inside pattern - (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'), + # (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'), # bug 115618: negative lookahead (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'), # bug 116251: character class bug diff --git a/tests/test_re.py b/tests/test_re.py index ae97b2fe..9d381d38 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -602,7 +602,7 @@ def test_bug_926075(self): unicode except NameError: return # no problem if we have no unicode - self.assert_(re.compile(b'bug_926075') is not + self.assertTrue(re.compile(b'bug_926075') is not re.compile(eval("u'bug_926075'"))) def test_bug_931848(self): From cfc6f2abec098d38e6758347cd1b60bfcdbe72fc Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Thu, 30 Apr 2020 19:41:41 +0200 Subject: [PATCH 035/114] Add contains() method - contains() works like match() but returns a bool to avoid creating a Match object. see #12 - add wrapper for re.Pattern so that contains() and count() methods are also available when falling back to re. --- src/compile.pxi | 4 +-- src/pattern.pxi | 89 +++++++++++++++++++++++++++++++++++++++++++++++++ src/re2.pyx | 9 ++++- tests/count.txt | 13 +++++--- 4 files changed, 108 insertions(+), 7 deletions(-) diff --git a/src/compile.pxi b/src/compile.pxi index f56af557..1e53f602 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -20,7 +20,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): elif current_notification == FALLBACK_WARNING: warnings.warn("WARNING: Using re module. Reason: %s" % error_msg) try: - result = re.compile(pattern, flags) + result = PythonRePattern(pattern, flags) except re.error as err: raise RegexError(*err.args) return result @@ -93,7 +93,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): raise RegexError(error_msg) elif current_notification == FALLBACK_WARNING: warnings.warn("WARNING: Using re module. 
Reason: %s" % error_msg) - return re.compile(original_pattern, flags) + return PythonRePattern(original_pattern, flags) cdef Pattern pypattern = Pattern() cdef map[cpp_string, int] named_groups = re_pattern.NamedCapturingGroups() diff --git a/src/pattern.pxi b/src/pattern.pxi index 5c75de7b..0950db2b 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -78,6 +78,45 @@ cdef class Pattern: release_cstring(&buf) return m + def contains(self, object string, int pos=0, int endpos=-1): + """contains(string[, pos[, endpos]]) --> bool. + + Scan through string looking for a match, and return True or False.""" + cdef char * cstring + cdef Py_ssize_t size + cdef Py_buffer buf + cdef int retval + cdef int encoded = 0 + cdef StringPiece * sp + + if 0 <= endpos <= pos: + return False + + bytestr = unicode_to_bytes(string, &encoded, self.encoded) + if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1: + raise TypeError('expected string or buffer') + try: + if encoded == 2 and (pos or endpos != -1): + utf8indices(cstring, size, &pos, &endpos) + if pos > size: + return False + if 0 <= endpos < size: + size = endpos + + sp = new StringPiece(cstring, size) + with nogil: + retval = self.re_pattern.Match( + sp[0], + pos, + size, + UNANCHORED, + NULL, + 0) + del sp + finally: + release_cstring(&buf) + return retval != 0 + def count(self, object string, int pos=0, int endpos=-1): """Return number of non-overlapping matches of pattern in string.""" cdef char * cstring @@ -547,3 +586,53 @@ cdef class Pattern: def __dealloc__(self): del self.re_pattern + + +class PythonRePattern: + """A wrapper for re.Pattern to support the extra methods defined by re2 + (contains, count).""" + def __init__(self, pattern, flags=0): + self._pattern = re.compile(pattern, flags) + self.pattern = pattern + self.flags = flags + self.groupindex = self._pattern.groupindex + self.groups = self._pattern.groups + + def contains(self, string, pos=0, endpos=9223372036854775807): + return bool(self._pattern.search(string, pos, endpos)) + + def
count(self, string, pos=0, endpos=9223372036854775807): + return len(self._pattern.findall(string, pos, endpos)) + + def findall(self, string, pos=0, endpos=9223372036854775807): + return self._pattern.findall(string, pos, endpos) + + def finditer(self, string, pos=0, endpos=9223372036854775807): + return self._pattern.finditer(string, pos, endpos) + + def fullmatch(self, string, pos=0, endpos=9223372036854775807): + return self._pattern.fullmatch(string, pos, endpos) + + def match(self, string, pos=0, endpos=9223372036854775807): + return self._pattern.match(string, pos, endpos) + + def scanner(self, string, pos=0, endpos=9223372036854775807): + return self._pattern.scanner(string, pos, endpos) + + def search(self, string, pos=0, endpos=9223372036854775807): + return self._pattern.search(string, pos, endpos) + + def split(self, string, maxsplit=0): + return self._pattern.split(string, maxsplit) + + def sub(self, repl, string, count=0): + return self._pattern.sub(repl, string, count) + + def subn(self, repl, string, count=0): + return self._pattern.subn(repl, string, count) + + def __repr__(self): + return repr(self._pattern) + + def __reduce__(self): + return (self.__class__, (self.pattern, self.flags)) diff --git a/src/re2.pyx b/src/re2.pyx index 36fe86b0..6638f5fb 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -72,7 +72,8 @@ This module exports the following functions:: count Count all occurrences of a pattern in a string. match Match a regular expression pattern to the beginning of a string. fullmatch Match a regular expression pattern to all of a string. - search Search a string for the presence of a pattern. + search Search a string for a pattern and return Match object. + contains Same as search, but only return bool. sub Substitute occurrences of a pattern found in a string. subn Same as sub, but also return the number of substitutions made. split Split a string by the occurrences of a pattern.
@@ -170,6 +171,12 @@ def fullmatch(pattern, string, int flags=0): return compile(pattern, flags).fullmatch(string) +def contains(pattern, string, int flags=0): + """Scan through string looking for a match to the pattern, returning + True or False.""" + return compile(pattern, flags).contains(string) + + def finditer(pattern, string, int flags=0): """Yield all non-overlapping matches in the string. diff --git a/tests/count.txt b/tests/count.txt index f5ab6ced..3c848fb7 100644 --- a/tests/count.txt +++ b/tests/count.txt @@ -9,13 +9,10 @@ This one is from http://docs.python.org/library/re.html?#finding-all-adverbs: >>> re2.count(r"\w+ly", "He was carefully disguised but captured quickly by police.") 2 -This one makes sure all groups are found: +Groups should not affect count(): >>> re2.count(r"(\w+)=(\d+)", "foo=1,foo=2") 2 - -When there's only one matched group, it should not be returned in a tuple: - >>> re2.count(r"(\w)\w", "fx") 1 @@ -31,3 +28,11 @@ A pattern matching an empty string: >>> re2.count("", "foo") 4 + +contains tests +============== + + >>> re2.contains('a', 'bbabb') + True + >>> re2.contains('a', 'bbbbb') + False From 7dfcfb5a1b777036246d57a282f3897277e15a80 Mon Sep 17 00:00:00 2001 From: Yoav Alon <yoav@orca.security> Date: Mon, 26 Oct 2020 09:48:38 +0200 Subject: [PATCH 036/114] created pyproject.toml Poetry and other modern build systems need to know which build-tools to install prior to calling setup.py. Added a pyproject.toml to specify cython as a dependency.
--- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..905d0f74 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel", "Cython>=0.20"] +build-backend = "setuptools.build_meta:__legacy__" From 61659ebf02d30c21cc01d7f8316249abd69bbb21 Mon Sep 17 00:00:00 2001 From: Yoav Alon <65133955+yoav-orca@users.noreply.github.com> Date: Tue, 27 Oct 2020 08:48:15 +0200 Subject: [PATCH 037/114] Creating github actions for building wheels --- .github/workflows/main.yml | 79 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..c0a09949 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,79 @@ +name: Build + +on: [push, pull_request] + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} for Python + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-18.04, macos-latest] + + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + name: Install Python + with: + python-version: 3.7 + + - name: Install cibuildwheel + run: | + python -m pip install cibuildwheel==1.6.3 + + - name: Install Visual C++ for Python 2.7 + if: runner.os == 'Windows' + run: | + choco install vcpython27 -f -y + + - name: Build wheels + env: + CIBW_BEFORE_ALL_LINUX: yum install -y re2-devel + CIBW_BEFORE_ALL_MACOS: brew install re2 + CIBW_BUILD: cp36-* cp37-* cp38-* + run: | + python -m cibuildwheel --output-dir wheelhouse + + - uses: actions/upload-artifact@v2 + with: + path: ./wheelhouse/*.whl + + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + name: Install Python + with: + python-version: '3.7' + + - name: Build sdist + run: | + pip 
install --user cython + python setup.py sdist + + - uses: actions/upload-artifact@v2 + with: + path: dist/*.tar.gz + + upload_pypi: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + # upload to PyPI on every tag starting with 'v' + if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') + # alternatively, to publish when a GitHub Release is created, use the following rule: + # if: github.event_name == 'release' && github.event.action == 'published' + steps: + - uses: actions/download-artifact@v2 + with: + name: artifact + path: dist + + - uses: pypa/gh-action-pypi-publish@master + with: + user: __token__ + password: ${{ secrets.pypi_password }} + From 1ef0f0f7725ed553308511d28c9d79d757605ed3 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 27 Oct 2020 21:22:52 +0100 Subject: [PATCH 038/114] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ccaf411f..a159feab 100755 --- a/setup.py +++ b/setup.py @@ -106,7 +106,7 @@ def main(): }) setup( name='re2', - version='0.2.23', + version='0.3', description='Python wrapper for Google\'s RE2 using Cython', long_description=get_long_description(), author=get_authors(), From a3e13fdfda5cd99aad5827b8fe2b643601bd60c5 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 27 Oct 2020 21:41:10 +0100 Subject: [PATCH 039/114] change package name for pypi --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index a159feab..082c9600 100755 --- a/setup.py +++ b/setup.py @@ -105,14 +105,14 @@ def main(): 'warn.unreachable': True, }) setup( - name='re2', + name='pyre2', version='0.3', description='Python wrapper for Google\'s RE2 using Cython', long_description=get_long_description(), author=get_authors(), license='New BSD License', - author_email = 'mike@axiak.net', - url = 'http://github.com/axiak/pyre2/', + 
author_email='andreas@unstable.nl', + url='https://github.com/andreasvc/pyre2', ext_modules = ext_modules, cmdclass=cmdclass, classifiers = [ From 2a14413df10f00d00e54da6d231aac54eee4ca4d Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 27 Oct 2020 21:57:29 +0100 Subject: [PATCH 040/114] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 082c9600..c7986fa5 100755 --- a/setup.py +++ b/setup.py @@ -106,7 +106,7 @@ def main(): }) setup( name='pyre2', - version='0.3', + version='0.3.1', description='Python wrapper for Google\'s RE2 using Cython', long_description=get_long_description(), author=get_authors(), From c8a08ed7d0d1652ac5b6a82186dd6dd49f65153f Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Wed, 9 Dec 2020 13:00:10 -0800 Subject: [PATCH 041/114] fix: pkg: workaround for manylinux dependency install error, add release flow Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/main.yml | 10 ++-- .github/workflows/release.yml | 94 +++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c0a09949..217d62d7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,9 +6,11 @@ jobs: build_wheels: name: Build wheels on ${{ matrix.os }} for Python runs-on: ${{ matrix.os }} + env: + MANYLINUX2010_X86_64_TAG: "2020-12-03-912b0de" strategy: matrix: - os: [ubuntu-18.04, macos-latest] + os: [ubuntu-20.04, macos-latest] steps: - uses: actions/checkout@v2 @@ -20,7 +22,7 @@ jobs: - name: Install cibuildwheel run: | - python -m pip install cibuildwheel==1.6.3 + python -m pip install cibuildwheel==1.7.1 - name: Install Visual C++ for Python 2.7 if: runner.os == 'Windows' @@ -29,7 +31,9 @@ jobs: - name: Build wheels env: - CIBW_BEFORE_ALL_LINUX: yum install
-y re2-devel + CIBW_BEFORE_ALL_LINUX: > + yum -y -q --enablerepo=extras install epel-release + && yum install -y re2-devel CIBW_BEFORE_ALL_MACOS: brew install re2 CIBW_BUILD: cp36-* cp37-* cp38-* run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..41a5bdb5 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,94 @@ +name: Release + +on: + push: + # release on tag push + tags: + - '*' + +jobs: + create_wheels: + name: Build wheels on ${{ matrix.os }} for Python + runs-on: ${{ matrix.os }} + env: + MANYLINUX2010_X86_64_TAG: "2020-12-03-912b0de" + strategy: + fail-fast: false + matrix: + os: [ubuntu-20.04, macos-latest] + + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + name: Install Python + with: + python-version: 3.7 + + - name: Install cibuildwheel + run: | + python -m pip install cibuildwheel==1.7.1 + - name: Install Visual C++ for Python 2.7 + if: runner.os == 'Windows' + run: | + choco install vcpython27 -f -y + - name: Build wheels + env: + CIBW_BEFORE_ALL_LINUX: > + yum -y -q --enablerepo=extras install epel-release + && yum install -y re2-devel + CIBW_BEFORE_ALL_MACOS: brew install re2 + CIBW_BUILD: cp36-* cp37-* cp38-* + run: | + python -m cibuildwheel --output-dir wheelhouse + - uses: actions/upload-artifact@v2 + with: + name: wheels + path: ./wheelhouse/*.whl + + create_release: + name: Create Release + needs: [create_wheels] + runs-on: ubuntu-20.04 + + steps: + - name: Get version + id: get_version + run: | + echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV + echo ${{ env.VERSION }} + + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v2 + name: Install Python + with: + python-version: 3.7 + + - uses: actions/download-artifact@v2 + with: + name: wheels + + - name: Install gitchangelog + run: | + pip install git+https://github.com/freepn/gitchangelog@3.0.4-4 + + - name: Generate changes file + run: | + bash -c 
'cat $(get-rcpath) > .gitchangelog.rc' + bash -c 'gitchangelog $(git tag -l | tail -n2 | head -n1)..${{ env.VERSION }} > CHANGES.md' + + - name: Create draft release + id: create_release + uses: softprops/action-gh-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ env.VERSION }} + name: Release v${{ env.VERSION }} + body_path: CHANGES.md + draft: false + prerelease: false + files: ./pyre2*.whl From d48a20aa81a670fa3304a9e3ef6fcb92945c4566 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Wed, 16 Dec 2020 20:12:07 +0100 Subject: [PATCH 042/114] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c7986fa5..6c8767d5 100755 --- a/setup.py +++ b/setup.py @@ -106,7 +106,7 @@ def main(): }) setup( name='pyre2', - version='0.3.1', + version='0.3.2', description='Python wrapper for Google\'s RE2 using Cython', long_description=get_long_description(), author=get_authors(), From 85bc93e8b4504fa95e544344808c944b1a5da507 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Wed, 13 Jan 2021 14:38:58 -0800 Subject: [PATCH 043/114] new: pkg: convert to pep517 with cmake and pybind11 build config Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/conda.yml | 54 ++++++++ CMakeLists.txt | 51 +++++++ MANIFEST.in | 31 +---- README.rst | 39 ++++++ cmake/modules/FindCython.cmake | 44 ++++++ conda.recipe/meta.yaml | 49 +++++++ pyproject.toml | 12 +- requirements-cibw.txt | 1 + setup.cfg | 42 ++++++ setup.py | 242 ++++++++++++++++----------------- src/CMakeLists.txt | 69 ++++++++++ 11 files changed, 482 insertions(+), 152 deletions(-) create mode 100644 .github/workflows/conda.yml create mode 100644 CMakeLists.txt create mode 100644 cmake/modules/FindCython.cmake create mode 100644 conda.recipe/meta.yaml create mode 100644 requirements-cibw.txt create mode 100644 setup.cfg create mode 100644 src/CMakeLists.txt 
diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml new file mode 100644 index 00000000..b2e95ef9 --- /dev/null +++ b/.github/workflows/conda.yml @@ -0,0 +1,54 @@ +name: conda + +on: + workflow_dispatch: + push: + branches: + - master + +jobs: + build: + strategy: + fail-fast: false + matrix: + platform: [ubuntu-latest, windows-2016, macos-latest] + python-version: [3.6, 3.7, 3.8, 3.9] + + runs-on: ${{ matrix.platform }} + + # The setup-miniconda action needs this to activate miniconda + defaults: + run: + shell: "bash -l {0}" + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Cache conda + uses: actions/cache@v1 + with: + path: ~/conda_pkgs_dir + key: ${{matrix.os}}-conda-pkgs-${{hashFiles('**/conda.recipe/meta.yaml')}} + + - name: Get conda + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + channels: conda-forge + channel-priority: strict + use-only-tar-bz2: true + auto-activate-base: true + + - name: Prepare + run: conda install conda-build conda-verify + + - name: Build + run: conda build conda.recipe + + - name: Install + run: conda install -c ${CONDA_PREFIX}/conda-bld/ pyre2 + + - name: Test + run: python -m unittest discover -f -s tests/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..83eff6e0 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.15...3.18) + +project(re2 LANGUAGES CXX C) + +option(PY_DEBUG "Set if python being linked is a Py_DEBUG build" OFF) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) + set(CLANG_DEFAULT_CXX_STDLIB libc++) + set(CLANG_DEFAULT_RTLIB compiler-rt) +endif() + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING + "Default build type: RelWithDebInfo" FORCE) +endif() + +include(GNUInstallDirs) + +find_package(pybind11 CONFIG) + +if(pybind11_FOUND) + 
message(STATUS "System pybind11 found") +else() + message(STATUS "Fetching pybind11 from github") + # Fetch pybind11 + include(FetchContent) + + FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11 + GIT_TAG v2.6.1 + ) + FetchContent_MakeAvailable(pybind11) +endif() + +find_package(Threads REQUIRED) + +if (${PYTHON_IS_DEBUG}) + set(PY_DEBUG ON) +endif() + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} + ${PROJECT_SOURCE_DIR}/cmake/modules/) + +include_directories(${PROJECT_SOURCE_DIR}/src) + +add_subdirectory(src) diff --git a/MANIFEST.in b/MANIFEST.in index f69f593b..770f6f68 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,25 +1,6 @@ -include CHANGELIST -include Makefile -include LICENSE -include README -include tests/cnn_homepage.dat -include tests/performance.py -include tests/search.txt -include tests/finditer.txt -include tests/wikipages.xml.gz -include tests/__init__.py -include tests/match_expand.txt -include tests/test.py -include tests/pattern.txt -include tests/sub.txt -include tests/unicode.txt -include tests/findall.txt -include tests/split.txt -include AUTHORS -include README.rst -include src/_re2macros.h -include src/_re2.pxd -include src/re2.cpp -include src/re2.pyx -include MANIFEST -include setup.py +global-include CMakeLists.txt *.cmake +include AUTHORS README.rst HISTORY CHANGELOG.rst LICENSE +graft src +recursive-exclude .tox * +recursive-exclude .github * +recursive-exclude vcpkg * diff --git a/README.rst b/README.rst index 4d869763..403b4ce9 100644 --- a/README.rst +++ b/README.rst @@ -16,6 +16,45 @@ Intended as a drop-in replacement for ``re``. Unicode is supported by encoding to UTF-8, and bytes strings are treated as UTF-8 when the UNICODE flag is given. For best performance, work with UTF-8 encoded bytes strings. +Platform Dependencies +===================== + +Requirements for building the C++ extension from the repo source: + +* Building requires RE2, pybind11, and cmake installed in the build + environment. 
+ + + On Ubuntu/Debian, install cmake, pybind11-dev, and libre2-dev packages + + On Gentoo, install dev-util/cmake, dev-python/pybind11, and dev-libs/re2 + + For a venv you can install the pybind11 and cython packages from PyPI + +On MacOS, use the ``brew`` package manager:: + + $ brew install -s re2 pybind11 + +On Windows use the ``vcpkg`` package manager:: + + $ vcpkg install re2:x64-windows pybind11:x64-windows + +You can pass some cmake environment variables to alter the build type or +pass a toolchain file (the latter is required on Windows) or specify the +cmake generator. For example: + +:: + + $ CMAKE_GENERATOR="Unix Makefiles" CMAKE_TOOLCHAIN_FILE=clang_toolchain.cmake tox -e deploy + + +Platform-agnostic building with conda +------------------------------------- + +An alternative to the above is provided via the ``conda`` recipe (use the +`miniconda installer`_ if you don't have ``conda`` installed already). + + +.. _miniconda installer: https://docs.conda.io/en/latest/miniconda.html + + Backwards Compatibility ======================= diff --git a/cmake/modules/FindCython.cmake b/cmake/modules/FindCython.cmake new file mode 100644 index 00000000..04aed1f8 --- /dev/null +++ b/cmake/modules/FindCython.cmake @@ -0,0 +1,44 @@ +# Find the Cython compiler. +# +# This code sets the following variables: +# +# CYTHON_EXECUTABLE +# +# See also UseCython.cmake + +#============================================================================= +# Copyright 2011 Kitware, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +# Use the Cython executable that lives next to the Python executable +# if it is a local installation. +find_package( PythonInterp ) +if( PYTHONINTERP_FOUND ) + get_filename_component( _python_path ${PYTHON_EXECUTABLE} PATH ) + find_program( CYTHON_EXECUTABLE + NAMES cython cython.bat cython3 + HINTS ${_python_path} + ) +else() + find_program( CYTHON_EXECUTABLE + NAMES cython cython.bat cython3 + ) +endif() + + +include( FindPackageHandleStandardArgs ) +FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE ) + +mark_as_advanced( CYTHON_EXECUTABLE ) diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml new file mode 100644 index 00000000..30fc4fe0 --- /dev/null +++ b/conda.recipe/meta.yaml @@ -0,0 +1,49 @@ +{% set name = "pyre2" %} +{% set version = "0.3.2" %} + +package: + name: {{ name|lower }} + version: {{ version }} + +source: + path: .. + +build: + number: 0 + script: {{ PYTHON }} -m pip install . 
-vv + +requirements: + build: + - {{ compiler('cxx') }} + host: + - python + - cmake >=3.15 + - pybind11 + - ninja + - cython + - pip + - re2 + run: + - python + - re2 + +test: + imports: + - re2 + source_files: + - tests + commands: + - python -m unittest discover -f -s tests + +about: + home: "https://github.com/andreasvc/pyre2" + license: BSD-3-Clause + license_family: BSD + license_file: LICENSE + summary: "Python wrapper for Google's RE2 using Cython" + doc_url: "https://github.com/andreasvc/pyre2/blob/master/README.rst" + dev_url: "https://github.com/andreasvc/pyre2" + +extra: + recipe-maintainers: + - sarnold diff --git a/pyproject.toml b/pyproject.toml index 905d0f74..e2bdcc86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,11 @@ [build-system] -requires = ["setuptools", "wheel", "Cython>=0.20"] -build-backend = "setuptools.build_meta:__legacy__" +requires = [ + "setuptools>=42", + "wheel", + "Cython", + "pybind11>=2.6.0", + "ninja; sys_platform != 'Windows'", + "cmake>=3.12", +] + +build-backend = "setuptools.build_meta" diff --git a/requirements-cibw.txt b/requirements-cibw.txt new file mode 100644 index 00000000..932364dd --- /dev/null +++ b/requirements-cibw.txt @@ -0,0 +1 @@ +cibuildwheel==1.7.4 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..8f10c26c --- /dev/null +++ b/setup.cfg @@ -0,0 +1,42 @@ +[metadata] +name = pyre2 +author = Andreas van Cranenburgh +author_email = andreas@unstable.nl +maintainer = Steve Arnold +maintainer_email = nerdboy@gentoo.org +description = Python wrapper for Google\'s RE2 using Cython +long_description = file: README.rst +long_description_content_type = text/x-rst; charset=UTF-8 +url = https://github.com/andreasvc/pyre2 +license = BSD +license_files = LICENSE +classifiers = + License :: OSI Approved :: BSD License + Programming Language :: Cython + Programming Language :: Python :: 3.6 + Intended Audience :: Developers + Topic :: Software Development :: Libraries :: Python Modules + 
+[options] +python_requires = >=3.6 + +zip_safe = False + +[options.extras_require] +test = + nose + +[nosetests] +verbosity = 3 +with-doctest = 1 +doctest-extension = txt +exe = 1 +#with-coverage = 1 +#cover-package = py_re2 +#cover-min-percentage = 90 +doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE + +[flake8] +# these error codes interfere with Black +ignore = E203, E231, E501, W503, B950 +select = C,E,F,W,B,B9 diff --git a/setup.py b/setup.py index 6c8767d5..ae3daf18 100755 --- a/setup.py +++ b/setup.py @@ -1,129 +1,121 @@ -import io +# -*- coding: utf-8 -*- +# + import os -import re import sys -import platform -from distutils.core import setup, Extension, Command - -MINIMUM_CYTHON_VERSION = '0.20' -BASE_DIR = os.path.dirname(__file__) -PY2 = sys.version_info[0] == 2 -DEBUG = False - -class TestCommand(Command): - description = 'Run packaged tests' - user_options = [] - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - from tests import re2_test - re2_test.testall() - - -def majorminor(version): - return [int(x) for x in re.match(r'([0-9]+)\.([0-9]+)', version).groups()] - -cmdclass = {'test': TestCommand} - -ext_files = [] -if '--cython' in sys.argv or not os.path.exists('src/re2.cpp'): - # Using Cython - try: - sys.argv.remove('--cython') - except ValueError: - pass - from Cython.Compiler.Main import Version - if majorminor(MINIMUM_CYTHON_VERSION) >= majorminor(Version.version): - raise ValueError('Cython is version %s, but needs to be at least %s.' 
- % (Version.version, MINIMUM_CYTHON_VERSION)) - from Cython.Distutils import build_ext - from Cython.Build import cythonize - cmdclass['build_ext'] = build_ext - use_cython = True -else: - # Building from C - ext_files.append('src/re2.cpp') - use_cython = False - - -# Locate the re2 module -_re2_prefixes = ['/usr', '/usr/local', '/opt/', '/opt/local', os.environ['HOME'] + '/.local'] - -re2_prefix = '' -for a in _re2_prefixes: - if os.path.exists(os.path.join(a, 'include', 're2')): - re2_prefix = a - break - -def get_long_description(): - with io.open(os.path.join(BASE_DIR, 'README.rst'), encoding='utf8') as inp: - return inp.read() - -def get_authors(): - author_re = re.compile(r'^\s*(.*?)\s+<.*?\@.*?>', re.M) - authors_f = open(os.path.join(BASE_DIR, 'AUTHORS')) - authors = [match.group(1) for match in author_re.finditer(authors_f.read())] - authors_f.close() - return ', '.join(authors) - -def main(): - os.environ['GCC_COLORS'] = 'auto' - include_dirs = [os.path.join(re2_prefix, 'include')] if re2_prefix else [] - libraries = ['re2'] - library_dirs = [os.path.join(re2_prefix, 'lib')] if re2_prefix else [] - runtime_library_dirs = [os.path.join(re2_prefix, 'lib') - ] if re2_prefix else [] - extra_compile_args = ['-O0', '-g'] if DEBUG else [ - '-O3', '-march=native', '-DNDEBUG'] - # Older GCC version such as on CentOS 6 do not support C++11 - if not platform.python_compiler().startswith('GCC 4.4.7'): - extra_compile_args.append('-std=c++11') - ext_modules = [ - Extension( - 're2', - sources=['src/re2.pyx' if use_cython else 'src/re2.cpp'], - language='c++', - include_dirs=include_dirs, - libraries=libraries, - library_dirs=library_dirs, - runtime_library_dirs=runtime_library_dirs, - extra_compile_args=['-DPY2=%d' % PY2] + extra_compile_args, - extra_link_args=['-g'] if DEBUG else ['-DNDEBUG'], - )] - if use_cython: - ext_modules = cythonize( - ext_modules, - language_level=3, - annotate=True, - compiler_directives={ - 'embedsignature': True, - 'warn.unused': True, 
- 'warn.unreachable': True, - }) - setup( - name='pyre2', - version='0.3.2', - description='Python wrapper for Google\'s RE2 using Cython', - long_description=get_long_description(), - author=get_authors(), - license='New BSD License', - author_email='andreas@unstable.nl', - url='https://github.com/andreasvc/pyre2', - ext_modules = ext_modules, - cmdclass=cmdclass, - classifiers = [ - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Cython', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 3.3', - 'Intended Audience :: Developers', - 'Topic :: Software Development :: Libraries :: Python Modules', - ], +import subprocess + +from setuptools import setup, Extension +from setuptools.command.build_ext import build_ext + + +# update the version both here and in conda.recipe/meta.yaml +__version__ = '0.3.2' + +# Convert distutils Windows platform specifiers to CMake -A arguments +PLAT_TO_CMAKE = { + "win32": "Win32", + "win-amd64": "x64", + "win-arm32": "ARM", + "win-arm64": "ARM64", +} + +# A CMakeExtension needs a sourcedir instead of a file list. +class CMakeExtension(Extension): + def __init__(self, name, sourcedir=""): + # auditwheel repair command needs libraries= + Extension.__init__(self, name, sources=[], libraries=['re2']) + self.sourcedir = os.path.abspath(sourcedir) + + +class CMakeBuild(build_ext): + + def build_extension(self, ext): + extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + + # required for auto-detection of auxiliary "native" libs + if not extdir.endswith(os.path.sep): + extdir += os.path.sep + + # Set a sensible default build type for packaging + if "CMAKE_BUILD_OVERRIDE" not in os.environ: + cfg = "Debug" if self.debug else "RelWithDebInfo" + else: + cfg = os.environ.get("CMAKE_BUILD_OVERRIDE", "") + + # CMake lets you override the generator - we need to check this. + # Can be set with Conda-Build, for example. 
+ cmake_generator = os.environ.get("CMAKE_GENERATOR", "") + + # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON + # SCM_VERSION_INFO shows you how to pass a value into the C++ code + # from Python. + cmake_args = [ + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}".format(extdir), + "-DPYTHON_EXECUTABLE={}".format(sys.executable), + "-DSCM_VERSION_INFO={}".format(__version__), + "-DCMAKE_BUILD_TYPE={}".format(cfg), # not used on MSVC, but no harm + ] + build_args = ["--verbose"] + + # CMake also lets you provide a toolchain file. + # Can be set in CI build environments for example. + cmake_toolchain_file = os.environ.get("CMAKE_TOOLCHAIN_FILE", "") + if cmake_toolchain_file: + cmake_args += ["-DCMAKE_TOOLCHAIN_FILE={}".format(cmake_toolchain_file)] + + if self.compiler.compiler_type != "msvc": + # Using Ninja-build since it a) is available as a wheel and b) + # multithreads automatically. MSVC would require all variables be + # exported for Ninja to pick it up, which is a little tricky to do. + # Users can override the generator with CMAKE_GENERATOR in CMake + # 3.15+. + if not cmake_generator: + cmake_args += ["-GNinja"] + + else: + + # Single config generators are handled "normally" + single_config = any(x in cmake_generator for x in {"NMake", "Ninja"}) + + # CMake allows an arch-in-generator style for backward compatibility + contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"}) + + # Specify the arch if using MSVC generator, but only if it doesn't + # contain a backward-compatibility arch spec already in the + # generator name. + if not single_config and not contains_arch: + cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]] + + # Multi-config generators have a different way to specify configs + if not single_config: + cmake_args += [ + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir) + ] + build_args += ["--config", cfg] + + # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level + # across all generators. 
+ if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: + # self.parallel is a Python 3 only way to set parallel jobs by hand + # using -j in the build_ext call, not supported by pip or PyPA-build. + if hasattr(self, "parallel") and self.parallel: + # CMake 3.12+ only. + build_args += ["-j{}".format(self.parallel)] + + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + + subprocess.check_call( + ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp + ) + subprocess.check_call( + ["cmake", "--build", "."] + build_args, cwd=self.build_temp ) -if __name__ == '__main__': - main() + +setup( + version=__version__, + ext_modules=[CMakeExtension('re2')], + cmdclass={'build_ext': CMakeBuild}, +) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 00000000..82dc8231 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,69 @@ +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(Cython REQUIRED) + +set(cython_module re2) + +set(re2_include_dir "${PROJECT_SOURCE_DIR}/src") +set(cython_output "${CMAKE_CURRENT_SOURCE_DIR}/${cython_module}.cpp") +set(cython_src ${cython_module}.pyx) +# Track cython sources +file(GLOB cy_srcs *.pyx *.pxi *.h) + +# .pyx -> .cpp +add_custom_command(OUTPUT ${cython_output} + COMMAND ${CYTHON_EXECUTABLE} + -a -3 + --fast-fail + --cplus -I ${re2_include_dir} + --output-file ${cython_output} ${cython_src} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS ${cy_srcs} + COMMENT "Cythonizing extension ${cython_src}") + +add_library(${cython_module} MODULE ${cython_output}) + +set_target_properties(${cython_module} + PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}") + +target_include_directories(${cython_module} PUBLIC + ${PYTHON_INCLUDE_DIRS}) + +target_compile_definitions(${cython_module} PRIVATE PY2=0) +target_compile_definitions(${cython_module} PRIVATE VERSION_INFO=${SCM_VERSION_INFO}) + +# here we get to jump through some hoops to find libre2 on the manylinux +# 
docker CI images, etc +find_package(re2 CONFIG NAMES re2) + +if(re2_FOUND) + message(STATUS "System re2 found") + target_link_libraries(${cython_module} PRIVATE re2::re2) +elseif(NOT MSVC) + message(STATUS "Trying PkgConfig") + find_package(PkgConfig REQUIRED) + pkg_check_modules(RE2 IMPORTED_TARGET re2) + + if(RE2_FOUND) + include_directories(${RE2_INCLUDE_DIRS}) + target_link_libraries(${cython_module} PRIVATE PkgConfig::RE2) + else() + # last resort for manylinux: just try it + message(STATUS "Blindly groping instead") + link_directories("/usr/lib64" "/usr/lib") + target_link_libraries(${cython_module} PRIVATE "libre2.so") + endif() +endif() + +if(APPLE) + # macos/appleclang needs this + target_link_libraries(${cython_module} PRIVATE pybind11::module) + target_link_libraries(${cython_module} PRIVATE pybind11::python_link_helper) +endif() + +if(MSVC) + target_compile_options(${cython_module} PRIVATE /utf-8) + target_link_libraries(${cython_module} PRIVATE ${PYTHON_LIBRARIES}) + target_link_libraries(${cython_module} PRIVATE pybind11::windows_extras) +endif() From 0e4330fb7bf84c858c2f38446ee57534a57a1c96 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Wed, 13 Jan 2021 14:58:05 -0800 Subject: [PATCH 044/114] chg: ci: update wheel builds for Linux, Macos, and Windows Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/main.yml | 42 ++++++++++++++++++++++---------- .github/workflows/release.yml | 45 +++++++++++++++++++++++++---------- 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 217d62d7..0d7da3bd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,11 +6,9 @@ jobs: build_wheels: name: Build wheels on ${{ matrix.os }} for Python runs-on: ${{ matrix.os }} - env: - MANYLINUX2010_X86_64_TAG: "2020-12-03-912b0de" strategy: matrix: - os: [ubuntu-20.04, macos-latest] + os: [ubuntu-20.04, macos-latest, windows-latest] 
steps: - uses: actions/checkout@v2 @@ -18,24 +16,42 @@ jobs: - uses: actions/setup-python@v2 name: Install Python with: - python-version: 3.7 + python-version: '3.8' - - name: Install cibuildwheel - run: | - python -m pip install cibuildwheel==1.7.1 - - - name: Install Visual C++ for Python 2.7 + - name: Prepare compiler environment for Windows if: runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: amd64 + + - name: Install cibuildwheel run: | - choco install vcpython27 -f -y + python -m pip install --upgrade pip + pip install -r requirements-cibw.txt - name: Build wheels env: + CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2010_x86_64:latest + CIBW_MANYLINUX_I686_IMAGE: quay.io/pypa/manylinux2010_i686:latest + CIBW_BUILD: cp36-* cp37-* cp38-* cp39-* + CIBW_SKIP: "*-win32" CIBW_BEFORE_ALL_LINUX: > yum -y -q --enablerepo=extras install epel-release - && yum install -y re2-devel - CIBW_BEFORE_ALL_MACOS: brew install re2 - CIBW_BUILD: cp36-* cp37-* cp38-* + && yum install -y re2-devel ninja-build + && pip install . + CIBW_REPAIR_WHEEL_COMMAND_LINUX: "LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH auditwheel repair -w {dest_dir} {wheel}" + CIBW_BEFORE_ALL_MACOS: > + brew install -s re2 + && brew install pybind11 ninja + && pip install . + CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.09 + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "DYLD_LIBRARY_PATH=/usr/local/Cellar/re2/20201101/lib:$DYLD_LIBRARY_PATH delocate-listdeps {wheel} && delocate-wheel -w {dest_dir} -v {wheel}" + CIBW_BEFORE_ALL_WINDOWS: > + vcpkg install re2:x64-windows + && vcpkg integrate install + && pip install . 
+ CIBW_ENVIRONMENT_WINDOWS: 'CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' + CIBW_TEST_COMMAND: python -c "import re2" run: | python -m cibuildwheel --output-dir wheelhouse diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 41a5bdb5..09f3e404 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,12 +10,9 @@ jobs: create_wheels: name: Build wheels on ${{ matrix.os }} for Python runs-on: ${{ matrix.os }} - env: - MANYLINUX2010_X86_64_TAG: "2020-12-03-912b0de" strategy: - fail-fast: false matrix: - os: [ubuntu-20.04, macos-latest] + os: [ubuntu-20.04, macos-latest, windows-latest] steps: - uses: actions/checkout@v2 @@ -25,22 +22,43 @@ jobs: with: python-version: 3.7 - - name: Install cibuildwheel - run: | - python -m pip install cibuildwheel==1.7.1 - - name: Install Visual C++ for Python 2.7 + - name: Prepare compiler environment for Windows if: runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: amd64 + + - name: Install cibuildwheel run: | - choco install vcpython27 -f -y + python -m pip install --upgrade pip + pip install -r requirements-cibw.txt + - name: Build wheels env: + CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2010_x86_64:latest + CIBW_MANYLINUX_I686_IMAGE: quay.io/pypa/manylinux2010_i686:latest + CIBW_BUILD: cp36-* cp37-* cp38-* cp39-* + CIBW_SKIP: "*-win32" CIBW_BEFORE_ALL_LINUX: > yum -y -q --enablerepo=extras install epel-release - && yum install -y re2-devel - CIBW_BEFORE_ALL_MACOS: brew install re2 - CIBW_BUILD: cp36-* cp37-* cp38-* + && yum install -y re2-devel ninja-build + && pip install . + CIBW_REPAIR_WHEEL_COMMAND_LINUX: "LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH auditwheel repair -w {dest_dir} {wheel}" + CIBW_BEFORE_ALL_MACOS: > + brew install -s re2 + && brew install pybind11 ninja + && pip install . 
+ CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.09 + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "DYLD_LIBRARY_PATH=/usr/local/Cellar/re2/20201101/lib:$DYLD_LIBRARY_PATH delocate-listdeps {wheel} && delocate-wheel -w {dest_dir} -v {wheel}" + CIBW_BEFORE_ALL_WINDOWS: > + vcpkg install re2:x64-windows + && vcpkg integrate install + && pip install . + CIBW_ENVIRONMENT_WINDOWS: 'CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' + CIBW_TEST_COMMAND: python -c "import re2" run: | python -m cibuildwheel --output-dir wheelhouse + - uses: actions/upload-artifact@v2 with: name: wheels @@ -91,4 +109,5 @@ jobs: body_path: CHANGES.md draft: false prerelease: false - files: ./pyre2*.whl + # uncomment below to upload wheels to github releases + # files: ./pyre2*.whl From f6f7ae8dae2b71797bfda6c5c7daa0b78fc01595 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Thu, 14 Jan 2021 20:59:00 -0800 Subject: [PATCH 045/114] fix: pkg: update macos wheel repair, simplify wheel building Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/main.yml | 14 ++++++-------- .github/workflows/release.yml | 18 ++++++++---------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0d7da3bd..07174e1e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,6 +12,8 @@ jobs: steps: - uses: actions/checkout@v2 + with: + fetch-depth: 0 - uses: actions/setup-python@v2 name: Install Python @@ -27,7 +29,7 @@ jobs: - name: Install cibuildwheel run: | python -m pip install --upgrade pip - pip install -r requirements-cibw.txt + python -m pip install -r requirements-cibw.txt - name: Build wheels env: @@ -38,18 +40,14 @@ jobs: CIBW_BEFORE_ALL_LINUX: > yum -y -q --enablerepo=extras install epel-release && yum install -y re2-devel ninja-build - && pip install . 
- CIBW_REPAIR_WHEEL_COMMAND_LINUX: "LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH auditwheel repair -w {dest_dir} {wheel}" + CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel show {wheel} && auditwheel repair -w {dest_dir} {wheel}" CIBW_BEFORE_ALL_MACOS: > - brew install -s re2 - && brew install pybind11 ninja - && pip install . + brew install re2 pybind11 ninja CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.09 - CIBW_REPAIR_WHEEL_COMMAND_MACOS: "DYLD_LIBRARY_PATH=/usr/local/Cellar/re2/20201101/lib:$DYLD_LIBRARY_PATH delocate-listdeps {wheel} && delocate-wheel -w {dest_dir} -v {wheel}" + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "pip uninstall -y delocate && pip install git+https://github.com/Chia-Network/delocate.git && delocate-listdeps {wheel} && delocate-wheel -w {dest_dir} -v {wheel}" CIBW_BEFORE_ALL_WINDOWS: > vcpkg install re2:x64-windows && vcpkg integrate install - && pip install . CIBW_ENVIRONMENT_WINDOWS: 'CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' CIBW_TEST_COMMAND: python -c "import re2" run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 09f3e404..0a2f5e35 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,11 +16,13 @@ jobs: steps: - uses: actions/checkout@v2 + with: + fetch-depth: 0 - uses: actions/setup-python@v2 name: Install Python with: - python-version: 3.7 + python-version: '3.8' - name: Prepare compiler environment for Windows if: runner.os == 'Windows' @@ -31,7 +33,7 @@ jobs: - name: Install cibuildwheel run: | python -m pip install --upgrade pip - pip install -r requirements-cibw.txt + python -m pip install -r requirements-cibw.txt - name: Build wheels env: @@ -42,18 +44,14 @@ jobs: CIBW_BEFORE_ALL_LINUX: > yum -y -q --enablerepo=extras install epel-release && yum install -y re2-devel ninja-build - && pip install . 
- CIBW_REPAIR_WHEEL_COMMAND_LINUX: "LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH auditwheel repair -w {dest_dir} {wheel}" + CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel show {wheel} && auditwheel repair -w {dest_dir} {wheel}" CIBW_BEFORE_ALL_MACOS: > - brew install -s re2 - && brew install pybind11 ninja - && pip install . + brew install re2 pybind11 ninja CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.09 - CIBW_REPAIR_WHEEL_COMMAND_MACOS: "DYLD_LIBRARY_PATH=/usr/local/Cellar/re2/20201101/lib:$DYLD_LIBRARY_PATH delocate-listdeps {wheel} && delocate-wheel -w {dest_dir} -v {wheel}" + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "pip uninstall -y delocate && pip install git+https://github.com/Chia-Network/delocate.git && delocate-listdeps {wheel} && delocate-wheel -w {dest_dir} -v {wheel}" CIBW_BEFORE_ALL_WINDOWS: > vcpkg install re2:x64-windows && vcpkg integrate install - && pip install . CIBW_ENVIRONMENT_WINDOWS: 'CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' CIBW_TEST_COMMAND: python -c "import re2" run: | @@ -107,7 +105,7 @@ jobs: tag_name: ${{ env.VERSION }} name: Release v${{ env.VERSION }} body_path: CHANGES.md - draft: false + draft: true prerelease: false # uncomment below to upload wheels to github releases # files: ./pyre2*.whl From 835ba9e1e48b6ea95fbf9440c756dd6cf59700d4 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Thu, 14 Jan 2021 21:13:33 -0800 Subject: [PATCH 046/114] fix: ci: make sure wheel path is correct for uploading Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0a2f5e35..ae38d732 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -108,4 +108,4 @@ jobs: draft: true prerelease: false # uncomment below to upload wheels to github releases - # files: ./pyre2*.whl + # files: wheels/pyre2*.whl From 
b5869bd067f966c0ed702e6e25db3ba3a7270212 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sun, 17 Jan 2021 16:29:33 -0800 Subject: [PATCH 047/114] chg: doc: add .gitchangelog.rc and generated CHANGELOG.rst (keep HISTORY) Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .gitchangelog.rc | 291 ++++++++++++++++++++++++++++++++++++++++++ CHANGELOG.rst | 183 ++++++++++++++++++++++++++ CHANGELIST => HISTORY | 0 3 files changed, 474 insertions(+) create mode 100644 .gitchangelog.rc create mode 100644 CHANGELOG.rst rename CHANGELIST => HISTORY (100%) diff --git a/.gitchangelog.rc b/.gitchangelog.rc new file mode 100644 index 00000000..c658c92a --- /dev/null +++ b/.gitchangelog.rc @@ -0,0 +1,291 @@ +# -*- coding: utf-8; mode: python -*- +## +## Format +## +## ACTION: [AUDIENCE:] COMMIT_MSG [!TAG ...] +## +## Description +## +## ACTION is one of 'chg', 'fix', 'new' +## +## Is WHAT the change is about. +## +## 'chg' is for refactor, small improvement, cosmetic changes... +## 'fix' is for bug fixes +## 'new' is for new features, big improvement +## +## AUDIENCE is optional and one of 'dev', 'usr', 'pkg', 'test', 'doc' +## +## Is WHO is concerned by the change. +## +## 'dev' is for developpers (API changes, refactors...) +## 'usr' is for final users (UI changes) +## 'pkg' is for packagers (packaging changes) +## 'test' is for testers (test only related changes) +## 'doc' is for doc guys (doc only changes) +## +## COMMIT_MSG is ... well ... the commit message itself. +## +## TAGs are additionnal adjective as 'refactor' 'minor' 'cosmetic' +## +## They are preceded with a '!' or a '@' (prefer the former, as the +## latter is wrongly interpreted in github.) Commonly used tags are: +## +## 'refactor' is obviously for refactoring code only +## 'minor' is for a very meaningless change (a typo, adding a comment) +## 'cosmetic' is for cosmetic driven change (re-indentation, 80-col...) +## 'wip' is for partial functionality but complete subfunctionality. 
+## +## Example: +## +## new: usr: support of bazaar implemented +## chg: re-indentend some lines !cosmetic +## new: dev: updated code to be compatible with last version of killer lib. +## fix: pkg: updated year of licence coverage. +## new: test: added a bunch of test around user usability of feature X. +## fix: typo in spelling my name in comment. !minor +## +## Please note that multi-line commit message are supported, and only the +## first line will be considered as the "summary" of the commit message. So +## tags, and other rules only applies to the summary. The body of the commit +## message will be displayed in the changelog without reformatting. + + +## +## ``ignore_regexps`` is a line of regexps +## +## Any commit having its full commit message matching any regexp listed here +## will be ignored and won't be reported in the changelog. +## +ignore_regexps = [ + r'@minor', r'!minor', + r'@cosmetic', r'!cosmetic', + r'@refactor', r'!refactor', + r'@wip', r'!wip', + r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[p|P]kg:', + r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[d|D]ev:', + r'^(.{3,3}\s*:)?\s*[fF]irst commit.?\s*$', + r'^$', ## ignore commits with empty messages +] + + +## ``section_regexps`` is a list of 2-tuples associating a string label and a +## list of regexp +## +## Commit messages will be classified in sections thanks to this. Section +## titles are the label, and a commit is classified under this section if any +## of the regexps associated is matching. +## +## Please note that ``section_regexps`` will only classify commits and won't +## make any changes to the contents. So you'll probably want to go check +## ``subject_process`` (or ``body_process``) to do some changes to the subject, +## whenever you are tweaking this variable. 
+## +section_regexps = [ + ('New', [ + r'^[nN]ew\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), + ('Changes', [ + r'^[cC]hg\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), + ('Fix', [ + r'^[fF]ix\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), + + ('Other', None ## Match all lines + ), + +] + + +## ``body_process`` is a callable +## +## This callable will be given the original body and result will +## be used in the changelog. +## +## Available constructs are: +## +## - any python callable that take one txt argument and return txt argument. +## +## - ReSub(pattern, replacement): will apply regexp substitution. +## +## - Indent(chars=" "): will indent the text with the prefix +## Please remember that template engines gets also to modify the text and +## will usually indent themselves the text if needed. +## +## - Wrap(regexp=r"\n\n"): re-wrap text in separate paragraph to fill 80-Columns +## +## - noop: do nothing +## +## - ucfirst: ensure the first letter is uppercase. +## (usually used in the ``subject_process`` pipeline) +## +## - final_dot: ensure text finishes with a dot +## (usually used in the ``subject_process`` pipeline) +## +## - strip: remove any spaces before or after the content of the string +## +## - SetIfEmpty(msg="No commit message."): will set the text to +## whatever given ``msg`` if the current text is empty. +## +## Additionally, you can `pipe` the provided filters, for instance: +#body_process = Wrap(regexp=r'\n(?=\w+\s*:)') | Indent(chars=" ") +#body_process = Wrap(regexp=r'\n(?=\w+\s*:)') +#body_process = noop +body_process = ReSub(r'((^|\n)[A-Z]\w+(-\w+)*: .*(\n\s+.*)*)+$', r'') | strip + + +## ``subject_process`` is a callable +## +## This callable will be given the original subject and result will +## be used in the changelog. +## +## Available constructs are those listed in ``body_process`` doc. 
+subject_process = (strip | + ReSub(r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n@]*)(@[a-z]+\s+)*$', r'\4') | + SetIfEmpty("No commit message.") | ucfirst | final_dot) + + +## ``tag_filter_regexp`` is a regexp +## +## Tags that will be used for the changelog must match this regexp. +## +tag_filter_regexp = r'^[0-9]+\.[0-9]+(\.[0-9]+)?$' + + +## ``unreleased_version_label`` is a string or a callable that outputs a string +## +## This label will be used as the changelog Title of the last set of changes +## between last valid tag and HEAD if any. +#unreleased_version_label = "(unreleased)" +unreleased_version_label = lambda: swrap( + ["git", "describe", "--tags"], +shell=False) + +## ``output_engine`` is a callable +## +## This will change the output format of the generated changelog file +## +## Available choices are: +## +## - rest_py +## +## Legacy pure python engine, outputs ReSTructured text. +## This is the default. +## +## - mustache(<template_name>) +## +## Template name could be any of the available templates in +## ``templates/mustache/*.tpl``. +## Requires python package ``pystache``. +## Examples: +## - mustache("markdown") +## - mustache("restructuredtext") +## +## - makotemplate(<template_name>) +## +## Template name could be any of the available templates in +## ``templates/mako/*.tpl``. +## Requires python package ``mako``. +## Examples: +## - makotemplate("restructuredtext") +## +output_engine = rest_py +#output_engine = mustache("restructuredtext") +#output_engine = mustache("markdown") +#output_engine = makotemplate("restructuredtext") + + +## ``include_merge`` is a boolean +## +## This option tells git-log whether to include merge commits in the log. +## The default is to include them. +include_merge = True + + +## ``log_encoding`` is a string identifier +## +## This option tells gitchangelog what encoding is outputed by ``git log``. 
+## The default is to be clever about it: it checks ``git config`` for +## ``i18n.logOutputEncoding``, and if not found will default to git's own +## default: ``utf-8``. +#log_encoding = 'utf-8' + + +## ``publish`` is a callable +## +## Sets what ``gitchangelog`` should do with the output generated by +## the output engine. ``publish`` is a callable taking one argument +## that is an interator on lines from the output engine. +## +## Some helper callable are provided: +## +## Available choices are: +## +## - stdout +## +## Outputs directly to standard output +## (This is the default) +## +## - FileInsertAtFirstRegexMatch(file, pattern, idx=lamda m: m.start()) +## +## Creates a callable that will parse given file for the given +## regex pattern and will insert the output in the file. +## ``idx`` is a callable that receive the matching object and +## must return a integer index point where to insert the +## the output in the file. Default is to return the position of +## the start of the matched string. +## +## - FileRegexSubst(file, pattern, replace, flags) +## +## Apply a replace inplace in the given file. Your regex pattern must +## take care of everything and might be more complex. Check the README +## for a complete copy-pastable example. +## +# publish = FileInsertIntoFirstRegexMatch( +# "CHANGELOG.rst", +# r'/(?P<rev>[0-9]+\.[0-9]+(\.[0-9]+)?)\s+\([0-9]+-[0-9]{2}-[0-9]{2}\)\n--+\n/', +# idx=lambda m: m.start(1) +# ) +#publish = stdout + + +## ``revs`` is a list of callable or a list of string +## +## callable will be called to resolve as strings and allow dynamical +## computation of these. The result will be used as revisions for +## gitchangelog (as if directly stated on the command line). This allows +## to filter exaclty which commits will be read by gitchangelog. +## +## To get a full documentation on the format of these strings, please +## refer to the ``git rev-list`` arguments. There are many examples. 
+## +## Using callables is especially useful, for instance, if you +## are using gitchangelog to generate incrementally your changelog. +## +## Some helpers are provided, you can use them:: +## +## - FileFirstRegexMatch(file, pattern): will return a callable that will +## return the first string match for the given pattern in the given file. +## If you use named sub-patterns in your regex pattern, it'll output only +## the string matching the regex pattern named "rev". +## +## - Caret(rev): will return the rev prefixed by a "^", which is a +## way to remove the given revision and all its ancestor. +## +## Please note that if you provide a rev-list on the command line, it'll +## replace this value (which will then be ignored). +## +## If empty, then ``gitchangelog`` will act as it had to generate a full +## changelog. +## +## The default is to use all commits to make the changelog. +#revs = ["^1.0.3", ] +#revs = [ +# Caret( +# FileFirstRegexMatch( +# "CHANGELOG.rst", +# r"(?P<rev>[0-9]+\.[0-9]+(\.[0-9]+)?)\s+\([0-9]+-[0-9]{2}-[0-9]{2}\)\n--+\n")), +# "HEAD" +#] +revs = [] diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 00000000..1b8a8c3b --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,183 @@ +v0.3.2 (2020-12-16) +------------------- +- Bump version. [Andreas van Cranenburgh] +- Merge pull request #18 from freepn/github-ci. [Andreas van Cranenburgh] + + workaroud for manylinux dependency install error plus release automation + + +v0.3.1 (2020-10-27) +------------------- +- Bump version. [Andreas van Cranenburgh] +- Change package name for pypi. [Andreas van Cranenburgh] + + +v0.3 (2020-10-27) +----------------- +- Bump version. [Andreas van Cranenburgh] +- Merge pull request #14 from yoav-orca/master. [Andreas van Cranenburgh] + + Support building wheels automatically using github actions +- Creating github actions for building wheels. [Yoav Alon] +- Created pyproject.toml. 
[Yoav Alon] + + Poetry and other modern build system need to know which build-tools to + install prior to calling setup.py. added a pyproject.toml to specify + cython as a dependency. +- Add contains() method. [Andreas van Cranenburgh] + + - contains() works like match() but returns a bool to avoid creating a + Match object. see #12 + - add wrapper for re.Pattern so that contains() and count() methods are + also available when falling back to re. +- Disable dubious tests. [Andreas van Cranenburgh] + + - All tests pass. + - Don't test for exotic/deprecated stuff such as non-initial flags in + patterns and octal escapes without leading 0 or triple digits. + - Known corner cases no longer reported as failed tests. + - support \b inside character class to mean backspace + - use re.error instead of defining subclass RegexError; ensures that + exceptions can be caught both in re2 and in a potential fallback to re. +- Disable failing test for known corner case. [Andreas van Cranenburgh] +- Remove tests with re.LOCALE flag since it is not allowed with str in + Python 3.6+ [Andreas van Cranenburgh] +- Decode named groups even with bytes patterns; fixes #6. [Andreas van + Cranenburgh] +- Make -std=c++11 the default; fixes #4. [Andreas van Cranenburgh] +- Merge pull request #5 from mayk93/master. [Andreas van Cranenburgh] + + Adding c++ 11 compile flag on Ubuntu +- Adding c++ 11 compile flag on Ubuntu. [Michael] +- Merge pull request #3 from podhmo/macports. [Andreas van Cranenburgh] + + macports support +- Macports support. [podhmo] +- Use STL map for unicodeindices. [Andreas van Cranenburgh] +- Only translate unicode indices when needed. [Andreas van Cranenburgh] +- Update README. [Andreas van Cranenburgh] +- Add -std=c++11 only for clang, because gcc on CentOS 6 does not + support it. [Andreas van Cranenburgh] +- Disable non-matched group tests; irrelevant after dad49cd. [Andreas + van Cranenburgh] +- Merge pull request #2 from messense/master. 
[Andreas van Cranenburgh] + + Fix groupdict decode bug +- Fix groupdict decode bug. [messense] +- Merge pull request #1 from pvaneynd/master. [Andreas van Cranenburgh] + + Ignore non-matched groups when replacing with sub +- Ignore non-matched groups when replacing with sub. [Peter Van Eynde] + + From 3.5 onwards sub() and subn() now replace unmatched groups with + empty strings. See: + + https://docs.python.org/3/whatsnew/3.5.html#re + + This change removes the 'unmatched group' error which occurs when using + re2. +- Fix setup.py unicode error. [Andreas van Cranenburgh] +- Add C++11 param; update URL. [Andreas van Cranenburgh] +- Fix bugs; ensure memory is released; simplify C++ interfacing; + [Andreas van Cranenburgh] + + - Fix bug causing zero-length matches to be returned multiple times + - Use Latin 1 encoding with RE2 when unicode not requested + - Ensure memory is released: + - put del calls in finally blocks + - add missing del call for 'matches' array + - Remove Cython hacks for C++ that are no longer needed; + use const keyword that has been supported for some time. + Fixes Cython 0.24 compilation issue. + - Turn _re2.pxd into includes.pxi. + - remove some tests that are specific to internal Python modules _sre and sre +- Fix Match repr. [Andreas van Cranenburgh] +- Add tests for bug with \\b. [Andreas van Cranenburgh] +- Document support syntax &c. [Andreas van Cranenburgh] + + - add reference of supported syntax to main docstring + - add __all__ attribute defining public members + - add re's purge() function + - add tests for count method + - switch order of prepare_pattern() and _compile() + - rename prepare_pattern() to _prepare_pattern() to signal that it is + semi-private +- Add count method. [Andreas van Cranenburgh] + + - add count method, equivalent to len(findall(...)) + - use arrays in utf8indices + - tweak docstrings +- Move functions around. [Andreas van Cranenburgh] +- Improve substitutions, Python 3 compatibility. 
[Andreas van + Cranenburgh] + + - when running under Python 3+, reject unicode patterns on + bytes data, and vice versa, in according with general Python 3 behavior. + - improve Match.expand() implementation. + - The substitutions by RE2 behave differently from Python (character escapes, + named groups, etc.), so use Match.expand() for anything but simple literal + replacement strings. + - make groupindex of pattern objects public. + - add Pattern.fullmatch() method. + - use #define PY2 from setup.py instead of #ifdef hack. + - debug option for compilation. + - use data() instead of c_str() on C++ strings, and always supply length, + so that strings with null characters are supported. + - bump minimum cython version due to use of bytearray typing + - adapt tests to Python 3; add b and u string prefixes where needed, &c. + - update README +- Add flags parameter to toplevel functions. [Andreas van Cranenburgh] +- Update performance table / missing features. [Andreas van Cranenburgh] +- Workaround for sub(...) with count > 1. [Andreas van Cranenburgh] +- Handle named groups in replacement string; &c. [Andreas van + Cranenburgh] + + - handle named groups in replacement string + - store index of named groups in Pattern object instead of Match object. + - use bytearray for result in _subn_callback +- Pickle Patterns; non-char buffers; &c. [Andreas van Cranenburgh] + + - support pickling of Pattern objects + - support buffers from objects that do not support char buffer (e.g., + integer arrays); does not make a lot of sense, but this is what re does. + - enable benchmarks shown in readme by default; fix typo. + - fix typo in test_re.py +- New buffer API; precompute groups/spans; &c. [Andreas van Cranenburgh] + + - use new buffer API + NB: even though the old buffer interface is deprecated from Python 2.6, + the new buffer interface is only supported on mmap starting from + Python 3. 
+ - avoid creating Match objects in findall() + - precompute groups and spans of Match objects, so that possibly encoded + version of search string (bytestr / cstring) does not need to be kept. + - in _make_spans(), keep state for converting utf8 to unicode indices; + so that there is no quadratic behavior on repeated invocations for + different Match objects. + - release GIL in pattern_Replace / pattern_GlobalReplace + - prepare_pattern: loop over pattern as char * + - advertise Python 3 support in setup.py, remove python 2.5 +- Properly translate pos, endpos indices with unicode, &c. [Andreas van + Cranenburgh] + + - properly translate pos, endpos indices with unicode + - keep original unicode string in Match objects + - separate compile.pxi file +- Re-organize code. [Andreas van Cranenburgh] +- Minor changes. [Andreas van Cranenburgh] +- Python 2/3 compatibility, support buffer objects, &c. [Andreas van + Cranenburgh] + + - Python 2/3 compatibility + - support searching in buffer objects (e.g., mmap) + - add module docstring + - some refactoring + - remove outdated Cython-generated file + - modify setup.py to cythonize as needed. +- Implement finditer as generator. [Andreas van Cranenburgh] +- Merge pull request #31 from sunu/master. [Michael Axiak] + + Add Python 3 support. +- Add Python 3 support. [Tarashish Mishra] +- Version bump. [Michael Axiak] + diff --git a/CHANGELIST b/HISTORY similarity index 100% rename from CHANGELIST rename to HISTORY From 3efb3d826289564f1863a6e57ef6d07c20b4e565 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 26 Jan 2021 21:57:59 +0100 Subject: [PATCH 048/114] update README.rst. fixes #21 --- README.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 403b4ce9..e9167320 100644 --- a/README.rst +++ b/README.rst @@ -16,8 +16,15 @@ Intended as a drop-in replacement for ``re``. 
Unicode is supported by encoding to UTF-8, and bytes strings are treated as UTF-8 when the UNICODE flag is given. For best performance, work with UTF-8 encoded bytes strings. -Platform Dependencies -===================== +Installation +============ + +Normal usage for Linux/Mac/Windows:: + + $ pip install pyre2 + +Compiling from source +--------------------- Requirements for building the C++ extension from the repo source: From d13052c0142812b445aadb5aacf48661955b9e0d Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 26 Jan 2021 21:59:44 +0100 Subject: [PATCH 049/114] bump version --- conda.recipe/meta.yaml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 30fc4fe0..7adae312 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "pyre2" %} -{% set version = "0.3.2" %} +{% set version = "0.3.3" %} package: name: {{ name|lower }} diff --git a/setup.py b/setup.py index ae3daf18..622fb3eb 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ # update the version both here and in conda.recipe/meta.yaml -__version__ = '0.3.2' +__version__ = '0.3.3' # Convert distutils Windows platform specifiers to CMake -A arguments PLAT_TO_CMAKE = { From b1631efd73900813995b009aa5e4cca2003fa647 Mon Sep 17 00:00:00 2001 From: Steve Arnold <nerdboy@gentoo.org> Date: Tue, 2 Feb 2021 09:38:54 -0800 Subject: [PATCH 050/114] add missing tests to sdist package, update readme and ci worflows (#1) readme: update badges, merge install sections, fix some rendering issues --- .github/workflows/conda.yml | 3 +- .github/workflows/main.yml | 7 ++- .github/workflows/release.yml | 20 +++---- MANIFEST.in | 2 + README.rst | 109 ++++++++++++++++++++++++---------- conda.recipe/meta.yaml | 9 ++- setup.py | 4 +- 7 files changed, 104 insertions(+), 50 deletions(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 
b2e95ef9..b93ef097 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,7 +1,8 @@ -name: conda +name: Conda on: workflow_dispatch: + pull_request: push: branches: - master diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 07174e1e..c05086a2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,6 +1,11 @@ name: Build -on: [push, pull_request] +on: + workflow_dispatch: + pull_request: + push: + branches: + - master jobs: build_wheels: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ae38d732..d4dac805 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,10 +7,10 @@ on: - '*' jobs: - create_wheels: - name: Build wheels on ${{ matrix.os }} for Python + cibw_wheels: runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ubuntu-20.04, macos-latest, windows-latest] @@ -59,12 +59,11 @@ jobs: - uses: actions/upload-artifact@v2 with: - name: wheels + name: cibw-wheels path: ./wheelhouse/*.whl create_release: - name: Create Release - needs: [create_wheels] + needs: [cibw_wheels] runs-on: ubuntu-20.04 steps: @@ -83,18 +82,17 @@ jobs: with: python-version: 3.7 + # download all artifacts to project dir - uses: actions/download-artifact@v2 - with: - name: wheels - name: Install gitchangelog run: | - pip install git+https://github.com/freepn/gitchangelog@3.0.4-4 + pip install git+https://github.com/freepn/gitchangelog@3.0.5#egg=gitchangelog - name: Generate changes file run: | - bash -c 'cat $(get-rcpath) > .gitchangelog.rc' - bash -c 'gitchangelog $(git tag -l | tail -n2 | head -n1)..${{ env.VERSION }} > CHANGES.md' + bash -c 'export GITCHANGELOG_CONFIG_FILENAME=$(get-rcpath); \ + gitchangelog $(git tag --sort=taggerdate | tail -n2 | head -n1)..${{ env.VERSION }} > CHANGES.md' - name: Create draft release id: create_release @@ -106,6 +104,6 @@ jobs: name: Release v${{ env.VERSION }} body_path: CHANGES.md draft: true - 
prerelease: false + prerelease: true # uncomment below to upload wheels to github releases # files: wheels/pyre2*.whl diff --git a/MANIFEST.in b/MANIFEST.in index 770f6f68..43d49061 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,8 @@ global-include CMakeLists.txt *.cmake include AUTHORS README.rst HISTORY CHANGELOG.rst LICENSE graft src +graft tests +recursive-exclude tests *.gz recursive-exclude .tox * recursive-exclude .github * recursive-exclude vcpkg * diff --git a/README.rst b/README.rst index e9167320..ae86559e 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,61 @@ pyre2 ===== -.. contents:: +**Python wrapper for RE2** + +CI Status + +.. image:: https://github.com/andreasvc/pyre2/workflows/Build/badge.svg + :target: https://github.com/freepn/andreasvc/pyre2/actions?query=workflow:Build + :alt: Build CI Status + +.. image:: https://github.com/andreasvc/pyre2/workflows/Conda/badge.svg + :target: https://github.com/freepn/andreasvc/pyre2/actions?query=workflow:Conda + :alt: Conda CI Status + +.. image:: https://github.com/andreasvc/pyre2/workflows/Release/badge.svg + :target: https://github.com/freepn/andreasvc/pyre2/actions?query=workflow:Release + :alt: Release CI Status + +.. image:: https://img.shields.io/github/v/tag/andreasvc/pyre2?color=green&include_prereleases&label=latest%20release + :target: https://github.com/andreasvc/pyre2/releases + :alt: GitHub tag (latest SemVer, including pre-release) + + +Packaging + +.. image:: https://badge.fury.io/py/pyre2.svg + :target: https://badge.fury.io/py/pyre2 + :alt: Pypi version + +.. image:: https://img.shields.io/github/license/andreasvc/pyre2 + :target: https://github.com/andreasvc/pyre2/blob/master/LICENSE + :alt: License + +.. image:: https://img.shields.io/badge/python-3.6+-blue.svg + :target: https://www.python.org/downloads/ + :alt: Python version + + +Anaconda cloud + +.. 
image:: https://anaconda.org/conda-forge/pyre2/badges/version.svg + :target: https://anaconda.org/conda-forge/pyre2 + :alt: version + +.. image:: https://anaconda.org/conda-forge/pyre2/badges/platforms.svg + :target: https://anaconda.org/conda-forge/pyre2 + :alt: platforms + +.. image:: https://anaconda.org/conda-forge/pyre2/badges/downloads.svg + :target: https://anaconda.org/conda-forge/pyre2 + :alt: downloads + + +.. contents:: Table of Contents + :depth: 2 + :backlinks: top + Summary ======= @@ -28,12 +82,14 @@ Compiling from source Requirements for building the C++ extension from the repo source: -* Building requires RE2, pybind11, and cmake installed in the build +* A build environment with ``gcc`` or ``clang`` (e.g. ``sudo apt-get install build-essential``) +* Build tools and libraries: RE2, pybind11, and cmake installed in the build environment. + On Ubuntu/Debian, install cmake, pybind11-dev, and libre2-dev packages + (also install Python development headers if needed, e.g. ``sudo apt-get install python-dev``) + On Gentoo, install dev-util/cmake, dev-python/pybind11, and dev-libs/re2 - + For a venv you can install the pybind11 and cython packages from PyPI + + For a venv you can install the pybind11, cmake, and cython packages from PyPI On MacOS, use the ``brew`` package manager:: @@ -51,14 +107,25 @@ cmake generator. 
For example: $ CMAKE_GENERATOR="Unix Makefiles" CMAKE_TOOLCHAIN_FILE=clang_toolchain.cmake tox -e deploy +After the prerequisites are installed, install as follows:: + + $ pip install https://github.com/andreasvc/pyre2/archive/master.zip + +For development, get the source:: + + $ git clone git://github.com/andreasvc/pyre2.git + $ cd pyre2 + $ make install + Platform-agnostic building with conda ------------------------------------- -An alternative to the above is provided via the ``conda`` recipe (use the +An alternative to the above is provided via the `conda`_ recipe (use the `miniconda installer`_ if you don't have ``conda`` installed already). +.. _conda: https://anaconda.org/conda-forge/pyre2 .. _miniconda installer: https://docs.conda.io/en/latest/miniconda.html @@ -73,11 +140,11 @@ The stated goal of this module is to be a drop-in replacement for ``re``, i.e.:: import re That being said, there are features of the ``re`` module that this module may -never have; these will be handled through fallback to the original ``re`` module``: +never have; these will be handled through fallback to the original ``re`` module: - - lookahead assertions ``(?!...)`` - - backreferences (``\\n`` in search pattern) - - \W and \S not supported inside character classes +* lookahead assertions ``(?!...)`` +* backreferences (``\\n`` in search pattern) +* \W and \S not supported inside character classes On the other hand, unicode character classes are supported (e.g., ``\p{Greek}``). Syntax reference: https://github.com/google/re2/wiki/Syntax @@ -96,28 +163,6 @@ function ``set_fallback_notification`` determines the behavior in these cases:: ``re.FALLBACK_QUIETLY`` (default), ``re.FALLBACK_WARNING`` (raise a warning), and ``re.FALLBACK_EXCEPTION`` (raise an exception). -Installation -============ - -Prerequisites: - -* The `re2 library from Google <https://github.com/google/re2>`_ -* The Python development headers (e.g. 
``sudo apt-get install python-dev``) -* A build environment with ``gcc`` or ``clang`` (e.g. ``sudo apt-get install build-essential``) -* Cython 0.20+ (``pip install cython``) - -After the prerequisites are installed, install as follows (``pip3`` for python3):: - - $ pip install https://github.com/andreasvc/pyre2/archive/master.zip - -For development, get the source:: - - $ git clone git://github.com/andreasvc/pyre2.git - $ cd pyre2 - $ make install - -(or ``make install3`` for Python 3) - Documentation ============= @@ -194,8 +239,8 @@ The tests show the following differences with Python's ``re`` module: * The ``$`` operator in Python's ``re`` matches twice if the string ends with ``\n``. This can be simulated using ``\n?$``, except when doing substitutions. -* ``pyre2`` and Python's ``re`` may behave differently with nested groups. - See ``tests/emptygroups.txt`` for the examples. +* The ``pyre2`` module and Python's ``re`` may behave differently with nested groups. + See ``tests/emptygroups.txt`` for the examples. Please report any further issues with ``pyre2``. diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 7adae312..0245b6a4 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "pyre2" %} -{% set version = "0.3.3" %} +{% set version = "0.3.4.dev0" %} package: name: {{ name|lower }} @@ -11,6 +11,7 @@ source: build: number: 0 script: {{ PYTHON }} -m pip install . 
-vv + skip: true # [py<36] requirements: build: @@ -28,12 +29,14 @@ requirements: - re2 test: + commands: + - export "PYTHONIOENCODING=utf8" # [unix] + - set "PYTHONIOENCODING=utf8" # [win] + - python -m unittest discover -f -s tests imports: - re2 source_files: - tests - commands: - - python -m unittest discover -f -s tests about: home: "https://github.com/andreasvc/pyre2" diff --git a/setup.py b/setup.py index 622fb3eb..5ad5bbe1 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ # update the version both here and in conda.recipe/meta.yaml -__version__ = '0.3.3' +__version__ = '0.3.4.dev0' # Convert distutils Windows platform specifiers to CMake -A arguments PLAT_TO_CMAKE = { @@ -23,7 +23,6 @@ # A CMakeExtension needs a sourcedir instead of a file list. class CMakeExtension(Extension): def __init__(self, name, sourcedir=""): - # auditwheel repair command needs libraries= Extension.__init__(self, name, sources=[], libraries=['re2']) self.sourcedir = os.path.abspath(sourcedir) @@ -118,4 +117,5 @@ def build_extension(self, ext): version=__version__, ext_modules=[CMakeExtension('re2')], cmdclass={'build_ext': CMakeBuild}, + zip_safe=False, ) From 6356912024f1a068e9d93737423f0c45f46293b0 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Tue, 2 Feb 2021 10:17:30 -0800 Subject: [PATCH 051/114] update changelog (and trigger ci rebuild) Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- CHANGELOG.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1b8a8c3b..0e1cb859 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,16 @@ +v0.3.3 (2021-01-26) +------------------- + +- Bump version. [Andreas van Cranenburgh] +- Update README.rst. fixes #21. [Andreas van Cranenburgh] +- Merge pull request #20 from freepn/new-bld. 
[Andreas van Cranenburgh] + + New cmake and pybind11 build setup +- Add .gitchangelog.rc and generated CHANGELOG.rst (keep HISTORY) + [Stephen L Arnold] +- Update wheel builds for Linux, Macos, and Windows. [Stephen L Arnold] + + v0.3.2 (2020-12-16) ------------------- - Bump version. [Andreas van Cranenburgh] From d167fe0f871cbbe7051159e7411977f535e47170 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Fri, 4 Dec 2020 20:47:58 -0800 Subject: [PATCH 052/114] fix pickle_test (tests.test_re.ReTests) ... ERROR (run tests with nose) Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- tests/test_re.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/test_re.py b/tests/test_re.py index 9d381d38..d8f04d36 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -442,19 +442,21 @@ def test_re_escape(self): def test_pickling(self): import pickle - self.pickle_test(pickle) + + def pickle_test(pickle): + oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') + s = pickle.dumps(oldpat) + newpat = pickle.loads(s) + self.assertEqual(oldpat, newpat) + + pickle_test(pickle) + try: import cPickle as pickle except ImportError: pass else: - self.pickle_test(pickle) - - def pickle_test(self, pickle): - oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') - s = pickle.dumps(oldpat) - newpat = pickle.loads(s) - self.assertEqual(oldpat, newpat) + pickle_test(pickle) def test_constants(self): self.assertEqual(re.I, re.IGNORECASE) @@ -685,9 +687,13 @@ def test_dealloc(self): def run_re_tests(): - from re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR + try: + from tests.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR + except ImportError: + from re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR + if verbose: - print('Running re_tests test suite') + print('\nRunning re_tests test suite') else: # To save time, only run the first and last 10 tests #tests = tests[:10] + 
tests[-10:] @@ -802,9 +808,6 @@ def run_re_tests(): if result is None: print('=== Fails on unicode-sensitive match', t) -def test_main(): - run_unittest(ReTests) - run_re_tests() if __name__ == "__main__": - test_main() + unittest.main() From d8c3500ca728723a9fc1f01530d20f38c0f1ddbf Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Tue, 2 Feb 2021 12:49:17 -0800 Subject: [PATCH 053/114] fix: pkg: add simplejson to test deps, remove excelude for wikidata blob Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- MANIFEST.in | 1 - setup.cfg | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 43d49061..dc0679de 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,6 @@ global-include CMakeLists.txt *.cmake include AUTHORS README.rst HISTORY CHANGELOG.rst LICENSE graft src graft tests -recursive-exclude tests *.gz recursive-exclude .tox * recursive-exclude .github * recursive-exclude vcpkg * diff --git a/setup.cfg b/setup.cfg index 8f10c26c..b9a889d6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,6 +25,7 @@ zip_safe = False [options.extras_require] test = nose + simplejson [nosetests] verbosity = 3 From 44b702b6207c96d5445dbd20bf6bd87a89f2be9d Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Fri, 5 Feb 2021 22:43:36 +0100 Subject: [PATCH 054/114] update README, fix Makefile --- Makefile | 8 ++++---- README | 1 - README.rst | 13 +++---------- 3 files changed, 7 insertions(+), 15 deletions(-) delete mode 120000 README diff --git a/Makefile b/Makefile index 8aa13914..16484df3 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,12 @@ install: - python setup.py install --user --cython + python setup.py install --user test: install (cd tests && python re2_test.py) (cd tests && python test_re.py) install3: - python3 setup.py install --user --cython + python3 setup.py install --user test3: install3 (cd tests && python3 re2_test.py) @@ -19,13 +19,13 @@ clean: rm -rf 
src/re2.cpp &>/dev/null valgrind: - python3.5-dbg setup.py install --user --cython && \ + python3.5-dbg setup.py install --user && \ (cd tests && valgrind --tool=memcheck --suppressions=../valgrind-python.supp \ --leak-check=full --show-leak-kinds=definite \ python3.5-dbg test_re.py) valgrind2: - python3.5-dbg setup.py install --user --cython && \ + python3.5-dbg setup.py install --user && \ (cd tests && valgrind --tool=memcheck --suppressions=../valgrind-python.supp \ --leak-check=full --show-leak-kinds=definite \ python3.5-dbg re2_test.py) diff --git a/README b/README deleted file mode 120000 index 92cacd28..00000000 --- a/README +++ /dev/null @@ -1 +0,0 @@ -README.rst \ No newline at end of file diff --git a/README.rst b/README.rst index ae86559e..de23d019 100644 --- a/README.rst +++ b/README.rst @@ -86,8 +86,7 @@ Requirements for building the C++ extension from the repo source: * Build tools and libraries: RE2, pybind11, and cmake installed in the build environment. - + On Ubuntu/Debian, install cmake, pybind11-dev, and libre2-dev packages - (also install Python development headers if needed, e.g. ``sudo apt-get install python-dev``) + + On Ubuntu/Debian: ``sudo apt-get install build-essential cmake ninja-build python3-dev cython3 pybind11-dev libre2-dev`` + On Gentoo, install dev-util/cmake, dev-python/pybind11, and dev-libs/re2 + For a venv you can install the pybind11, cmake, and cython packages from PyPI @@ -101,16 +100,10 @@ On Windows use the ``vcpkg`` package manager:: You can pass some cmake environment variables to alter the build type or pass a toolchain file (the latter is required on Windows) or specify the -cmake generator. For example: - -:: +cmake generator. 
For example:: $ CMAKE_GENERATOR="Unix Makefiles" CMAKE_TOOLCHAIN_FILE=clang_toolchain.cmake tox -e deploy -After the prerequisites are installed, install as follows:: - - $ pip install https://github.com/andreasvc/pyre2/archive/master.zip - For development, get the source:: $ git clone git://github.com/andreasvc/pyre2.git @@ -166,7 +159,7 @@ and ``re.FALLBACK_EXCEPTION`` (raise an exception). Documentation ============= -Consult the docstring in the source code or interactively +Consult the docstrings in the source code or interactively through ipython or ``pydoc re2`` etc. Unicode Support From f16303351ec20f45a3fcc3e24352aefe8f38c87c Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Fri, 5 Feb 2021 22:53:07 +0100 Subject: [PATCH 055/114] Makefile: default to Python 3 --- Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 16484df3..e7293ece 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,17 @@ install: - python setup.py install --user - -test: install - (cd tests && python re2_test.py) - (cd tests && python test_re.py) - -install3: python3 setup.py install --user -test3: install3 +test: install (cd tests && python3 re2_test.py) (cd tests && python3 test_re.py) +install2: + python2 setup.py install --user + +test2: install2 + (cd tests && python2 re2_test.py) + (cd tests && python2 test_re.py) + clean: rm -rf build &>/dev/null rm -rf src/*.so src/*.html &>/dev/null From 98db9d4e8180952337f3789c57d509d7a7500d65 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Fri, 5 Feb 2021 22:53:51 +0100 Subject: [PATCH 056/114] tweak order of badges --- README.rst | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/README.rst b/README.rst index de23d019..0a8707ea 100644 --- a/README.rst +++ b/README.rst @@ -1,34 +1,27 @@ ===== -pyre2 +pyre2: Python RE2 wrapper for linear-time regular expressions ===== 
-**Python wrapper for RE2** - -CI Status - .. image:: https://github.com/andreasvc/pyre2/workflows/Build/badge.svg - :target: https://github.com/freepn/andreasvc/pyre2/actions?query=workflow:Build + :target: https://github.com/andreasvc/pyre2/actions?query=workflow:Build :alt: Build CI Status -.. image:: https://github.com/andreasvc/pyre2/workflows/Conda/badge.svg - :target: https://github.com/freepn/andreasvc/pyre2/actions?query=workflow:Conda - :alt: Conda CI Status - .. image:: https://github.com/andreasvc/pyre2/workflows/Release/badge.svg - :target: https://github.com/freepn/andreasvc/pyre2/actions?query=workflow:Release + :target: https://github.com/andreasvc/pyre2/actions?query=workflow:Release :alt: Release CI Status .. image:: https://img.shields.io/github/v/tag/andreasvc/pyre2?color=green&include_prereleases&label=latest%20release :target: https://github.com/andreasvc/pyre2/releases :alt: GitHub tag (latest SemVer, including pre-release) - -Packaging - .. image:: https://badge.fury.io/py/pyre2.svg :target: https://badge.fury.io/py/pyre2 :alt: Pypi version +.. image:: https://github.com/andreasvc/pyre2/workflows/Conda/badge.svg + :target: https://github.com/andreasvc/pyre2/actions?query=workflow:Conda + :alt: Conda CI Status + .. image:: https://img.shields.io/github/license/andreasvc/pyre2 :target: https://github.com/andreasvc/pyre2/blob/master/LICENSE :alt: License @@ -37,9 +30,6 @@ Packaging :target: https://www.python.org/downloads/ :alt: Python version - -Anaconda cloud - .. 
image:: https://anaconda.org/conda-forge/pyre2/badges/version.svg :target: https://anaconda.org/conda-forge/pyre2 :alt: version From 2ffea84c09195223ca6b54ab1fdb5baf9a38a1b9 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sat, 6 Feb 2021 00:59:28 +0100 Subject: [PATCH 057/114] use pytest; fixes #23 --- Makefile | 6 ++---- pyproject.toml | 11 +++++++++-- setup.cfg | 13 +------------ tests/charliterals.txt | 2 ++ tests/count.txt | 2 ++ tests/emptygroups.txt | 2 ++ tests/findall.txt | 1 + tests/finditer.txt | 3 ++- tests/match_expand.txt | 1 + tests/mmap.txt | 3 ++- tests/namedgroups.txt | 1 + tests/pattern.txt | 2 ++ tests/performance.py | 2 -- tests/re2_test.py | 17 ----------------- tests/search.txt | 6 +++++- tests/split.txt | 1 + tests/sub.txt | 7 ++++++- tests/test_re.py | 4 ++-- tests/unicode.txt | 3 ++- 19 files changed, 43 insertions(+), 44 deletions(-) delete mode 100644 tests/re2_test.py diff --git a/Makefile b/Makefile index e7293ece..2e4b6b76 100644 --- a/Makefile +++ b/Makefile @@ -2,15 +2,13 @@ install: python3 setup.py install --user test: install - (cd tests && python3 re2_test.py) - (cd tests && python3 test_re.py) + pytest --doctest-glob='*.txt' install2: python2 setup.py install --user test2: install2 - (cd tests && python2 re2_test.py) - (cd tests && python2 test_re.py) + python2 -m pytest --doctest-glob='*.txt' clean: rm -rf build &>/dev/null diff --git a/pyproject.toml b/pyproject.toml index e2bdcc86..94cf1179 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,10 +2,17 @@ requires = [ "setuptools>=42", "wheel", - "Cython", + "Cython>=0.20", "pybind11>=2.6.0", "ninja; sys_platform != 'Windows'", - "cmake>=3.12", + "cmake>=3.15", ] build-backend = "setuptools.build_meta" + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-ra -q --doctest-glob='*.txt'" +testpaths = [ + "tests", +] diff --git a/setup.cfg b/setup.cfg index b9a889d6..08804222 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,18 +24,7 @@ 
zip_safe = False [options.extras_require] test = - nose - simplejson - -[nosetests] -verbosity = 3 -with-doctest = 1 -doctest-extension = txt -exe = 1 -#with-coverage = 1 -#cover-package = py_re2 -#cover-min-percentage = 90 -doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE + pytest [flake8] # these error codes interfere with Black diff --git a/tests/charliterals.txt b/tests/charliterals.txt index e6597151..2eaea128 100644 --- a/tests/charliterals.txt +++ b/tests/charliterals.txt @@ -1,4 +1,6 @@ >>> import re2 as re + >>> import warnings + >>> warnings.filterwarnings('ignore', category=DeprecationWarning) character literals: diff --git a/tests/count.txt b/tests/count.txt index 3c848fb7..ce3525ad 100644 --- a/tests/count.txt +++ b/tests/count.txt @@ -36,3 +36,5 @@ contains tests True >>> re2.contains('a', 'bbbbb') False + + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/emptygroups.txt b/tests/emptygroups.txt index fbe661bc..424c8ba2 100644 --- a/tests/emptygroups.txt +++ b/tests/emptygroups.txt @@ -32,3 +32,5 @@ The following show different behavior for re and re2: ('Hello', '') >>> re2.search(r'((.*)*.)', 'Hello').groups() ('Hello', 'Hell') + + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/findall.txt b/tests/findall.txt index dee28e56..c753b936 100644 --- a/tests/findall.txt +++ b/tests/findall.txt @@ -39,3 +39,4 @@ If pattern matches an empty string, do it only once at the end: >>> re2.findall(r'\b', 'The quick brown fox jumped over the lazy dog') ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/finditer.txt b/tests/finditer.txt index 10186903..3d60d199 100644 --- a/tests/finditer.txt +++ b/tests/finditer.txt @@ -4,7 +4,7 @@ Simple tests for the ``finditer`` function. 
>>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> with open('cnn_homepage.dat') as tmp: + >>> with open('tests/cnn_homepage.dat') as tmp: ... data = tmp.read() >>> len(list(re2.finditer(r'\w+', data))) 14230 @@ -25,3 +25,4 @@ Simple tests for the ``finditer`` function. <re2.Match object; span=(11, 11), match=''> + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/match_expand.txt b/tests/match_expand.txt index 72ae77f2..b3d5652c 100644 --- a/tests/match_expand.txt +++ b/tests/match_expand.txt @@ -26,3 +26,4 @@ expand templates as if the .sub() method was called on the pattern. >>> m.expand('\t\n\x0b\r\x0c\x07\x08\\B\\Z\x07\\A\\w\\W\\s\\S\\d\\D') '\t\n\x0b\r\x0c\x07\x08\\B\\Z\x07\\A\\w\\W\\s\\S\\d\\D' + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/mmap.txt b/tests/mmap.txt index afbe2191..12ffa974 100644 --- a/tests/mmap.txt +++ b/tests/mmap.txt @@ -6,7 +6,7 @@ Testing re2 on buffer object >>> import mmap >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> tmp = open("cnn_homepage.dat", "rb+") + >>> tmp = open("tests/cnn_homepage.dat", "rb+") >>> data = mmap.mmap(tmp.fileno(), 0) >>> len(list(re2.finditer(b'\\w+', data))) @@ -15,3 +15,4 @@ Testing re2 on buffer object >>> data.close() >>> tmp.close() + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/namedgroups.txt b/tests/namedgroups.txt index 25598653..70f561a3 100644 --- a/tests/namedgroups.txt +++ b/tests/namedgroups.txt @@ -53,3 +53,4 @@ Make sure positions are converted properly for unicode >>> m.span(u"last_name") (6, 10) + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/pattern.txt b/tests/pattern.txt index 0e21d71b..aab47359 100644 --- a/tests/pattern.txt +++ b/tests/pattern.txt @@ -8,3 +8,5 @@ We should be able to get back what we put in. 
>>> re2.compile("(foo|b[a]r?)").pattern '(foo|b[a]r?)' + + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/performance.py b/tests/performance.py index 85258443..25eb711c 100644 --- a/tests/performance.py +++ b/tests/performance.py @@ -9,8 +9,6 @@ import it. """ from timeit import Timer -import simplejson - import re2 import re try: diff --git a/tests/re2_test.py b/tests/re2_test.py deleted file mode 100644 index 7a2d69a6..00000000 --- a/tests/re2_test.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python -import os -import sys -import glob -import doctest - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -os.chdir(os.path.dirname(__file__) or '.') - -def testall(): - for file in glob.glob(os.path.join(os.path.dirname(__file__), "*.txt")): - print("Testing %s..." % file) - doctest.testfile(os.path.join(".", os.path.basename(file))) - -if __name__ == "__main__": - testall() diff --git a/tests/search.txt b/tests/search.txt index 974159ad..9c1e18f0 100644 --- a/tests/search.txt +++ b/tests/search.txt @@ -3,6 +3,9 @@ These are simple tests of the ``search`` function >>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + >>> import warnings + >>> warnings.filterwarnings('ignore', category=DeprecationWarning) + >>> re2.search("((?:[01]?\d{1,2}|2[0-4]\d|25[0-5])\.){3}(?:[01]?\d{1,2}|2[0-4]\d|25[0-5])", "hello 28.224.2.1 test").group() '28.224.2.1' @@ -13,7 +16,7 @@ These are simple tests of the ``search`` function >>> len(re2.search('(?:a{1000})?a{999}', input).group()) 999 - >>> with open('cnn_homepage.dat') as tmp: + >>> with open('tests/cnn_homepage.dat') as tmp: ... 
data = tmp.read() >>> re2.search(r'\n#hdr-editions(.*?)\n', data).groups() (' a { text-decoration:none; }',) @@ -23,3 +26,4 @@ Verify some sanity checks >>> re2.compile(r'x').search('x', 2000) >>> re2.compile(r'x').search('x', 1, -300) + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/split.txt b/tests/split.txt index a597a8c6..a3e44bc6 100644 --- a/tests/split.txt +++ b/tests/split.txt @@ -14,3 +14,4 @@ This one tests to make sure that unicode / utf8 data is parsed correctly. ... b'\xe4\xbd\xa0\xe5\x91\xa2?'] True + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/sub.txt b/tests/sub.txt index e2b0ba63..cca1f0b0 100644 --- a/tests/sub.txt +++ b/tests/sub.txt @@ -9,7 +9,12 @@ with an empty string. >>> import gzip >>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> with gzip.open('wikipages.xml.gz', 'rb') as tmp: + >>> import warnings + >>> warnings.filterwarnings('ignore', category=DeprecationWarning) + + >>> with gzip.open('tests/wikipages.xml.gz', 'rb') as tmp: ... 
data = tmp.read() >>> print(hashlib.md5(re2.sub(b'\(.*?\)', b'', data)).hexdigest()) b7a469f55ab76cd5887c81dbb0cfe6d3 + + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tests/test_re.py b/tests/test_re.py index d8f04d36..cbfcb963 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -1,8 +1,8 @@ from __future__ import print_function try: - from test.test_support import verbose, run_unittest, import_module + from test.test_support import verbose except ImportError: - from test.support import verbose, run_unittest, import_module + from test.support import verbose import re2 as re from re import Scanner import os diff --git a/tests/unicode.txt b/tests/unicode.txt index 53019221..71d497b8 100644 --- a/tests/unicode.txt +++ b/tests/unicode.txt @@ -48,7 +48,7 @@ Test unicode character groups True >>> re.search(u'\\S', u'\u1680x', re.UNICODE).group(0) == u'x' True - >>> re.set_fallback_notification(re.FALLBACK_WARNING) + >>> re.set_fallback_notification(re.FALLBACK_QUIETLY) >>> re.search(u'[\\W]', u'\u0401!', re.UNICODE).group(0) == u'!' 
True >>> re.search(u'[\\S]', u'\u1680x', re.UNICODE).group(0) == u'x' @@ -68,3 +68,4 @@ Positions are translated transparently between unicode and UTF-8 >>> re.search(u' (.)', data).string == data True + >>> re.set_fallback_notification(re.FALLBACK_QUIETLY) From 5105a6601aaad6463bbd0b81cebe8542d7030681 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sat, 6 Feb 2021 01:21:54 +0100 Subject: [PATCH 058/114] fix Python 2 compatibility --- src/CMakeLists.txt | 1 - src/compile.pxi | 4 ++-- src/includes.pxi | 1 - src/re2.pyx | 1 + 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 82dc8231..61d63aa3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -30,7 +30,6 @@ set_target_properties(${cython_module} target_include_directories(${cython_module} PUBLIC ${PYTHON_INCLUDE_DIRS}) -target_compile_definitions(${cython_module} PRIVATE PY2=0) target_compile_definitions(${cython_module} PRIVATE VERSION_INFO=${SCM_VERSION_INFO}) # here we get to jump through some hoops to find libre2 on the manylinux diff --git a/src/compile.pxi b/src/compile.pxi index 1e53f602..887a2778 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -46,9 +46,9 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): if not encoded and flags & _U: # re.UNICODE pass # can use UNICODE with bytes pattern, but assumes valid UTF-8 # raise ValueError("can't use UNICODE flag with a bytes pattern") - elif encoded and not (flags & re.ASCII): + elif encoded and not (flags & ASCII): # re.ASCII (not in Python 2) newflags = flags | _U # re.UNICODE - elif encoded and flags & re.ASCII: + elif encoded and flags & ASCII: newflags = flags & ~_U # re.UNICODE try: pattern = _prepare_pattern(pattern, newflags) diff --git a/src/includes.pxi b/src/includes.pxi index a915e073..4726eac6 100644 --- a/src/includes.pxi +++ b/src/includes.pxi @@ -8,7 +8,6 @@ from cpython.version cimport PY_MAJOR_VERSION cdef extern from *: - 
cdef int PY2 cdef void emit_ifndef_py_unicode_wide "#if !defined(Py_UNICODE_WIDE) //" () cdef void emit_endif "#endif //" () diff --git a/src/re2.pyx b/src/re2.pyx index 6638f5fb..75150ffe 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -132,6 +132,7 @@ VERSION_HEX = 0x000217 cdef int _I = I, _M = M, _S = S, _U = U, _X = X, _L = L cdef int current_notification = FALLBACK_QUIETLY +cdef bint PY2 = PY_MAJOR_VERSION == 2 # Type of compiled re object from Python stdlib SREPattern = type(re.compile('')) From bf6fd1a15b354b5b1e3b193f60120877bda6055b Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Wed, 3 Feb 2021 19:37:38 -0800 Subject: [PATCH 059/114] chg: test: rename imported test helpers to avoid any discovery issues Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- tests/{re_tests.py => re_utils.py} | 0 tests/test_re.py | 14 +++++--------- 2 files changed, 5 insertions(+), 9 deletions(-) rename tests/{re_tests.py => re_utils.py} (100%) diff --git a/tests/re_tests.py b/tests/re_utils.py similarity index 100% rename from tests/re_tests.py rename to tests/re_utils.py diff --git a/tests/test_re.py b/tests/test_re.py index cbfcb963..56012bee 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -125,7 +125,7 @@ def test_bug_1661(self): def test_bug_3629(self): # A regex that triggered a bug in the sre-code validator - re.compile("(?P<quote>)(?(quote))") + re.compile('(?P<quote>)(?(quote))') def test_sub_template_numeric_escape(self): # bug 776311 and friends @@ -686,14 +686,14 @@ def test_dealloc(self): self.assertRaises(TypeError, re.finditer, "a", {}) -def run_re_tests(): +def test_re_suite(): try: - from tests.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR + from tests.re_utils import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR except ImportError: - from re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR + from re_utils import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: - print('\nRunning 
re_tests test suite') + print('\nRunning test_re_suite ...') else: # To save time, only run the first and last 10 tests #tests = tests[:10] + tests[-10:] @@ -807,7 +807,3 @@ def run_re_tests(): result = obj.search(s) if result is None: print('=== Fails on unicode-sensitive match', t) - - -if __name__ == "__main__": - unittest.main() From 511742267c442c0bf29fc0008883d1a8e9900218 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sat, 6 Feb 2021 11:16:31 -0800 Subject: [PATCH 060/114] fix: dev: encode test data, add pytest and tox cfgs, update reqs * also some readme and manifest cleanup (fix title, add global exclude) Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- MANIFEST.in | 1 + Makefile | 4 +- README.rst | 6 +-- pytest.ini | 8 +++ requirements-dev.txt | 3 ++ setup.cfg | 18 +++++++ tests/performance.py | 4 +- tox.ini | 121 +++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 157 insertions(+), 8 deletions(-) create mode 100644 pytest.ini create mode 100644 requirements-dev.txt create mode 100644 tox.ini diff --git a/MANIFEST.in b/MANIFEST.in index dc0679de..305a4445 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,6 +2,7 @@ global-include CMakeLists.txt *.cmake include AUTHORS README.rst HISTORY CHANGELOG.rst LICENSE graft src graft tests +global-exclude *.py[cod] __pycache__ recursive-exclude .tox * recursive-exclude .github * recursive-exclude vcpkg * diff --git a/Makefile b/Makefile index 2e4b6b76..d2b7fea3 100644 --- a/Makefile +++ b/Makefile @@ -12,9 +12,7 @@ test2: install2 clean: rm -rf build &>/dev/null - rm -rf src/*.so src/*.html &>/dev/null - rm -rf re2.so tests/re2.so &>/dev/null - rm -rf src/re2.cpp &>/dev/null + rm -f *.so src/*.so src/re2.cpp src/*.html &>/dev/null valgrind: python3.5-dbg setup.py install --user && \ diff --git a/README.rst b/README.rst index 0a8707ea..9772fd05 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ -===== -pyre2: Python RE2 wrapper for linear-time regular expressions 
-===== +=============================================================== + pyre2: Python RE2 wrapper for linear-time regular expressions +=============================================================== .. image:: https://github.com/andreasvc/pyre2/workflows/Build/badge.svg :target: https://github.com/andreasvc/pyre2/actions?query=workflow:Build diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..7c4eae74 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,8 @@ +[pytest] +minversion = 6.0 +addopts = --doctest-modules +doctest_optionflags = + ELLIPSIS + NORMALIZE_WHITESPACE +testpaths = + tests diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..8c8f6e0f --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,3 @@ +pytest +regex +simplejson diff --git a/setup.cfg b/setup.cfg index 08804222..26f17b08 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,7 +26,25 @@ zip_safe = False test = pytest +perf = + regex + simplejson + +[nosetests] +verbosity = 3 +with-doctest = 1 +doctest-extension = txt +exe = 1 +#with-coverage = 1 +#cover-package = py_re2 +#cover-min-percentage = 90 +doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE + [flake8] # these error codes interfere with Black ignore = E203, E231, E501, W503, B950 select = C,E,F,W,B,B9 + +[egg_info] +tag_build = +tag_date = 0 diff --git a/tests/performance.py b/tests/performance.py index 25eb711c..66dd034f 100644 --- a/tests/performance.py +++ b/tests/performance.py @@ -117,7 +117,7 @@ def print_row(row): def register_test(name, pattern, num_runs = 100, **data): def decorator(method): tests[name] = method - method.pattern = pattern + method.pattern = pattern.encode('utf-8') method.num_runs = num_runs method.data = data @@ -155,7 +155,7 @@ def replace_wikilinks(pattern, data): """ This test replaces links of the form [[Obama|Barack_Obama]] to Obama. 
""" - return len(pattern.sub(r'\1', data)) + return len(pattern.sub(r'\1'.encode('utf-8'), data)) diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..3151f2dd --- /dev/null +++ b/tox.ini @@ -0,0 +1,121 @@ +[tox] +envlist = py3{6,7,8,9} +skip_missing_interpreters = true +isolated_build = true + +[gh-actions] +3.6 = py36 +3.7 = py37 +3.8 = py38 +3.9 = py39 + +[testenv] +skip_install = true + +passenv = + CI + CC + CXX + CMAKE_BUILD_OVERRIDE + CMAKE_TOOLCHAIN_FILE + CMAKE_GENERATOR + PIP_DOWNLOAD_CACHE + +setenv = + PYTHONPATH=. + +deps = + pip>=20.0.1 + nose + +commands = + python setup.py build_ext --inplace + nosetests -sx tests/re2_test.py + nosetests -sx tests/test_re.py + +[testenv:dev] +passenv = + CI + CC + CXX + CMAKE_BUILD_OVERRIDE + CMAKE_TOOLCHAIN_FILE + CMAKE_GENERATOR + PIP_DOWNLOAD_CACHE + +deps = + pip>=20.0.1 + +commands = + pip install -e .[test] + # use --capture=no to see all the doctest output + python -m pytest -v tests/re2_test.py + python -m pytest -v tests/test_re.py + +[testenv:perf] +passenv = + CI + CC + CXX + CMAKE_BUILD_OVERRIDE + CMAKE_TOOLCHAIN_FILE + CMAKE_GENERATOR + PIP_DOWNLOAD_CACHE + +deps = + pip>=20.0.1 + +commands = + pip install .[perf] + python tests/performance.py + +[testenv:deploy] +passenv = + pythonLocation + CI + CC + CXX + CMAKE_BUILD_OVERRIDE + CMAKE_TOOLCHAIN_FILE + CMAKE_GENERATOR + PIP_DOWNLOAD_CACHE + +allowlist_externals = bash + +deps = + pip>=20.0.1 + pep517 + twine + #git+https://github.com/freepn/gitchangelog@3.0.5#egg=gitchangelog + +commands = + python -m pep517.build . + twine check dist/* + #bash -c 'gitchangelog $(git tag --sort=taggerdate | tail -n2 | head -n1)..' + +[testenv:check] +skip_install = true +passenv = + CI + +allowlist_externals = bash + +deps = + pip>=20.0.1 + +commands = + bash -c 'export WHL_FILE=$(find . 
-maxdepth 2 -name pyre2\*-l\*.whl); \ + python -m pip --disable-pip-version-check install --force-reinstall $WHL_FILE' + python -m unittest discover -f -s {toxinidir}/tests + +[testenv:fail] +skip_install = true +passenv = + CI + +deps = + pip>=20.0.1 + +commands = + pip install -e .[test,perf] + pytest --doctest-glob="*.txt" From 7e28b5f075c20e5982189d0022bf76a1ff9d79eb Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sat, 6 Feb 2021 17:22:46 -0800 Subject: [PATCH 061/114] chg: dev: add more clean plus cleanup cfgs and remove cruft Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .gitignore | 1 + Makefile | 5 ++++- setup.cfg | 11 ----------- tox.ini | 38 +++++++++++++------------------------- 4 files changed, 18 insertions(+), 37 deletions(-) diff --git a/.gitignore b/.gitignore index 4d9a9c8f..4bd9c8a3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ MANIFEST /build /dist +.tox/ src/re2.so src/re2.cpp src/*.html diff --git a/Makefile b/Makefile index d2b7fea3..af57fddc 100644 --- a/Makefile +++ b/Makefile @@ -11,9 +11,12 @@ test2: install2 python2 -m pytest --doctest-glob='*.txt' clean: - rm -rf build &>/dev/null + rm -rf build pyre2.egg-info &>/dev/null rm -f *.so src/*.so src/re2.cpp src/*.html &>/dev/null +distclean: clean + rm -rf .tox/ dist/ .pytest_cache/ + valgrind: python3.5-dbg setup.py install --user && \ (cd tests && valgrind --tool=memcheck --suppressions=../valgrind-python.supp \ diff --git a/setup.cfg b/setup.cfg index 26f17b08..5223702a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,17 +28,6 @@ test = perf = regex - simplejson - -[nosetests] -verbosity = 3 -with-doctest = 1 -doctest-extension = txt -exe = 1 -#with-coverage = 1 -#cover-package = py_re2 -#cover-min-percentage = 90 -doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE [flake8] # these error codes interfere with Black diff --git a/tox.ini b/tox.ini index 3151f2dd..69386e13 100644 --- a/tox.ini +++ b/tox.ini @@ -10,8 +10,6 @@ isolated_build = true 
3.9 = py39 [testenv] -skip_install = true - passenv = CI CC @@ -21,19 +19,18 @@ passenv = CMAKE_GENERATOR PIP_DOWNLOAD_CACHE -setenv = - PYTHONPATH=. - deps = pip>=20.0.1 - nose + path commands = - python setup.py build_ext --inplace - nosetests -sx tests/re2_test.py - nosetests -sx tests/test_re.py + python -c "import path; path.Path('build').rmtree_p()" + pip install -e .[test] + pytest --doctest-glob="*.txt" [testenv:dev] +skip_install = true + passenv = CI CC @@ -43,13 +40,16 @@ passenv = CMAKE_GENERATOR PIP_DOWNLOAD_CACHE +setenv = + PYTHONPATH=. + deps = pip>=20.0.1 commands = - pip install -e .[test] + python setup.py build_ext --inplace # use --capture=no to see all the doctest output - python -m pytest -v tests/re2_test.py + python -m pytest -v --ignore=tests/test_re.py --doctest-glob=*.txt . python -m pytest -v tests/test_re.py [testenv:perf] @@ -86,12 +86,12 @@ deps = pip>=20.0.1 pep517 twine - #git+https://github.com/freepn/gitchangelog@3.0.5#egg=gitchangelog + git+https://github.com/freepn/gitchangelog@3.0.5#egg=gitchangelog commands = python -m pep517.build . twine check dist/* - #bash -c 'gitchangelog $(git tag --sort=taggerdate | tail -n2 | head -n1)..' + bash -c 'gitchangelog $(git tag --sort=taggerdate | tail -n2 | head -n1)..' [testenv:check] skip_install = true @@ -107,15 +107,3 @@ commands = bash -c 'export WHL_FILE=$(find . 
-maxdepth 2 -name pyre2\*-l\*.whl); \ python -m pip --disable-pip-version-check install --force-reinstall $WHL_FILE' python -m unittest discover -f -s {toxinidir}/tests - -[testenv:fail] -skip_install = true -passenv = - CI - -deps = - pip>=20.0.1 - -commands = - pip install -e .[test,perf] - pytest --doctest-glob="*.txt" From a3a5754e29f6e67b06a9a1ca1aa2ff25fe826537 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sat, 6 Feb 2021 17:24:53 -0800 Subject: [PATCH 062/114] fix: test: handle invalid escape sequence warnings, revert path changes Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- tests/finditer.txt | 2 +- tests/mmap.txt | 2 +- tests/re_utils.py | 4 +-- tests/search.txt | 2 +- tests/sub.txt | 2 +- tests/test_re.py | 80 +++++++++++++++++++++++----------------------- 6 files changed, 46 insertions(+), 46 deletions(-) diff --git a/tests/finditer.txt b/tests/finditer.txt index 3d60d199..52934c45 100644 --- a/tests/finditer.txt +++ b/tests/finditer.txt @@ -4,7 +4,7 @@ Simple tests for the ``finditer`` function. >>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> with open('tests/cnn_homepage.dat') as tmp: + >>> with open('cnn_homepage.dat') as tmp: ... 
data = tmp.read() >>> len(list(re2.finditer(r'\w+', data))) 14230 diff --git a/tests/mmap.txt b/tests/mmap.txt index 12ffa974..07534413 100644 --- a/tests/mmap.txt +++ b/tests/mmap.txt @@ -6,7 +6,7 @@ Testing re2 on buffer object >>> import mmap >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> tmp = open("tests/cnn_homepage.dat", "rb+") + >>> tmp = open("cnn_homepage.dat", "rb+") >>> data = mmap.mmap(tmp.fileno(), 0) >>> len(list(re2.finditer(b'\\w+', data))) diff --git a/tests/re_utils.py b/tests/re_utils.py index d3de23c9..348c3ce9 100644 --- a/tests/re_utils.py +++ b/tests/re_utils.py @@ -158,7 +158,7 @@ ('(abc', '-', SYNTAX_ERROR), ('a]', 'a]', SUCCEED, 'found', 'a]'), ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'), - ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'), + (r'a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'), ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'), ('a[^bc]d', 'abd', FAIL), ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'), @@ -551,7 +551,7 @@ # lookbehind: split by : but not if it is escaped by -. ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ), # escaping with \ as we know it - ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ), + (r'(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ), # terminating with ' and escaping with ? as in edifact ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ), diff --git a/tests/search.txt b/tests/search.txt index 9c1e18f0..7d64869a 100644 --- a/tests/search.txt +++ b/tests/search.txt @@ -16,7 +16,7 @@ These are simple tests of the ``search`` function >>> len(re2.search('(?:a{1000})?a{999}', input).group()) 999 - >>> with open('tests/cnn_homepage.dat') as tmp: + >>> with open('cnn_homepage.dat') as tmp: ... 
data = tmp.read() >>> re2.search(r'\n#hdr-editions(.*?)\n', data).groups() (' a { text-decoration:none; }',) diff --git a/tests/sub.txt b/tests/sub.txt index cca1f0b0..3c74450d 100644 --- a/tests/sub.txt +++ b/tests/sub.txt @@ -12,7 +12,7 @@ with an empty string. >>> import warnings >>> warnings.filterwarnings('ignore', category=DeprecationWarning) - >>> with gzip.open('tests/wikipages.xml.gz', 'rb') as tmp: + >>> with gzip.open('wikipages.xml.gz', 'rb') as tmp: ... data = tmp.read() >>> print(hashlib.md5(re2.sub(b'\(.*?\)', b'', data)).hexdigest()) b7a469f55ab76cd5887c81dbb0cfe6d3 diff --git a/tests/test_re.py b/tests/test_re.py index 56012bee..5d78264c 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -61,10 +61,10 @@ def test_basic_re_sub(self): self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s) self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s) - self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx') - self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx') - self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx') - self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx') + self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx') + self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx') + self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx') + self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx') self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'), '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D') @@ -72,12 +72,12 @@ def test_basic_re_sub(self): self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))) - self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest') + self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest') def test_bug_449964(self): # fails for group followed by other escape self.assertEqual( - re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'), 'xx\bxx\b') + re.sub(r'(?P<unk>x)', 
'\\g<1>\\g<1>\\b', 'xx'), 'xx\bxx\b') def test_bug_449000(self): # Test for sub() on escaped characters @@ -183,16 +183,16 @@ def test_bug_462270(self): self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d') def test_symbolic_refs(self): - self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx') - self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx') - self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx') - self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx') - self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx') - self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx') + self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a', 'xx') + self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<', 'xx') + self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g', 'xx') + self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a a>', 'xx') + self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<1a1>', 'xx') + self.assertRaises(IndexError, re.sub, '(?P<a>x)', r'\g<ab>', 'xx') # non-matched groups no longer raise an error: # self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') # self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') - self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') + self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<-1>', 'xx') def test_re_subn(self): self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) @@ -265,12 +265,12 @@ def test_re_match(self): self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) def test_re_groupref_exists(self): - self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), + self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a)').groups(), ('(', 'a')) - self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), + self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a').groups(), (None, 'a')) - self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None) - 
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None) + self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'), None) + self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a'), None) self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), ('a', 'b')) self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(), @@ -313,19 +313,19 @@ def test_expand(self): "second first second first") def test_repeat_minmax(self): - self.assertEqual(re.match("^(\w){1}$", "abc"), None) - self.assertEqual(re.match("^(\w){1}?$", "abc"), None) - self.assertEqual(re.match("^(\w){1,2}$", "abc"), None) - self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None) - - self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c") - self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c") - self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c") - self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c") - self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c") - self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c") - self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c") - self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c") + self.assertEqual(re.match(r"^(\w){1}$", "abc"), None) + self.assertEqual(re.match(r"^(\w){1}?$", "abc"), None) + self.assertEqual(re.match(r"^(\w){1,2}$", "abc"), None) + self.assertEqual(re.match(r"^(\w){1,2}?$", "abc"), None) + + self.assertEqual(re.match(r"^(\w){3}$", "abc").group(1), "c") + self.assertEqual(re.match(r"^(\w){1,3}$", "abc").group(1), "c") + self.assertEqual(re.match(r"^(\w){1,4}$", "abc").group(1), "c") + self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c") + self.assertEqual(re.match(r"^(\w){3}?$", "abc").group(1), "c") + self.assertEqual(re.match(r"^(\w){1,3}?$", "abc").group(1), "c") + self.assertEqual(re.match(r"^(\w){1,4}?$", "abc").group(1), "c") + self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c") 
self.assertEqual(re.match("^x{1}$", "xxx"), None) self.assertEqual(re.match("^x{1}?$", "xxx"), None) @@ -386,10 +386,10 @@ def test_anyall(self): "a\n\nb") def test_non_consuming(self): - self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a") - self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a") - self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a") - self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a") + self.assertEqual(re.match(r"(a(?=\s[^a]))", "a b").group(1), "a") + self.assertEqual(re.match(r"(a(?=\s[^a]*))", "a b").group(1), "a") + self.assertEqual(re.match(r"(a(?=\s[abc]))", "a b").group(1), "a") + self.assertEqual(re.match(r"(a(?=\s[abc]*))", "a bc").group(1), "a") self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a") self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a") self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a") @@ -419,12 +419,12 @@ def test_getlower(self): self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC") def test_not_literal(self): - self.assertEqual(re.search("\s([^a])", " b").group(1), "b") - self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb") + self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b") + self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb") def test_search_coverage(self): - self.assertEqual(re.search("\s(b)", " b").group(1), "b") - self.assertEqual(re.search("a\s", "a ").group(0), "a ") + self.assertEqual(re.search(r"\s(b)", " b").group(1), "b") + self.assertEqual(re.search(r"a\s", "a ").group(0), "a ") def test_re_escape(self): p = "" @@ -476,7 +476,7 @@ def test_sre_character_literals(self): self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None) self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None) self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None) - self.assertRaises(re.error, re.match, b"\911", b"") + self.assertRaises(re.error, re.match, b"\\911", b"") def 
test_sre_character_class_literals(self): for i in [0, 8, 16, 32, 64, 127, 128, 255]: @@ -486,7 +486,7 @@ def test_sre_character_class_literals(self): self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None) self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None) self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None) - self.assertRaises(re.error, re.match, b"[\911]", b"") + self.assertRaises(re.error, re.match, b"[\\911]", b"") def test_bug_113254(self): self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1) From 5bb2c49469fe7400b3d95ddd94da05e9041dd68d Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sat, 6 Feb 2021 18:27:26 -0800 Subject: [PATCH 063/114] chg: ci: update workflows and tox cfg (use tox for smoke test) Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/ci.yml | 55 ++++++++++++++++++++++++++++++++++++++ .github/workflows/main.yml | 4 +-- .gitignore | 5 ++-- tox.ini | 10 ++++--- 4 files changed, 67 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..4e240970 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,55 @@ +name: Smoke + +on: + workflow_dispatch: + pull_request: + +jobs: + python_wheels: + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + PIP_DOWNLOAD_CACHE: ${{ github.workspace }}/../.pip_download_cache + strategy: + fail-fast: true + matrix: + os: [ubuntu-20.04] + python-version: [3.6, 3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Add requirements + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + + - name: Install Ubuntu build deps + if: runner.os == 'Linux' + run: | + sudo apt-get -qq update + sudo 
apt-get install -y software-properties-common + sudo add-apt-repository -y -s ppa:nerdboy/embedded + sudo apt-get install -y pybind11-dev libre2-dev ninja-build + + - name: Test in place + run: | + tox -e dev + + - name: Build dist pkgs + run: | + tox -e deploy + + - name: Check wheel + run: | + tox -e check diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c05086a2..e58f0604 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -75,8 +75,8 @@ jobs: - name: Build sdist run: | - pip install --user cython - python setup.py sdist + pip install pep517 + python -m pep517.build -s . - uses: actions/upload-artifact@v2 with: diff --git a/.gitignore b/.gitignore index 4bd9c8a3..4d4ff6ee 100644 --- a/.gitignore +++ b/.gitignore @@ -2,12 +2,13 @@ MANIFEST /build /dist .tox/ -src/re2.so +src/*.so src/re2.cpp src/*.html -tests/re2.so +tests/*.so tests/access.log *~ +*.so *.pyc *.swp *.egg-info diff --git a/tox.ini b/tox.ini index 69386e13..d8933035 100644 --- a/tox.ini +++ b/tox.ini @@ -45,6 +45,8 @@ setenv = deps = pip>=20.0.1 + cython>=0.20 + pytest commands = python setup.py build_ext --inplace @@ -86,9 +88,11 @@ deps = pip>=20.0.1 pep517 twine + path git+https://github.com/freepn/gitchangelog@3.0.5#egg=gitchangelog commands = + python -c "import path; path.Path('build').rmtree_p()" python -m pep517.build . twine check dist/* bash -c 'gitchangelog $(git tag --sort=taggerdate | tail -n2 | head -n1)..' @@ -102,8 +106,8 @@ allowlist_externals = bash deps = pip>=20.0.1 + pytest commands = - bash -c 'export WHL_FILE=$(find . 
-maxdepth 2 -name pyre2\*-l\*.whl); \ - python -m pip --disable-pip-version-check install --force-reinstall $WHL_FILE' - python -m unittest discover -f -s {toxinidir}/tests + pip install pyre2 --force-reinstall --prefer-binary -f dist/ + pytest --doctest-glob="*.txt" From 3bcc431bc52851c41297b2abbfc2e5bfe2b012b2 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sat, 6 Feb 2021 20:36:20 -0800 Subject: [PATCH 064/114] chg: pkg: add .gitattributes and revert conda test change Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .gitattributes | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..bb3f296d --- /dev/null +++ b/.gitattributes @@ -0,0 +1,11 @@ +# Set default behaviour to automatically normalize line endings. +* text=auto + +# Force batch scripts to always use CRLF line endings so that if a repo is +# accessed in Windows via a file share from Linux, the scripts will work. +*.{cmd,[cC][mM][dD]} text eol=crlf +*.{bat,[bB][aA][tT]} text eol=crlf + +# Force bash scripts to always use LF line endings so that if a repo is +# accessed in Unix via a file share from Windows, the scripts will work. 
+*.sh text eol=lf From 4c86ec7c34875537a5aa00ca258647543d60edbc Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sat, 6 Feb 2021 23:49:37 -0800 Subject: [PATCH 065/114] chg: pkg: add pytest to conda recipe (plus test patch) Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/release.yml | 4 +- conda.recipe/adjust-test-file-paths.patch | 52 +++++++++++++++++++++++ conda.recipe/meta.yaml | 9 +++- 3 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 conda.recipe/adjust-test-file-paths.patch diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d4dac805..f020c723 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -91,8 +91,8 @@ jobs: - name: Generate changes file run: | - bash -c 'export GITCHANGELOG_CONFIG_FILENAME=$(get-rcpath); \ - gitchangelog $(git tag --sort=taggerdate | tail -n2 | head -n1)..${{ env.VERSION }} > CHANGES.md' + export GITCHANGELOG_CONFIG_FILENAME=$(get-rcpath) + gitchangelog $(git describe --abbrev=0 ${{ env.VERSION }})..${{ env.VERSION }} > CHANGES.md - name: Create draft release id: create_release diff --git a/conda.recipe/adjust-test-file-paths.patch b/conda.recipe/adjust-test-file-paths.patch new file mode 100644 index 00000000..26aa6388 --- /dev/null +++ b/conda.recipe/adjust-test-file-paths.patch @@ -0,0 +1,52 @@ +diff --git a/tests/finditer.txt b/tests/finditer.txt +index 52934c4..3d60d19 100644 +--- a/tests/finditer.txt ++++ b/tests/finditer.txt +@@ -4,7 +4,7 @@ Simple tests for the ``finditer`` function. + >>> import re2 + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + +- >>> with open('cnn_homepage.dat') as tmp: ++ >>> with open('tests/cnn_homepage.dat') as tmp: + ... 
data = tmp.read() + >>> len(list(re2.finditer(r'\w+', data))) + 14230 +diff --git a/tests/mmap.txt b/tests/mmap.txt +index 0753441..12ffa97 100644 +--- a/tests/mmap.txt ++++ b/tests/mmap.txt +@@ -6,7 +6,7 @@ Testing re2 on buffer object + >>> import mmap + >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) + +- >>> tmp = open("cnn_homepage.dat", "rb+") ++ >>> tmp = open("tests/cnn_homepage.dat", "rb+") + >>> data = mmap.mmap(tmp.fileno(), 0) + + >>> len(list(re2.finditer(b'\\w+', data))) +diff --git a/tests/search.txt b/tests/search.txt +index 7d64869..9c1e18f 100644 +--- a/tests/search.txt ++++ b/tests/search.txt +@@ -16,7 +16,7 @@ These are simple tests of the ``search`` function + >>> len(re2.search('(?:a{1000})?a{999}', input).group()) + 999 + +- >>> with open('cnn_homepage.dat') as tmp: ++ >>> with open('tests/cnn_homepage.dat') as tmp: + ... data = tmp.read() + >>> re2.search(r'\n#hdr-editions(.*?)\n', data).groups() + (' a { text-decoration:none; }',) +diff --git a/tests/sub.txt b/tests/sub.txt +index 3c74450..cca1f0b 100644 +--- a/tests/sub.txt ++++ b/tests/sub.txt +@@ -12,7 +12,7 @@ with an empty string. + >>> import warnings + >>> warnings.filterwarnings('ignore', category=DeprecationWarning) + +- >>> with gzip.open('wikipages.xml.gz', 'rb') as tmp: ++ >>> with gzip.open('tests/wikipages.xml.gz', 'rb') as tmp: + ... data = tmp.read() + >>> print(hashlib.md5(re2.sub(b'\(.*?\)', b'', data)).hexdigest()) + b7a469f55ab76cd5887c81dbb0cfe6d3 diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 0245b6a4..811888bb 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -7,6 +7,8 @@ package: source: path: .. 
+ patches: + - adjust-test-file-paths.patch build: number: 0 @@ -29,14 +31,17 @@ requirements: - re2 test: + requires: + - pytest commands: - export "PYTHONIOENCODING=utf8" # [unix] - set "PYTHONIOENCODING=utf8" # [win] - - python -m unittest discover -f -s tests + - python -m pytest -v --ignore=tests/test_re.py --doctest-glob=*.txt tests/ + - python -m pytest -v . imports: - re2 source_files: - - tests + - tests/ about: home: "https://github.com/andreasvc/pyre2" From 6ed77f52736645d728f06087555d7effce1cbc7b Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sun, 7 Feb 2021 16:26:35 +0100 Subject: [PATCH 066/114] fix narrow unicode detection --- src/includes.pxi | 2 +- src/re2.pyx | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/includes.pxi b/src/includes.pxi index 4726eac6..8c35b6d4 100644 --- a/src/includes.pxi +++ b/src/includes.pxi @@ -8,7 +8,7 @@ from cpython.version cimport PY_MAJOR_VERSION cdef extern from *: - cdef void emit_ifndef_py_unicode_wide "#if !defined(Py_UNICODE_WIDE) //" () + cdef void emit_if_narrow_unicode "#if !defined(Py_UNICODE_WIDE) && PY_VERSION_HEX < 0x03030000 //" () cdef void emit_endif "#endif //" () diff --git a/src/re2.pyx b/src/re2.pyx index 75150ffe..ffe65442 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -388,10 +388,9 @@ cdef utf8indices(char * cstring, int size, int *pos, int *endpos): else: cpos += 4 upos += 1 - # wide unicode chars get 2 unichars when python is compiled + # wide unicode chars get 2 unichars when Python <3.3 is compiled # with --enable-unicode=ucs2 - # TODO: verify this; cf. 
http://docs.cython.org/en/latest/src/tutorial/strings.html#narrow-unicode-builds - emit_ifndef_py_unicode_wide() + emit_if_narrow_unicode() upos += 1 emit_endif() @@ -436,10 +435,9 @@ cdef void unicodeindices(map[int, int] &positions, else: cpos[0] += 4 upos[0] += 1 - # wide unicode chars get 2 unichars when python is compiled + # wide unicode chars get 2 unichars when Python <3.3 is compiled # with --enable-unicode=ucs2 - # TODO: verify this; cf. http://docs.cython.org/en/latest/src/tutorial/strings.html#narrow-unicode-builds - emit_ifndef_py_unicode_wide() + emit_if_narrow_unicode() upos[0] += 1 emit_endif() From 7b28dbadb42489df959f4f2d6880a59b597ef815 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sun, 7 Feb 2021 16:47:19 +0100 Subject: [PATCH 067/114] fix "make test"; rename doctest files for autodetection --- Makefile | 4 ++-- README.rst | 6 +++--- conda.recipe/meta.yaml | 2 +- pyproject.toml | 1 - pytest.ini | 1 - tests/{charliterals.txt => test_charliterals.txt} | 0 tests/{count.txt => test_count.txt} | 0 tests/{emptygroups.txt => test_emptygroups.txt} | 0 tests/{findall.txt => test_findall.txt} | 0 tests/{finditer.txt => test_finditer.txt} | 0 tests/{match_expand.txt => test_match_expand.txt} | 0 tests/{mmap.txt => test_mmap.txt} | 0 tests/{namedgroups.txt => test_namedgroups.txt} | 0 tests/{pattern.txt => test_pattern.txt} | 0 tests/{search.txt => test_search.txt} | 0 tests/{split.txt => test_split.txt} | 0 tests/{sub.txt => test_sub.txt} | 0 tests/{unicode.txt => test_unicode.txt} | 0 tox.ini | 2 +- 19 files changed, 7 insertions(+), 9 deletions(-) rename tests/{charliterals.txt => test_charliterals.txt} (100%) rename tests/{count.txt => test_count.txt} (100%) rename tests/{emptygroups.txt => test_emptygroups.txt} (100%) rename tests/{findall.txt => test_findall.txt} (100%) rename tests/{finditer.txt => test_finditer.txt} (100%) rename tests/{match_expand.txt => test_match_expand.txt} (100%) rename tests/{mmap.txt => 
test_mmap.txt} (100%) rename tests/{namedgroups.txt => test_namedgroups.txt} (100%) rename tests/{pattern.txt => test_pattern.txt} (100%) rename tests/{search.txt => test_search.txt} (100%) rename tests/{split.txt => test_split.txt} (100%) rename tests/{sub.txt => test_sub.txt} (100%) rename tests/{unicode.txt => test_unicode.txt} (100%) diff --git a/Makefile b/Makefile index af57fddc..37b3cb76 100644 --- a/Makefile +++ b/Makefile @@ -2,13 +2,13 @@ install: python3 setup.py install --user test: install - pytest --doctest-glob='*.txt' + (cd tests; pytest) install2: python2 setup.py install --user test2: install2 - python2 -m pytest --doctest-glob='*.txt' + (cd tests; python2 -m pytest) clean: rm -rf build pyre2.egg-info &>/dev/null diff --git a/README.rst b/README.rst index 9772fd05..3f46ff6e 100644 --- a/README.rst +++ b/README.rst @@ -223,7 +223,7 @@ The tests show the following differences with Python's ``re`` module: with ``\n``. This can be simulated using ``\n?$``, except when doing substitutions. * The ``pyre2`` module and Python's ``re`` may behave differently with nested groups. - See ``tests/emptygroups.txt`` for the examples. + See ``tests/test_emptygroups.txt`` for the examples. Please report any further issues with ``pyre2``. @@ -234,9 +234,9 @@ If you would like to help, one thing that would be very useful is writing comprehensive tests for this. It's actually really easy: * Come up with regular expression problems using the regular python 're' module. -* Write a session in python traceback format `Example <http://github.com/andreasvc/pyre2/blob/master/tests/search.txt>`_. +* Write a session in python traceback format `Example <http://github.com/andreasvc/pyre2/blob/master/tests/test_search.txt>`_. * Replace your ``import re`` with ``import re2 as re``. -* Save it as a .txt file in the tests directory. You can comment on it however you like and indent the code with 4 spaces. +* Save it with as ``test_<name>.txt`` in the tests directory. 
You can comment on it however you like and indent the code with 4 spaces. Credits diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 811888bb..e717e343 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -36,7 +36,7 @@ test: commands: - export "PYTHONIOENCODING=utf8" # [unix] - set "PYTHONIOENCODING=utf8" # [win] - - python -m pytest -v --ignore=tests/test_re.py --doctest-glob=*.txt tests/ + - python -m pytest -v --ignore=tests/test_re.py tests/ - python -m pytest -v . imports: - re2 diff --git a/pyproject.toml b/pyproject.toml index 94cf1179..18c975e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] minversion = "6.0" -addopts = "-ra -q --doctest-glob='*.txt'" testpaths = [ "tests", ] diff --git a/pytest.ini b/pytest.ini index 7c4eae74..60cef69f 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,6 +1,5 @@ [pytest] minversion = 6.0 -addopts = --doctest-modules doctest_optionflags = ELLIPSIS NORMALIZE_WHITESPACE diff --git a/tests/charliterals.txt b/tests/test_charliterals.txt similarity index 100% rename from tests/charliterals.txt rename to tests/test_charliterals.txt diff --git a/tests/count.txt b/tests/test_count.txt similarity index 100% rename from tests/count.txt rename to tests/test_count.txt diff --git a/tests/emptygroups.txt b/tests/test_emptygroups.txt similarity index 100% rename from tests/emptygroups.txt rename to tests/test_emptygroups.txt diff --git a/tests/findall.txt b/tests/test_findall.txt similarity index 100% rename from tests/findall.txt rename to tests/test_findall.txt diff --git a/tests/finditer.txt b/tests/test_finditer.txt similarity index 100% rename from tests/finditer.txt rename to tests/test_finditer.txt diff --git a/tests/match_expand.txt b/tests/test_match_expand.txt similarity index 100% rename from tests/match_expand.txt rename to tests/test_match_expand.txt diff --git a/tests/mmap.txt b/tests/test_mmap.txt similarity 
index 100% rename from tests/mmap.txt rename to tests/test_mmap.txt diff --git a/tests/namedgroups.txt b/tests/test_namedgroups.txt similarity index 100% rename from tests/namedgroups.txt rename to tests/test_namedgroups.txt diff --git a/tests/pattern.txt b/tests/test_pattern.txt similarity index 100% rename from tests/pattern.txt rename to tests/test_pattern.txt diff --git a/tests/search.txt b/tests/test_search.txt similarity index 100% rename from tests/search.txt rename to tests/test_search.txt diff --git a/tests/split.txt b/tests/test_split.txt similarity index 100% rename from tests/split.txt rename to tests/test_split.txt diff --git a/tests/sub.txt b/tests/test_sub.txt similarity index 100% rename from tests/sub.txt rename to tests/test_sub.txt diff --git a/tests/unicode.txt b/tests/test_unicode.txt similarity index 100% rename from tests/unicode.txt rename to tests/test_unicode.txt diff --git a/tox.ini b/tox.ini index d8933035..68b7417c 100644 --- a/tox.ini +++ b/tox.ini @@ -26,7 +26,7 @@ deps = commands = python -c "import path; path.Path('build').rmtree_p()" pip install -e .[test] - pytest --doctest-glob="*.txt" + pytest [testenv:dev] skip_install = true From e0f47d7cda9efdc2dd25cf50b19f360028b39b0f Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sun, 7 Feb 2021 17:30:57 +0100 Subject: [PATCH 068/114] fix conda patch --- conda.recipe/adjust-test-file-paths.patch | 32 +++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/conda.recipe/adjust-test-file-paths.patch b/conda.recipe/adjust-test-file-paths.patch index 26aa6388..c6c4217d 100644 --- a/conda.recipe/adjust-test-file-paths.patch +++ b/conda.recipe/adjust-test-file-paths.patch @@ -1,52 +1,52 @@ -diff --git a/tests/finditer.txt b/tests/finditer.txt +diff --git a/tests/test_finditer.txt b/tests/test_finditer.txt index 52934c4..3d60d19 100644 ---- a/tests/finditer.txt -+++ b/tests/finditer.txt +--- a/tests/test_finditer.txt ++++ 
b/tests/test_finditer.txt @@ -4,7 +4,7 @@ Simple tests for the ``finditer`` function. >>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> with open('cnn_homepage.dat') as tmp: -+ >>> with open('tests/cnn_homepage.dat') as tmp: ++ >>> with open('tests/test_cnn_homepage.dat') as tmp: ... data = tmp.read() >>> len(list(re2.finditer(r'\w+', data))) 14230 -diff --git a/tests/mmap.txt b/tests/mmap.txt +diff --git a/tests/test_mmap.txt b/tests/test_mmap.txt index 0753441..12ffa97 100644 ---- a/tests/mmap.txt -+++ b/tests/mmap.txt +--- a/tests/test_mmap.txt ++++ b/tests/test_mmap.txt @@ -6,7 +6,7 @@ Testing re2 on buffer object >>> import mmap >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> tmp = open("cnn_homepage.dat", "rb+") -+ >>> tmp = open("tests/cnn_homepage.dat", "rb+") ++ >>> tmp = open("tests/test_cnn_homepage.dat", "rb+") >>> data = mmap.mmap(tmp.fileno(), 0) >>> len(list(re2.finditer(b'\\w+', data))) -diff --git a/tests/search.txt b/tests/search.txt +diff --git a/tests/test_search.txt b/tests/test_search.txt index 7d64869..9c1e18f 100644 ---- a/tests/search.txt -+++ b/tests/search.txt +--- a/tests/test_search.txt ++++ b/tests/test_search.txt @@ -16,7 +16,7 @@ These are simple tests of the ``search`` function >>> len(re2.search('(?:a{1000})?a{999}', input).group()) 999 - >>> with open('cnn_homepage.dat') as tmp: -+ >>> with open('tests/cnn_homepage.dat') as tmp: ++ >>> with open('tests/test_cnn_homepage.dat') as tmp: ... data = tmp.read() >>> re2.search(r'\n#hdr-editions(.*?)\n', data).groups() (' a { text-decoration:none; }',) -diff --git a/tests/sub.txt b/tests/sub.txt +diff --git a/tests/test_sub.txt b/tests/test_sub.txt index 3c74450..cca1f0b 100644 ---- a/tests/sub.txt -+++ b/tests/sub.txt +--- a/tests/test_sub.txt ++++ b/tests/test_sub.txt @@ -12,7 +12,7 @@ with an empty string. 
>>> import warnings >>> warnings.filterwarnings('ignore', category=DeprecationWarning) - >>> with gzip.open('wikipages.xml.gz', 'rb') as tmp: -+ >>> with gzip.open('tests/wikipages.xml.gz', 'rb') as tmp: ++ >>> with gzip.open('tests/test_wikipages.xml.gz', 'rb') as tmp: ... data = tmp.read() >>> print(hashlib.md5(re2.sub(b'\(.*?\)', b'', data)).hexdigest()) b7a469f55ab76cd5887c81dbb0cfe6d3 From 25e83ce76554e902e81a835d5d7e253ddb3331e5 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sun, 7 Feb 2021 17:50:29 +0100 Subject: [PATCH 069/114] fix fix of conda patch --- conda.recipe/adjust-test-file-paths.patch | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda.recipe/adjust-test-file-paths.patch b/conda.recipe/adjust-test-file-paths.patch index c6c4217d..f4e0c30c 100644 --- a/conda.recipe/adjust-test-file-paths.patch +++ b/conda.recipe/adjust-test-file-paths.patch @@ -7,7 +7,7 @@ index 52934c4..3d60d19 100644 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> with open('cnn_homepage.dat') as tmp: -+ >>> with open('tests/test_cnn_homepage.dat') as tmp: ++ >>> with open('tests/cnn_homepage.dat') as tmp: ... data = tmp.read() >>> len(list(re2.finditer(r'\w+', data))) 14230 @@ -20,7 +20,7 @@ index 0753441..12ffa97 100644 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> tmp = open("cnn_homepage.dat", "rb+") -+ >>> tmp = open("tests/test_cnn_homepage.dat", "rb+") ++ >>> tmp = open("tests/cnn_homepage.dat", "rb+") >>> data = mmap.mmap(tmp.fileno(), 0) >>> len(list(re2.finditer(b'\\w+', data))) @@ -46,7 +46,7 @@ index 3c74450..cca1f0b 100644 >>> warnings.filterwarnings('ignore', category=DeprecationWarning) - >>> with gzip.open('wikipages.xml.gz', 'rb') as tmp: -+ >>> with gzip.open('tests/test_wikipages.xml.gz', 'rb') as tmp: ++ >>> with gzip.open('tests/wikipages.xml.gz', 'rb') as tmp: ... 
data = tmp.read() >>> print(hashlib.md5(re2.sub(b'\(.*?\)', b'', data)).hexdigest()) b7a469f55ab76cd5887c81dbb0cfe6d3 From 0a389d6fb8e72448675d086247dd0db56ea71bc9 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sun, 7 Feb 2021 18:33:59 +0100 Subject: [PATCH 070/114] another one --- conda.recipe/adjust-test-file-paths.patch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda.recipe/adjust-test-file-paths.patch b/conda.recipe/adjust-test-file-paths.patch index f4e0c30c..11fd7d3e 100644 --- a/conda.recipe/adjust-test-file-paths.patch +++ b/conda.recipe/adjust-test-file-paths.patch @@ -33,7 +33,7 @@ index 7d64869..9c1e18f 100644 999 - >>> with open('cnn_homepage.dat') as tmp: -+ >>> with open('tests/test_cnn_homepage.dat') as tmp: ++ >>> with open('tests/cnn_homepage.dat') as tmp: ... data = tmp.read() >>> re2.search(r'\n#hdr-editions(.*?)\n', data).groups() (' a { text-decoration:none; }',) From 85e4349531a5d3f5b29af532b719eb27d6b56353 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sun, 7 Feb 2021 13:24:58 -0800 Subject: [PATCH 071/114] fix: test: apply test patch, cleanup tox and pytest args Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- tests/test_finditer.txt | 2 +- tests/test_mmap.txt | 2 +- tests/test_search.txt | 2 +- tests/test_sub.txt | 2 +- tox.ini | 11 +++++++---- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_finditer.txt b/tests/test_finditer.txt index 52934c45..3d60d199 100644 --- a/tests/test_finditer.txt +++ b/tests/test_finditer.txt @@ -4,7 +4,7 @@ Simple tests for the ``finditer`` function. >>> import re2 >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> with open('cnn_homepage.dat') as tmp: + >>> with open('tests/cnn_homepage.dat') as tmp: ... 
data = tmp.read() >>> len(list(re2.finditer(r'\w+', data))) 14230 diff --git a/tests/test_mmap.txt b/tests/test_mmap.txt index 07534413..12ffa974 100644 --- a/tests/test_mmap.txt +++ b/tests/test_mmap.txt @@ -6,7 +6,7 @@ Testing re2 on buffer object >>> import mmap >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - >>> tmp = open("cnn_homepage.dat", "rb+") + >>> tmp = open("tests/cnn_homepage.dat", "rb+") >>> data = mmap.mmap(tmp.fileno(), 0) >>> len(list(re2.finditer(b'\\w+', data))) diff --git a/tests/test_search.txt b/tests/test_search.txt index 7d64869a..9c1e18f0 100644 --- a/tests/test_search.txt +++ b/tests/test_search.txt @@ -16,7 +16,7 @@ These are simple tests of the ``search`` function >>> len(re2.search('(?:a{1000})?a{999}', input).group()) 999 - >>> with open('cnn_homepage.dat') as tmp: + >>> with open('tests/cnn_homepage.dat') as tmp: ... data = tmp.read() >>> re2.search(r'\n#hdr-editions(.*?)\n', data).groups() (' a { text-decoration:none; }',) diff --git a/tests/test_sub.txt b/tests/test_sub.txt index 3c74450d..cca1f0b0 100644 --- a/tests/test_sub.txt +++ b/tests/test_sub.txt @@ -12,7 +12,7 @@ with an empty string. >>> import warnings >>> warnings.filterwarnings('ignore', category=DeprecationWarning) - >>> with gzip.open('wikipages.xml.gz', 'rb') as tmp: + >>> with gzip.open('tests/wikipages.xml.gz', 'rb') as tmp: ... data = tmp.read() >>> print(hashlib.md5(re2.sub(b'\(.*?\)', b'', data)).hexdigest()) b7a469f55ab76cd5887c81dbb0cfe6d3 diff --git a/tox.ini b/tox.ini index 68b7417c..c7c0bdc6 100644 --- a/tox.ini +++ b/tox.ini @@ -26,7 +26,7 @@ deps = commands = python -c "import path; path.Path('build').rmtree_p()" pip install -e .[test] - pytest + pytest -v . 
[testenv:dev] skip_install = true @@ -46,9 +46,11 @@ setenv = deps = pip>=20.0.1 cython>=0.20 + path pytest commands = + python -c "import path; path.Path('build').rmtree_p()" python setup.py build_ext --inplace # use --capture=no to see all the doctest output python -m pytest -v --ignore=tests/test_re.py --doctest-glob=*.txt . @@ -89,13 +91,13 @@ deps = pep517 twine path - git+https://github.com/freepn/gitchangelog@3.0.5#egg=gitchangelog commands = python -c "import path; path.Path('build').rmtree_p()" python -m pep517.build . twine check dist/* - bash -c 'gitchangelog $(git tag --sort=taggerdate | tail -n2 | head -n1)..' + python -m pip install https://github.com/freepn/gitchangelog/archive/3.0.5.tar.gz + bash -c 'gitchangelog $(git describe --abbrev=0)..' [testenv:check] skip_install = true @@ -110,4 +112,5 @@ deps = commands = pip install pyre2 --force-reinstall --prefer-binary -f dist/ - pytest --doctest-glob="*.txt" + python -m unittest discover -f -s . + #pytest --doctest-glob="*.txt" From 94db661d63f5f97855840740bdbf2bc026fdf6c3 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sun, 7 Feb 2021 13:39:07 -0800 Subject: [PATCH 072/114] chg: pkg: remove test patch from conda recipe Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- conda.recipe/adjust-test-file-paths.patch | 52 ----------------------- conda.recipe/meta.yaml | 3 -- tox.ini | 3 ++ 3 files changed, 3 insertions(+), 55 deletions(-) delete mode 100644 conda.recipe/adjust-test-file-paths.patch diff --git a/conda.recipe/adjust-test-file-paths.patch b/conda.recipe/adjust-test-file-paths.patch deleted file mode 100644 index 11fd7d3e..00000000 --- a/conda.recipe/adjust-test-file-paths.patch +++ /dev/null @@ -1,52 +0,0 @@ -diff --git a/tests/test_finditer.txt b/tests/test_finditer.txt -index 52934c4..3d60d19 100644 ---- a/tests/test_finditer.txt -+++ b/tests/test_finditer.txt -@@ -4,7 +4,7 @@ Simple tests for the ``finditer`` function. 
- >>> import re2 - >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - -- >>> with open('cnn_homepage.dat') as tmp: -+ >>> with open('tests/cnn_homepage.dat') as tmp: - ... data = tmp.read() - >>> len(list(re2.finditer(r'\w+', data))) - 14230 -diff --git a/tests/test_mmap.txt b/tests/test_mmap.txt -index 0753441..12ffa97 100644 ---- a/tests/test_mmap.txt -+++ b/tests/test_mmap.txt -@@ -6,7 +6,7 @@ Testing re2 on buffer object - >>> import mmap - >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION) - -- >>> tmp = open("cnn_homepage.dat", "rb+") -+ >>> tmp = open("tests/cnn_homepage.dat", "rb+") - >>> data = mmap.mmap(tmp.fileno(), 0) - - >>> len(list(re2.finditer(b'\\w+', data))) -diff --git a/tests/test_search.txt b/tests/test_search.txt -index 7d64869..9c1e18f 100644 ---- a/tests/test_search.txt -+++ b/tests/test_search.txt -@@ -16,7 +16,7 @@ These are simple tests of the ``search`` function - >>> len(re2.search('(?:a{1000})?a{999}', input).group()) - 999 - -- >>> with open('cnn_homepage.dat') as tmp: -+ >>> with open('tests/cnn_homepage.dat') as tmp: - ... data = tmp.read() - >>> re2.search(r'\n#hdr-editions(.*?)\n', data).groups() - (' a { text-decoration:none; }',) -diff --git a/tests/test_sub.txt b/tests/test_sub.txt -index 3c74450..cca1f0b 100644 ---- a/tests/test_sub.txt -+++ b/tests/test_sub.txt -@@ -12,7 +12,7 @@ with an empty string. - >>> import warnings - >>> warnings.filterwarnings('ignore', category=DeprecationWarning) - -- >>> with gzip.open('wikipages.xml.gz', 'rb') as tmp: -+ >>> with gzip.open('tests/wikipages.xml.gz', 'rb') as tmp: - ... data = tmp.read() - >>> print(hashlib.md5(re2.sub(b'\(.*?\)', b'', data)).hexdigest()) - b7a469f55ab76cd5887c81dbb0cfe6d3 diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index e717e343..dc567039 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -7,8 +7,6 @@ package: source: path: .. 
- patches: - - adjust-test-file-paths.patch build: number: 0 @@ -36,7 +34,6 @@ test: commands: - export "PYTHONIOENCODING=utf8" # [unix] - set "PYTHONIOENCODING=utf8" # [win] - - python -m pytest -v --ignore=tests/test_re.py tests/ - python -m pytest -v . imports: - re2 diff --git a/tox.ini b/tox.ini index c7c0bdc6..0e7a1d30 100644 --- a/tox.ini +++ b/tox.ini @@ -2,6 +2,7 @@ envlist = py3{6,7,8,9} skip_missing_interpreters = true isolated_build = true +skipsdist=True [gh-actions] 3.6 = py36 @@ -68,8 +69,10 @@ passenv = deps = pip>=20.0.1 + path commands = + python -c "import path; path.Path('build').rmtree_p()" pip install .[perf] python tests/performance.py From 94cb4f74e01a321c5cf691f9d70811463ef65e58 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 6 Apr 2021 21:10:19 +0200 Subject: [PATCH 073/114] fix "make test" and "make test2" --- Makefile | 4 ++-- pytest.ini | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 37b3cb76..c87d3497 100644 --- a/Makefile +++ b/Makefile @@ -2,13 +2,13 @@ install: python3 setup.py install --user test: install - (cd tests; pytest) + pytest install2: python2 setup.py install --user test2: install2 - (cd tests; python2 -m pytest) + python2 -m pytest clean: rm -rf build pyre2.egg-info &>/dev/null diff --git a/pytest.ini b/pytest.ini index 60cef69f..80909c8d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,4 @@ [pytest] -minversion = 6.0 doctest_optionflags = ELLIPSIS NORMALIZE_WHITESPACE From 0974fae65ce4a05d232836c315902e6d5db9c379 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 6 Apr 2021 21:11:15 +0200 Subject: [PATCH 074/114] fix infinite loop on substitutions of empty matches; fixes #26 --- src/pattern.pxi | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/pattern.pxi b/src/pattern.pxi index 0950db2b..f54f1248 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -459,7 
+459,8 @@ cdef class Pattern: cdef Py_ssize_t size cdef Py_buffer buf cdef int retval - cdef int endpos + cdef int prevendpos = 0 + cdef int endpos = 0 cdef int pos = 0 cdef int encoded = 0 cdef StringPiece * sp @@ -489,7 +490,11 @@ cdef class Pattern: if retval == 0: break + prevendpos = endpos endpos = m.matches[0].data() - cstring + # ignore empty match on latest position + if pos == endpos == prevendpos and num_repl[0] > 1: + break result.extend(sp.data()[pos:endpos]) pos = endpos + m.matches[0].length() @@ -519,7 +524,8 @@ cdef class Pattern: cdef Py_ssize_t size cdef Py_buffer buf cdef int retval - cdef int endpos + cdef int prevendpos = 0 + cdef int endpos = 0 cdef int pos = 0 cdef int encoded = 0 cdef StringPiece * sp @@ -548,7 +554,11 @@ cdef class Pattern: if retval == 0: break + prevendpos = endpos endpos = m.matches[0].data() - cstring + # ignore empty match on latest position + if pos == endpos == prevendpos and num_repl[0] > 1: + break result.extend(sp.data()[pos:endpos]) pos = endpos + m.matches[0].length() From db47c4ec561cb300497bd92ac2fb839dc6281b66 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Thu, 8 Apr 2021 23:37:01 +0200 Subject: [PATCH 075/114] add test for #26 --- tests/test_sub.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_sub.txt b/tests/test_sub.txt index cca1f0b0..194366dc 100644 --- a/tests/test_sub.txt +++ b/tests/test_sub.txt @@ -18,3 +18,8 @@ with an empty string. 
b7a469f55ab76cd5887c81dbb0cfe6d3 >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) + +Issue #26 re2.sub replacements with a match of "(.*)" hangs forever + + >>> re2.sub('(.*)', r'\1;replacement', 'original') + 'original;replacement;replacement' From 8ab3163b57c3ae6acf9c5e8fec2d1ac975ba3e41 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sat, 10 Apr 2021 16:13:06 +0200 Subject: [PATCH 076/114] improve fix for #26 --- src/pattern.pxi | 22 ++++++++++++---------- tests/test_sub.txt | 6 ++++++ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/pattern.pxi b/src/pattern.pxi index f54f1248..b8439d20 100644 --- a/src/pattern.pxi +++ b/src/pattern.pxi @@ -459,7 +459,7 @@ cdef class Pattern: cdef Py_ssize_t size cdef Py_buffer buf cdef int retval - cdef int prevendpos = 0 + cdef int prevendpos = -1 cdef int endpos = 0 cdef int pos = 0 cdef int encoded = 0 @@ -490,11 +490,12 @@ cdef class Pattern: if retval == 0: break - prevendpos = endpos endpos = m.matches[0].data() - cstring - # ignore empty match on latest position - if pos == endpos == prevendpos and num_repl[0] > 1: - break + if endpos == prevendpos: + endpos += 1 + if endpos > size: + break + prevendpos = endpos result.extend(sp.data()[pos:endpos]) pos = endpos + m.matches[0].length() @@ -524,7 +525,7 @@ cdef class Pattern: cdef Py_ssize_t size cdef Py_buffer buf cdef int retval - cdef int prevendpos = 0 + cdef int prevendpos = -1 cdef int endpos = 0 cdef int pos = 0 cdef int encoded = 0 @@ -554,11 +555,12 @@ cdef class Pattern: if retval == 0: break - prevendpos = endpos endpos = m.matches[0].data() - cstring - # ignore empty match on latest position - if pos == endpos == prevendpos and num_repl[0] > 1: - break + if endpos == prevendpos: + endpos += 1 + if endpos > size: + break + prevendpos = endpos result.extend(sp.data()[pos:endpos]) pos = endpos + m.matches[0].length() diff --git a/tests/test_sub.txt b/tests/test_sub.txt index 194366dc..b41dd30d 100644 
--- a/tests/test_sub.txt +++ b/tests/test_sub.txt @@ -23,3 +23,9 @@ Issue #26 re2.sub replacements with a match of "(.*)" hangs forever >>> re2.sub('(.*)', r'\1;replacement', 'original') 'original;replacement;replacement' + + >>> re2.sub('(.*)', lambda x: x.group() + ';replacement', 'original') + 'original;replacement;replacement' + + >>> re2.subn("b*", lambda x: "X", "xyz", 4) + ('XxXyXzX', 4) From e967c058d1835e49feec1fa6b6e37848c4dc87a2 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Sat, 10 Apr 2021 16:14:19 +0200 Subject: [PATCH 077/114] bump version --- conda.recipe/meta.yaml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index dc567039..a2e70793 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "pyre2" %} -{% set version = "0.3.4.dev0" %} +{% set version = "0.3.4" %} package: name: {{ name|lower }} diff --git a/setup.py b/setup.py index 5ad5bbe1..1f84276b 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ # update the version both here and in conda.recipe/meta.yaml -__version__ = '0.3.4.dev0' +__version__ = '0.3.4' # Convert distutils Windows platform specifiers to CMake -A arguments PLAT_TO_CMAKE = { From 92ad32b3000f0c0cfaf6c8125228f80c9601f7e7 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sun, 11 Apr 2021 15:28:24 -0700 Subject: [PATCH 078/114] move pypi upload to end of release.yml, use gitchangelog action Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/main.yml | 18 ------------------ .github/workflows/release.yml | 24 ++++++++++++++---------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e58f0604..dd68e1a3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -82,21 +82,3 @@ jobs: with: path: dist/*.tar.gz - upload_pypi: - needs: 
[build_wheels, build_sdist] - runs-on: ubuntu-latest - # upload to PyPI on every tag starting with 'v' - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') - # alternatively, to publish when a GitHub Release is created, use the following rule: - # if: github.event_name == 'release' && github.event.action == 'published' - steps: - - uses: actions/download-artifact@v2 - with: - name: artifact - path: dist - - - uses: pypa/gh-action-pypi-publish@master - with: - user: __token__ - password: ${{ secrets.pypi_password }} - diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f020c723..fac129bb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -84,15 +84,13 @@ jobs: # download all artifacts to project dir - uses: actions/download-artifact@v2 - - - name: Install gitchangelog - run: | - pip install git+https://github.com/freepn/gitchangelog@3.0.5#egg=gitchangelog + with: + path: dist - name: Generate changes file - run: | - export GITCHANGELOG_CONFIG_FILENAME=$(get-rcpath) - gitchangelog $(git describe --abbrev=0 ${{ env.VERSION }})..${{ env.VERSION }} > CHANGES.md + uses: sarnold/gitchangelog-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN}} - name: Create draft release id: create_release @@ -103,7 +101,13 @@ jobs: tag_name: ${{ env.VERSION }} name: Release v${{ env.VERSION }} body_path: CHANGES.md - draft: true - prerelease: true + draft: false + prerelease: false # uncomment below to upload wheels to github releases - # files: wheels/pyre2*.whl + files: dist/cibw-wheels/pyre2*.whl + + - uses: pypa/gh-action-pypi-publish@master + with: + user: __token__ + password: ${{ secrets.pypi_password }} + packages_dir: dist/cibw-wheels/ From 699767ff0238cb0603d939e1a5663f49dce4ef8a Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 13 Apr 2021 11:45:23 +0200 Subject: [PATCH 079/114] bump version again --- conda.recipe/meta.yaml | 2 +- setup.py 
| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index a2e70793..8f91b80b 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "pyre2" %} -{% set version = "0.3.4" %} +{% set version = "0.3.5" %} package: name: {{ name|lower }} diff --git a/setup.py b/setup.py index 1f84276b..6d25c876 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ # update the version both here and in conda.recipe/meta.yaml -__version__ = '0.3.4' +__version__ = '0.3.5' # Convert distutils Windows platform specifiers to CMake -A arguments PLAT_TO_CMAKE = { From d1caa236e0630cc755ba4c4ef20473ea92012183 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Tue, 4 May 2021 12:18:56 -0700 Subject: [PATCH 080/114] add missing sdist job and artifact check to workflows, bump version Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/main.yml | 14 ++++++++++++++ .github/workflows/release.yml | 29 +++++++++++++++++++++++------ conda.recipe/meta.yaml | 2 +- setup.py | 4 ++-- 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dd68e1a3..a9bc2175 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -82,3 +82,17 @@ jobs: with: path: dist/*.tar.gz + check_artifacts: + needs: [build_sdist, build_wheels] + defaults: + run: + shell: bash + name: Check artifacts are correct + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/download-artifact@v2 + + # note wheels should be in subdirectory <upload_name> + - name: Check number of downloaded artifacts + run: ls -R diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fac129bb..848fdbb4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -59,11 +59,30 @@ jobs: - uses: actions/upload-artifact@v2 with: - name: cibw-wheels path: 
./wheelhouse/*.whl + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + name: Install Python + with: + python-version: '3.8' + + - name: Build sdist + run: | + pip install pep517 + python -m pep517.build -s . + + - uses: actions/upload-artifact@v2 + with: + path: dist/*.tar.gz + create_release: - needs: [cibw_wheels] + needs: [build_sdist, cibw_wheels] runs-on: ubuntu-20.04 steps: @@ -84,8 +103,6 @@ jobs: # download all artifacts to project dir - uses: actions/download-artifact@v2 - with: - path: dist - name: Generate changes file uses: sarnold/gitchangelog-action@master @@ -99,7 +116,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag_name: ${{ env.VERSION }} - name: Release v${{ env.VERSION }} + name: Release ${{ env.VERSION }} body_path: CHANGES.md draft: false prerelease: false @@ -110,4 +127,4 @@ jobs: with: user: __token__ password: ${{ secrets.pypi_password }} - packages_dir: dist/cibw-wheels/ + packages_dir: artifact/ diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 8f91b80b..f4b6bcc5 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "pyre2" %} -{% set version = "0.3.5" %} +{% set version = "0.3.6" %} package: name: {{ name|lower }} diff --git a/setup.py b/setup.py index 6d25c876..87c65b23 100755 --- a/setup.py +++ b/setup.py @@ -9,8 +9,8 @@ from setuptools.command.build_ext import build_ext -# update the version both here and in conda.recipe/meta.yaml -__version__ = '0.3.5' +# update the release version both here and in conda.recipe/meta.yaml +__version__ = '0.3.6' # Convert distutils Windows platform specifiers to CMake -A arguments PLAT_TO_CMAKE = { From f58792a39ed09a8f0dd80edbb4e4e2ea976c8974 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com> Date: Fri, 30 Jul 2021 02:12:22 +0000 Subject: [PATCH 081/114] Make Match objects 
subscriptable Fixes #31 --- src/match.pxi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/match.pxi b/src/match.pxi index 3eaae74b..279df6bc 100644 --- a/src/match.pxi +++ b/src/match.pxi @@ -101,6 +101,9 @@ cdef class Match: return None if result is None else result.decode('utf8') return self._group(groupnum) + def __getitem__(self, key): + return self.group(key) + def groupdict(self): result = self._groupdict() if self.encoded: From e8874180c32aa939770030e62e0b43e9077341e6 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com> Date: Fri, 30 Jul 2021 02:16:46 +0000 Subject: [PATCH 082/114] Add test for Match subscripting --- tests/test_re.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_re.py b/tests/test_re.py index 5d78264c..57992778 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -257,6 +257,8 @@ def test_re_match(self): self.assertEqual(m.group(0), 'a') self.assertEqual(m.group(1), 'a') self.assertEqual(m.group(1, 1), ('a', 'a')) + self.assertEqual(m[0], 'a') + self.assertEqual(m[1], 'a') pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) From 72f648b9eb16a5a79420a9310040ec51b1dc1c42 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Fri, 10 Sep 2021 14:20:13 -0700 Subject: [PATCH 083/114] fix: dev: add conda-only patch for test_emptygroups failure Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- conda.recipe/conda-test_emptygroups.patch | 26 +++++++++++++++++++++++ conda.recipe/meta.yaml | 2 ++ 2 files changed, 28 insertions(+) create mode 100644 conda.recipe/conda-test_emptygroups.patch diff --git a/conda.recipe/conda-test_emptygroups.patch b/conda.recipe/conda-test_emptygroups.patch new file mode 100644 index 00000000..3b6a1109 --- /dev/null +++ b/conda.recipe/conda-test_emptygroups.patch @@ -0,0 +1,26 @@ +diff --git a/tests/test_emptygroups.txt b/tests/test_emptygroups.txt 
+index 424c8ba..bdfc350 100644 +--- a/tests/test_emptygroups.txt ++++ b/tests/test_emptygroups.txt +@@ -23,14 +23,16 @@ Unused vs. empty group: + + The following show different behavior for re and re2: + +- >>> re.search(r'((.*)*.)', 'a').groups() +- ('a', '') +- >>> re2.search(r'((.*)*.)', 'a').groups() +- ('a', None) +- + >>> re.search(r'((.*)*.)', 'Hello').groups() + ('Hello', '') + >>> re2.search(r'((.*)*.)', 'Hello').groups() + ('Hello', 'Hell') + ++This one was formerly a None vs empty string difference until July 2021: ++ ++ >>> re.search(r'((.*)*.)', 'a').groups() ++ ('a', '') ++ >>> re2.search(r'((.*)*.)', 'a').groups() ++ ('a', '') ++ + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index f4b6bcc5..f73bce71 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -7,6 +7,8 @@ package: source: path: .. + patches: + - conda-test_emptygroups.patch build: number: 0 From f3cb8f03f1c50cfb84706c4143d25a9094755b72 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Fri, 10 Sep 2021 15:06:47 -0700 Subject: [PATCH 084/114] fix: dev: conda build workflow: drop py 3.6 and add py 3.10 Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index b93ef097..ef5e8e40 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: platform: [ubuntu-latest, windows-2016, macos-latest] - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: [3.7, 3.8, 3.9, 3.10] runs-on: ${{ matrix.platform }} From de67b09737aa1e47fb61149c0755f1ae2e371128 Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Fri, 10 Sep 2021 15:47:00 -0700 Subject: [PATCH 085/114] fix: dev: limit conda build workflow to py 3.8 and 3.9 Signed-off-by: Stephen L Arnold 
<nerdboy@gentoo.org> --- .github/workflows/conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index ef5e8e40..bf3abd30 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: platform: [ubuntu-latest, windows-2016, macos-latest] - python-version: [3.7, 3.8, 3.9, 3.10] + python-version: [3.8, 3.9] runs-on: ${{ matrix.platform }} From bb60374d73cb9bd104f9766156a72048479591ca Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Sun, 12 Sep 2021 11:01:37 -0700 Subject: [PATCH 086/114] fix: apply emptygroups fix and remove conda-only patch, also * release workflow: restrict pypi upload to repo owner * tox.ini: replace deprecated pep517 module, update deploy url Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- .github/workflows/release.yml | 1 + conda.recipe/conda-test_emptygroups.patch | 26 ----------------------- conda.recipe/meta.yaml | 2 -- tests/test_emptygroups.txt | 12 ++++++----- tox.ini | 6 +++--- 5 files changed, 11 insertions(+), 36 deletions(-) delete mode 100644 conda.recipe/conda-test_emptygroups.patch diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 848fdbb4..e1251517 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -124,6 +124,7 @@ jobs: files: dist/cibw-wheels/pyre2*.whl - uses: pypa/gh-action-pypi-publish@master + if: ${{ github.actor == github.repository_owner && github.ref == 'refs/heads/master' }} with: user: __token__ password: ${{ secrets.pypi_password }} diff --git a/conda.recipe/conda-test_emptygroups.patch b/conda.recipe/conda-test_emptygroups.patch deleted file mode 100644 index 3b6a1109..00000000 --- a/conda.recipe/conda-test_emptygroups.patch +++ /dev/null @@ -1,26 +0,0 @@ -diff --git a/tests/test_emptygroups.txt b/tests/test_emptygroups.txt -index 424c8ba..bdfc350 100644 ---- 
a/tests/test_emptygroups.txt -+++ b/tests/test_emptygroups.txt -@@ -23,14 +23,16 @@ Unused vs. empty group: - - The following show different behavior for re and re2: - -- >>> re.search(r'((.*)*.)', 'a').groups() -- ('a', '') -- >>> re2.search(r'((.*)*.)', 'a').groups() -- ('a', None) -- - >>> re.search(r'((.*)*.)', 'Hello').groups() - ('Hello', '') - >>> re2.search(r'((.*)*.)', 'Hello').groups() - ('Hello', 'Hell') - -+This one was formerly a None vs empty string difference until July 2021: -+ -+ >>> re.search(r'((.*)*.)', 'a').groups() -+ ('a', '') -+ >>> re2.search(r'((.*)*.)', 'a').groups() -+ ('a', '') -+ - >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index f73bce71..f4b6bcc5 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -7,8 +7,6 @@ package: source: path: .. - patches: - - conda-test_emptygroups.patch build: number: 0 diff --git a/tests/test_emptygroups.txt b/tests/test_emptygroups.txt index 424c8ba2..bdfc3500 100644 --- a/tests/test_emptygroups.txt +++ b/tests/test_emptygroups.txt @@ -23,14 +23,16 @@ Unused vs. empty group: The following show different behavior for re and re2: - >>> re.search(r'((.*)*.)', 'a').groups() - ('a', '') - >>> re2.search(r'((.*)*.)', 'a').groups() - ('a', None) - >>> re.search(r'((.*)*.)', 'Hello').groups() ('Hello', '') >>> re2.search(r'((.*)*.)', 'Hello').groups() ('Hello', 'Hell') +This one was formerly a None vs empty string difference until July 2021: + + >>> re.search(r'((.*)*.)', 'a').groups() + ('a', '') + >>> re2.search(r'((.*)*.)', 'a').groups() + ('a', '') + >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tox.ini b/tox.ini index 0e7a1d30..aa388ad0 100644 --- a/tox.ini +++ b/tox.ini @@ -91,15 +91,15 @@ allowlist_externals = bash deps = pip>=20.0.1 - pep517 + build twine path commands = python -c "import path; path.Path('build').rmtree_p()" - python -m pep517.build . + python -m build . 
twine check dist/* - python -m pip install https://github.com/freepn/gitchangelog/archive/3.0.5.tar.gz + python -m pip install https://github.com/sarnold/gitchangelog/archive/3.0.7.tar.gz bash -c 'gitchangelog $(git describe --abbrev=0)..' [testenv:check] From e632dbef0ee147df19d7bd346f22c95b4a3314b7 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 20 Dec 2022 18:55:28 +0100 Subject: [PATCH 087/114] remove python versions for make valgrind --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index c87d3497..3785bdce 100644 --- a/Makefile +++ b/Makefile @@ -18,13 +18,13 @@ distclean: clean rm -rf .tox/ dist/ .pytest_cache/ valgrind: - python3.5-dbg setup.py install --user && \ + python3-dbg setup.py install --user && \ (cd tests && valgrind --tool=memcheck --suppressions=../valgrind-python.supp \ --leak-check=full --show-leak-kinds=definite \ - python3.5-dbg test_re.py) + python3-dbg test_re.py) valgrind2: - python3.5-dbg setup.py install --user && \ + python2-dbg setup.py install --user && \ (cd tests && valgrind --tool=memcheck --suppressions=../valgrind-python.supp \ --leak-check=full --show-leak-kinds=definite \ - python3.5-dbg re2_test.py) + python2-dbg re2_test.py) From 01c73c9dac4f06c0e1e41519fa8f874bf02fb1b0 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 20 Dec 2022 18:56:04 +0100 Subject: [PATCH 088/114] add NOFLAGS and RegexFlags constants; #41 --- src/re2.pyx | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/re2.pyx b/src/re2.pyx index ffe65442..58c89a28 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -121,8 +121,31 @@ UNICODE = re.UNICODE VERBOSE = re.VERBOSE LOCALE = re.LOCALE DEBUG = re.DEBUG +NOFLAG = 0 # Python 3.11 ASCII = 256 # Python 3 +try: + import enum +except ImportError: + pass +else: + @enum.global_enum + @enum._simple_enum(enum.IntFlag, boundary=enum.KEEP) + class RegexFlag: + 
NOFLAG = 0 + ASCII = A = re.ASCII # assume ascii "locale" + IGNORECASE = I = re.IGNORECASE # ignore case + LOCALE = L = re.LOCALE # assume current 8-bit locale + UNICODE = U = re.UNICODE # assume unicode "locale" + MULTILINE = M = re.MULTILINE # make anchors look for newline + DOTALL = S = re.DOTALL # make dot match newline + VERBOSE = X = re.VERBOSE # ignore whitespace and comments + # sre extensions (experimental, don't rely on these + # TEMPLATE = T = _compiler.SRE_FLAG_TEMPLATE # unknown purpose, deprecated + DEBUG = re.DEBUG # dump pattern after compilation + __str__ = object.__str__ + _numeric_repr_ = hex + FALLBACK_QUIETLY = 0 FALLBACK_WARNING = 1 FALLBACK_EXCEPTION = 2 @@ -456,6 +479,7 @@ __all__ = [ 'FALLBACK_EXCEPTION', 'FALLBACK_QUIETLY', 'FALLBACK_WARNING', 'DEBUG', 'S', 'DOTALL', 'I', 'IGNORECASE', 'L', 'LOCALE', 'M', 'MULTILINE', 'U', 'UNICODE', 'X', 'VERBOSE', 'VERSION', 'VERSION_HEX', + 'NOFLAG', 'RegexFlag', # classes 'Match', 'Pattern', 'SREPattern', # functions From 017328ad4c057db4934df6f7e7c07a63b000e419 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 20 Dec 2022 18:59:20 +0100 Subject: [PATCH 089/114] make tests pass on my system; if this behavior turns out to be inconsistent across versions/platforms, maybe the test should be disabled altogether. 
#27 --- tests/test_emptygroups.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_emptygroups.txt b/tests/test_emptygroups.txt index bdfc3500..4a5bd5bc 100644 --- a/tests/test_emptygroups.txt +++ b/tests/test_emptygroups.txt @@ -28,11 +28,9 @@ The following show different behavior for re and re2: >>> re2.search(r'((.*)*.)', 'Hello').groups() ('Hello', 'Hell') -This one was formerly a None vs empty string difference until July 2021: - >>> re.search(r'((.*)*.)', 'a').groups() ('a', '') >>> re2.search(r'((.*)*.)', 'a').groups() - ('a', '') + ('a', None) >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) From 27a0d98bc838474cf8edd002c448c2e2da246f54 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 20 Dec 2022 19:34:09 +0100 Subject: [PATCH 090/114] document lack of support for possessive quantifiers and atomic groups --- README.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 3f46ff6e..e939dfdc 100644 --- a/README.rst +++ b/README.rst @@ -126,8 +126,10 @@ That being said, there are features of the ``re`` module that this module may never have; these will be handled through fallback to the original ``re`` module: * lookahead assertions ``(?!...)`` -* backreferences (``\\n`` in search pattern) -* \W and \S not supported inside character classes +* backreferences, e.g., ``\\1`` in search pattern +* possessive quantifiers ``*+, ++, ?+, {m,n}+`` +* atomic groups ``(?>...)`` +* ``\W`` and ``\S`` not supported inside character classes On the other hand, unicode character classes are supported (e.g., ``\p{Greek}``). 
Syntax reference: https://github.com/google/re2/wiki/Syntax From 6bb18c2aca07d9e9c2a2e09e97acbf124db69cff Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 20 Dec 2022 19:34:49 +0100 Subject: [PATCH 091/114] support fallback to Python re for possessive quantifiers --- src/compile.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compile.pxi b/src/compile.pxi index 887a2778..7d8ca97e 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -87,7 +87,7 @@ def _compile(object pattern, int flags=0, int max_mem=8388608): raise RegexError(error_msg) elif error_code not in (ErrorBadPerlOp, ErrorRepeatSize, # ErrorBadEscape, - ErrorPatternTooLarge): + ErrorRepeatOp, ErrorPatternTooLarge): # Raise an error because these will not be fixed by using the # ``re`` module. raise RegexError(error_msg) From 195c9234ba48aa51a471f0c6a6b459ab3819770b Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Tue, 20 Dec 2022 19:35:01 +0100 Subject: [PATCH 092/114] include current notification level in cache key this prevents a cached regular expression being used that was created with a different notification level. For example, the following now generates the expected warning: In [1]: import re2 In [2]: re2.compile('a*+') Out[2]: re.compile('a*+') In [3]: re2.set_fallback_notification(re2.FALLBACK_WARNING) In [4]: re2.compile('a*+') <ipython-input-5-041122e221c7>:1: UserWarning: WARNING: Using re module. 
Reason: bad repetition operator: *+ re2.compile('a*+') Out[4]: re.compile('a*+') --- src/compile.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compile.pxi b/src/compile.pxi index 7d8ca97e..588584fd 100644 --- a/src/compile.pxi +++ b/src/compile.pxi @@ -1,6 +1,6 @@ def compile(pattern, int flags=0, int max_mem=8388608): - cachekey = (type(pattern), pattern, flags) + cachekey = (type(pattern), pattern, flags, current_notification) if cachekey in _cache: return _cache[cachekey] p = _compile(pattern, flags, max_mem) From 76cdec765244a8ff24f2e3088af1d3b0b6faba22 Mon Sep 17 00:00:00 2001 From: Andreas van Cranenburgh <andreas@unstable.nl> Date: Thu, 13 Apr 2023 12:47:37 +0200 Subject: [PATCH 093/114] fix #42 --- src/re2.pyx | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/re2.pyx b/src/re2.pyx index 58c89a28..21fa74ee 100644 --- a/src/re2.pyx +++ b/src/re2.pyx @@ -124,11 +124,9 @@ DEBUG = re.DEBUG NOFLAG = 0 # Python 3.11 ASCII = 256 # Python 3 -try: +if sys.version_info[:2] >= (3, 11): import enum -except ImportError: - pass -else: + @enum.global_enum @enum._simple_enum(enum.IntFlag, boundary=enum.KEEP) class RegexFlag: @@ -140,8 +138,6 @@ else: MULTILINE = M = re.MULTILINE # make anchors look for newline DOTALL = S = re.DOTALL # make dot match newline VERBOSE = X = re.VERBOSE # ignore whitespace and comments - # sre extensions (experimental, don't rely on these - # TEMPLATE = T = _compiler.SRE_FLAG_TEMPLATE # unknown purpose, deprecated DEBUG = re.DEBUG # dump pattern after compilation __str__ = object.__str__ _numeric_repr_ = hex From 03caeb00d996ff22353031f091570040ae691fa4 Mon Sep 17 00:00:00 2001 From: Steve Arnold <sarnold@vctlabs.com> Date: Mon, 8 Apr 2024 12:04:27 -0700 Subject: [PATCH 094/114] chg: dev: update python, deps, GH action versions, and tox file * update pybind11 usage and set cmake python vars to Title_CASE * refactor cmake extension build to use pybind11 module bits * move emptygroups test 
from "differences" Signed-off-by: Steve Arnold <sarnold@vctlabs.com> --- .github/workflows/ci.yml | 8 +++--- .github/workflows/conda.yml | 10 +++---- .github/workflows/main.yml | 20 +++++++------- .github/workflows/release.yml | 26 +++++++++--------- CMakeLists.txt | 33 ++++++++++++----------- cmake/modules/FindCython.cmake | 6 ++--- pyproject.toml | 1 - setup.py | 2 +- src/CMakeLists.txt | 23 +++++++--------- tests/test_emptygroups.txt | 9 +++---- tox.ini | 48 ++++++++++++++++++++-------------- 11 files changed, 95 insertions(+), 91 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4e240970..5a1e8245 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,15 +17,15 @@ jobs: strategy: fail-fast: true matrix: - os: [ubuntu-20.04] - python-version: [3.6, 3.7, 3.8, 3.9] + os: [ubuntu-22.04] + python-version: [3.8, 3.9, '3.10', '3.11', '3.12'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index bf3abd30..2a298903 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -12,8 +12,8 @@ jobs: strategy: fail-fast: false matrix: - platform: [ubuntu-latest, windows-2016, macos-latest] - python-version: [3.8, 3.9] + platform: [ubuntu-latest, windows-2019, macos-latest] + python-version: [3.8, '3.10'] runs-on: ${{ matrix.platform }} @@ -23,18 +23,18 @@ jobs: shell: "bash -l {0}" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Cache conda - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: ~/conda_pkgs_dir key: ${{matrix.os}}-conda-pkgs-${{hashFiles('**/conda.recipe/meta.yaml')}} - name: Get conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: python-version: ${{ 
matrix.python-version }} channels: conda-forge diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a9bc2175..702aa5c8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,14 +13,14 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-20.04, macos-latest, windows-latest] + os: [ubuntu-22.04, macos-latest, windows-latest] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 name: Install Python with: python-version: '3.8' @@ -40,7 +40,7 @@ jobs: env: CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2010_x86_64:latest CIBW_MANYLINUX_I686_IMAGE: quay.io/pypa/manylinux2010_i686:latest - CIBW_BUILD: cp36-* cp37-* cp38-* cp39-* + CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* CIBW_SKIP: "*-win32" CIBW_BEFORE_ALL_LINUX: > yum -y -q --enablerepo=extras install epel-release @@ -58,7 +58,7 @@ jobs: run: | python -m cibuildwheel --output-dir wheelhouse - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/*.whl @@ -66,9 +66,9 @@ jobs: name: Build source distribution runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 name: Install Python with: python-version: '3.7' @@ -78,7 +78,7 @@ jobs: pip install pep517 python -m pep517.build -s . 
- - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: path: dist/*.tar.gz @@ -90,8 +90,8 @@ jobs: name: Check artifacts are correct runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 - - uses: actions/download-artifact@v2 + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 # note wheels should be in subdirectory <upload_name> - name: Check number of downloaded artifacts diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e1251517..3fd96e07 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,14 +12,14 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macos-latest, windows-latest] + os: [ubuntu-22.04, macos-latest, windows-latest] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 name: Install Python with: python-version: '3.8' @@ -39,7 +39,7 @@ jobs: env: CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2010_x86_64:latest CIBW_MANYLINUX_I686_IMAGE: quay.io/pypa/manylinux2010_i686:latest - CIBW_BUILD: cp36-* cp37-* cp38-* cp39-* + CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* CIBW_SKIP: "*-win32" CIBW_BEFORE_ALL_LINUX: > yum -y -q --enablerepo=extras install epel-release @@ -57,7 +57,7 @@ jobs: run: | python -m cibuildwheel --output-dir wheelhouse - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/*.whl @@ -65,9 +65,9 @@ jobs: name: Build source distribution runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 name: Install Python with: python-version: '3.8' @@ -77,13 +77,13 @@ jobs: pip install pep517 python -m pep517.build -s . 
- - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: path: dist/*.tar.gz create_release: needs: [build_sdist, cibw_wheels] - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Get version @@ -92,17 +92,17 @@ jobs: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV echo ${{ env.VERSION }} - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 name: Install Python with: python-version: 3.7 # download all artifacts to project dir - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 - name: Generate changes file uses: sarnold/gitchangelog-action@master @@ -111,7 +111,7 @@ jobs: - name: Create draft release id: create_release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@main env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/CMakeLists.txt b/CMakeLists.txt index 83eff6e0..d12bc608 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.15...3.18) +cmake_minimum_required(VERSION 3.15...3.28) project(re2 LANGUAGES CXX C) @@ -9,8 +9,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) - set(CLANG_DEFAULT_CXX_STDLIB libc++) - set(CLANG_DEFAULT_RTLIB compiler-rt) + set(CLANG_DEFAULT_CXX_STDLIB libc++) + set(CLANG_DEFAULT_RTLIB compiler-rt) endif() if(NOT CMAKE_BUILD_TYPE) @@ -20,27 +20,30 @@ endif() include(GNUInstallDirs) +# get rid of FindPython old warnings, refactor FindCython module +set(CMP0148 NEW) + find_package(pybind11 CONFIG) if(pybind11_FOUND) - message(STATUS "System pybind11 found") + message(STATUS "System pybind11 found") else() - message(STATUS "Fetching pybind11 from github") - # Fetch pybind11 - include(FetchContent) - - FetchContent_Declare( - pybind11 - GIT_REPOSITORY https://github.com/pybind/pybind11 - GIT_TAG v2.6.1 - ) - 
FetchContent_MakeAvailable(pybind11) + message(STATUS "Fetching pybind11 from github") + # Fetch pybind11 + include(FetchContent) + + FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11 + GIT_TAG v2.12.0 + ) + FetchContent_MakeAvailable(pybind11) endif() find_package(Threads REQUIRED) if (${PYTHON_IS_DEBUG}) - set(PY_DEBUG ON) + set(PY_DEBUG ON) endif() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} diff --git a/cmake/modules/FindCython.cmake b/cmake/modules/FindCython.cmake index 04aed1f8..c53e2b83 100644 --- a/cmake/modules/FindCython.cmake +++ b/cmake/modules/FindCython.cmake @@ -24,9 +24,9 @@ # Use the Cython executable that lives next to the Python executable # if it is a local installation. -find_package( PythonInterp ) -if( PYTHONINTERP_FOUND ) - get_filename_component( _python_path ${PYTHON_EXECUTABLE} PATH ) +find_package(Python) +if( Python_FOUND ) + get_filename_component( _python_path ${Python_EXECUTABLE} PATH ) find_program( CYTHON_EXECUTABLE NAMES cython cython.bat cython3 HINTS ${_python_path} diff --git a/pyproject.toml b/pyproject.toml index 18c975e1..d2d6a428 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,6 @@ [build-system] requires = [ "setuptools>=42", - "wheel", "Cython>=0.20", "pybind11>=2.6.0", "ninja; sys_platform != 'Windows'", diff --git a/setup.py b/setup.py index 87c65b23..e76c5e3d 100755 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ def build_extension(self, ext): # from Python. 
cmake_args = [ "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}".format(extdir), - "-DPYTHON_EXECUTABLE={}".format(sys.executable), + "-DPython_EXECUTABLE={}".format(sys.executable), "-DSCM_VERSION_INFO={}".format(__version__), "-DCMAKE_BUILD_TYPE={}".format(cfg), # not used on MSVC, but no harm ] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 61d63aa3..8e0372d2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,16 +21,11 @@ add_custom_command(OUTPUT ${cython_output} DEPENDS ${cy_srcs} COMMENT "Cythonizing extension ${cython_src}") -add_library(${cython_module} MODULE ${cython_output}) +pybind11_add_module(${cython_module} MODULE ${cython_output}) -set_target_properties(${cython_module} - PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" - SUFFIX "${PYTHON_MODULE_EXTENSION}") - -target_include_directories(${cython_module} PUBLIC - ${PYTHON_INCLUDE_DIRS}) - -target_compile_definitions(${cython_module} PRIVATE VERSION_INFO=${SCM_VERSION_INFO}) +target_compile_definitions( + ${cython_module} PRIVATE VERSION_INFO=${SCM_VERSION_INFO} +) # here we get to jump through some hoops to find libre2 on the manylinux # docker CI images, etc @@ -57,12 +52,12 @@ endif() if(APPLE) # macos/appleclang needs this - target_link_libraries(${cython_module} PRIVATE pybind11::module) - target_link_libraries(${cython_module} PRIVATE pybind11::python_link_helper) + target_link_libraries(${cython_module} PUBLIC pybind11::module) + target_link_libraries(${cython_module} PUBLIC pybind11::python_link_helper) endif() if(MSVC) - target_compile_options(${cython_module} PRIVATE /utf-8) - target_link_libraries(${cython_module} PRIVATE ${PYTHON_LIBRARIES}) - target_link_libraries(${cython_module} PRIVATE pybind11::windows_extras) + target_compile_options(${cython_module} PUBLIC /utf-8) + target_link_libraries(${cython_module} PUBLIC ${Python_LIBRARIES}) + target_link_libraries(${cython_module} PUBLIC pybind11::windows_extras) endif() diff --git a/tests/test_emptygroups.txt 
b/tests/test_emptygroups.txt index 4a5bd5bc..b55ca650 100644 --- a/tests/test_emptygroups.txt +++ b/tests/test_emptygroups.txt @@ -20,6 +20,10 @@ Unused vs. empty group: ('a', '') >>> re2.search(r'((.*)+.)', 'a').groups() ('a', '') + >>> re.search(r'((.*)*.)', 'a').groups() + ('a', '') + >>> re2.search(r'((.*)*.)', 'a').groups() + ('a', '') The following show different behavior for re and re2: @@ -28,9 +32,4 @@ The following show different behavior for re and re2: >>> re2.search(r'((.*)*.)', 'Hello').groups() ('Hello', 'Hell') - >>> re.search(r'((.*)*.)', 'a').groups() - ('a', '') - >>> re2.search(r'((.*)*.)', 'a').groups() - ('a', None) - >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY) diff --git a/tox.ini b/tox.ini index aa388ad0..32143d92 100644 --- a/tox.ini +++ b/tox.ini @@ -1,14 +1,23 @@ [tox] -envlist = py3{6,7,8,9} +envlist = py3{7,8,9,10,11,12} skip_missing_interpreters = true isolated_build = true skipsdist=True [gh-actions] -3.6 = py36 -3.7 = py37 -3.8 = py38 -3.9 = py39 +python = + 3.7: py37 + 3.8: py38 + 3.9: py39 + 3.10: py310 + 3.11: py311 + 3.12: py312 + +[gh-actions:env] +PLATFORM = + ubuntu-22.04: linux + macos-latest: macos + windows-latest: windows [testenv] passenv = @@ -22,11 +31,9 @@ passenv = deps = pip>=20.0.1 - path + -e .[test] commands = - python -c "import path; path.Path('build').rmtree_p()" - pip install -e .[test] pytest -v . [testenv:dev] @@ -47,11 +54,9 @@ setenv = deps = pip>=20.0.1 cython>=0.20 - path pytest commands = - python -c "import path; path.Path('build').rmtree_p()" python setup.py build_ext --inplace # use --capture=no to see all the doctest output python -m pytest -v --ignore=tests/test_re.py --doctest-glob=*.txt . 
@@ -69,14 +74,12 @@ passenv = deps = pip>=20.0.1 - path + .[perf] commands = - python -c "import path; path.Path('build').rmtree_p()" - pip install .[perf] python tests/performance.py -[testenv:deploy] +[testenv:build] passenv = pythonLocation CI @@ -93,14 +96,10 @@ deps = pip>=20.0.1 build twine - path commands = - python -c "import path; path.Path('build').rmtree_p()" python -m build . twine check dist/* - python -m pip install https://github.com/sarnold/gitchangelog/archive/3.0.7.tar.gz - bash -c 'gitchangelog $(git describe --abbrev=0)..' [testenv:check] skip_install = true @@ -111,9 +110,18 @@ allowlist_externals = bash deps = pip>=20.0.1 - pytest commands = pip install pyre2 --force-reinstall --prefer-binary -f dist/ python -m unittest discover -f -s . - #pytest --doctest-glob="*.txt" + +[testenv:clean] +skip_install = true +allowlist_externals = + bash + +deps = + pip>=21.1 + +commands = + bash -c 'rm -rf *.egg-info re2*.so .coverage.* tests/__pycache__ dist/ build/' From fb2d2dff9bfc153997c9f8a8a92722290345a598 Mon Sep 17 00:00:00 2001 From: Steve Arnold <sarnold@vctlabs.com> Date: Mon, 8 Apr 2024 20:13:48 -0700 Subject: [PATCH 095/114] fix: dev: remove failing subscript test in single match group * cleanup asserts and add groups() test Signed-off-by: Steve Arnold <sarnold@vctlabs.com> --- tests/test_re.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_re.py b/tests/test_re.py index 57992778..a5644148 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -254,11 +254,10 @@ def test_re_match(self): # A single group m = re.match('(a)', 'a') self.assertEqual(m.group(0), 'a') - self.assertEqual(m.group(0), 'a') self.assertEqual(m.group(1), 'a') + self.assertEqual(m.group(0, 0), ('a', 'a')) self.assertEqual(m.group(1, 1), ('a', 'a')) - self.assertEqual(m[0], 'a') - self.assertEqual(m[1], 'a') + self.assertEqual(m.groups(), ('a',)) pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') self.assertEqual(pat.match('a').group(1, 
2, 3), ('a', None, None)) From f3d03acdd41eb4f2f9751898ef4d0ad7c52d98eb Mon Sep 17 00:00:00 2001 From: Steve Arnold <sarnold@vctlabs.com> Date: Mon, 8 Apr 2024 20:18:57 -0700 Subject: [PATCH 096/114] chg: dev: update packaging files, add setuptools_scm support * refactor setup.py after pybind11 upstream changes Signed-off-by: Steve Arnold <sarnold@vctlabs.com> --- pyproject.toml | 5 ++- setup.cfg | 8 ++--- setup.py | 85 ++++++++++++++++++++++++++++---------------------- tox.ini | 49 ++++++++++++++++++++++------- 4 files changed, 93 insertions(+), 54 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d2d6a428..27348999 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,17 @@ [build-system] requires = [ "setuptools>=42", + "setuptools_scm[toml]>=6.2", "Cython>=0.20", "pybind11>=2.6.0", "ninja; sys_platform != 'Windows'", - "cmake>=3.15", + "cmake>=3.18", ] build-backend = "setuptools.build_meta" +[tool.setuptools_scm] + [tool.pytest.ini_options] minversion = "6.0" testpaths = [ diff --git a/setup.cfg b/setup.cfg index 5223702a..36d2928f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,6 @@ [metadata] name = pyre2 +version = attr: setuptools_scm.get_version author = Andreas van Cranenburgh author_email = andreas@unstable.nl maintainer = Steve Arnold @@ -20,7 +21,8 @@ classifiers = [options] python_requires = >=3.6 -zip_safe = False +setup_requires = + setuptools_scm[toml] [options.extras_require] test = @@ -33,7 +35,3 @@ perf = # these error codes interfere with Black ignore = E203, E231, E501, W503, B950 select = C,E,F,W,B,B9 - -[egg_info] -tag_build = -tag_date = 0 diff --git a/setup.py b/setup.py index e76c5e3d..588ff332 100755 --- a/setup.py +++ b/setup.py @@ -2,16 +2,14 @@ # import os -import sys +import re import subprocess +import sys +from pathlib import Path from setuptools import setup, Extension from setuptools.command.build_ext import build_ext - -# update the release version both here and in conda.recipe/meta.yaml -__version__ = 
'0.3.6' - # Convert distutils Windows platform specifiers to CMake -A arguments PLAT_TO_CMAKE = { "win32": "Win32", @@ -20,42 +18,39 @@ "win-arm64": "ARM64", } + # A CMakeExtension needs a sourcedir instead of a file list. class CMakeExtension(Extension): - def __init__(self, name, sourcedir=""): - Extension.__init__(self, name, sources=[], libraries=['re2']) - self.sourcedir = os.path.abspath(sourcedir) + def __init__(self, name: str, sourcedir: str = "") -> None: + super().__init__(name, sources=[], libraries=['re2']) + self.sourcedir = os.fspath(Path(sourcedir).resolve()) class CMakeBuild(build_ext): + def build_extension(self, ext: CMakeExtension) -> None: + # Must be in this form due to bug in .resolve() only fixed in Python 3.10+ + ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name) + extdir = ext_fullpath.parent.resolve() - def build_extension(self, ext): - extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + # Using this requires trailing slash for auto-detection & inclusion of + # auxiliary "native" libs - # required for auto-detection of auxiliary "native" libs - if not extdir.endswith(os.path.sep): - extdir += os.path.sep - - # Set a sensible default build type for packaging - if "CMAKE_BUILD_OVERRIDE" not in os.environ: - cfg = "Debug" if self.debug else "RelWithDebInfo" - else: - cfg = os.environ.get("CMAKE_BUILD_OVERRIDE", "") + debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug + cfg = "Debug" if debug else "Release" # CMake lets you override the generator - we need to check this. # Can be set with Conda-Build, for example. cmake_generator = os.environ.get("CMAKE_GENERATOR", "") # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON - # SCM_VERSION_INFO shows you how to pass a value into the C++ code + # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code # from Python. 
cmake_args = [ - "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}".format(extdir), - "-DPython_EXECUTABLE={}".format(sys.executable), - "-DSCM_VERSION_INFO={}".format(__version__), - "-DCMAKE_BUILD_TYPE={}".format(cfg), # not used on MSVC, but no harm + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}", + f"-DPython_EXECUTABLE={sys.executable}", + f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm ] - build_args = ["--verbose"] + build_args = [] # CMake also lets you provide a toolchain file. # Can be set in CI build environments for example. @@ -63,17 +58,27 @@ def build_extension(self, ext): if cmake_toolchain_file: cmake_args += ["-DCMAKE_TOOLCHAIN_FILE={}".format(cmake_toolchain_file)] + cmake_args += [f"-DSCM_VERSION_INFO={self.distribution.get_version()}"] + if self.compiler.compiler_type != "msvc": # Using Ninja-build since it a) is available as a wheel and b) # multithreads automatically. MSVC would require all variables be # exported for Ninja to pick it up, which is a little tricky to do. # Users can override the generator with CMAKE_GENERATOR in CMake # 3.15+. 
- if not cmake_generator: - cmake_args += ["-GNinja"] + if not cmake_generator or cmake_generator == "Ninja": + try: + import ninja + + ninja_executable_path = Path(ninja.BIN_DIR) / "ninja" + cmake_args += [ + "-GNinja", + f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}", + ] + except ImportError: + pass else: - # Single config generators are handled "normally" single_config = any(x in cmake_generator for x in {"NMake", "Ninja"}) @@ -89,10 +94,16 @@ def build_extension(self, ext): # Multi-config generators have a different way to specify configs if not single_config: cmake_args += [ - "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir) + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}" ] build_args += ["--config", cfg] + if sys.platform.startswith("darwin"): + # Cross-compile support for macOS - respect ARCHFLAGS if set + archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", "")) + if archs: + cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))] + # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level # across all generators. if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: @@ -100,21 +111,21 @@ def build_extension(self, ext): # using -j in the build_ext call, not supported by pip or PyPA-build. if hasattr(self, "parallel") and self.parallel: # CMake 3.12+ only. 
- build_args += ["-j{}".format(self.parallel)] + build_args += [f"-j{self.parallel}"] - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) + build_temp = Path(self.build_temp) / ext.name + if not build_temp.exists(): + build_temp.mkdir(parents=True) - subprocess.check_call( - ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp + subprocess.run( + ["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True ) - subprocess.check_call( - ["cmake", "--build", "."] + build_args, cwd=self.build_temp + subprocess.run( + ["cmake", "--build", ".", *build_args], cwd=build_temp, check=True ) setup( - version=__version__, ext_modules=[CMakeExtension('re2')], cmdclass={'build_ext': CMakeBuild}, zip_safe=False, diff --git a/tox.ini b/tox.ini index 32143d92..616be0fb 100644 --- a/tox.ini +++ b/tox.ini @@ -19,8 +19,28 @@ PLATFORM = macos-latest: macos windows-latest: windows +[base] +deps = + pip>=21.1 + setuptools_scm[toml] + +[build] +deps = + pip>=21.1 + build + twine + [testenv] +skip_install = true + +setenv = + PYTHONPATH = {toxinidir} + passenv = + HOME + USERNAME + USER + XDG_* CI CC CXX @@ -29,9 +49,12 @@ passenv = CMAKE_GENERATOR PIP_DOWNLOAD_CACHE +allowlist_externals = + bash + deps = - pip>=20.0.1 - -e .[test] + {[base]deps} + .[test] commands = pytest -v . @@ -40,6 +63,10 @@ commands = skip_install = true passenv = + HOME + USERNAME + USER + XDG_* CI CC CXX @@ -49,15 +76,17 @@ passenv = PIP_DOWNLOAD_CACHE setenv = - PYTHONPATH=. + PYTHONPATH = {toxinidir} deps = - pip>=20.0.1 - cython>=0.20 - pytest + {[base]deps} + cython + -r requirements-dev.txt + -e . commands = - python setup.py build_ext --inplace + # this is deprecated => _DeprecatedInstaller warning from setuptools + #python setup.py build_ext --inplace # use --capture=no to see all the doctest output python -m pytest -v --ignore=tests/test_re.py --doctest-glob=*.txt . 
python -m pytest -v tests/test_re.py @@ -73,7 +102,7 @@ passenv = PIP_DOWNLOAD_CACHE deps = - pip>=20.0.1 + {[base]deps} .[perf] commands = @@ -93,9 +122,7 @@ passenv = allowlist_externals = bash deps = - pip>=20.0.1 - build - twine + {[build]deps} commands = python -m build . From b98576414378cc4f31e838edb18fad3203df0ae6 Mon Sep 17 00:00:00 2001 From: Steve Arnold <sarnold@vctlabs.com> Date: Mon, 8 Apr 2024 21:20:16 -0700 Subject: [PATCH 097/114] chg: dev: bump cibw version, update workflows and min py version Signed-off-by: Steve Arnold <sarnold@vctlabs.com> --- .github/workflows/ci.yml | 26 ++++++----- .github/workflows/conda.yml | 87 +++++++++++++++++++++---------------- .github/workflows/main.yml | 22 +++++----- environment.devenv.yml | 14 ++++++ requirements-cibw.txt | 2 +- setup.cfg | 4 +- setup.py | 2 +- 7 files changed, 94 insertions(+), 63 deletions(-) create mode 100644 environment.devenv.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5a1e8245..d1422423 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,9 +3,13 @@ name: Smoke on: workflow_dispatch: pull_request: + push: + branches: + - master jobs: python_wheels: + name: Python wheels for ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} defaults: run: @@ -13,12 +17,13 @@ jobs: env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python-version }} + PYTHONIOENCODING: utf-8 PIP_DOWNLOAD_CACHE: ${{ github.workspace }}/../.pip_download_cache strategy: - fail-fast: true + fail-fast: false matrix: os: [ubuntu-22.04] - python-version: [3.8, 3.9, '3.10', '3.11', '3.12'] + python-version: [3.8, 3.9, '3.10', '3.11'] steps: - uses: actions/checkout@v4 @@ -35,21 +40,20 @@ jobs: pip install tox tox-gh-actions - name: Install Ubuntu build deps - if: runner.os == 'Linux' run: | sudo apt-get -qq update - sudo apt-get install -y software-properties-common - sudo add-apt-repository -y -s ppa:nerdboy/embedded - sudo apt-get install -y pybind11-dev 
libre2-dev ninja-build + sudo apt-get install -y libre2-dev - - name: Test in place + - name: Test using pip install run: | - tox -e dev + tox + env: + PLATFORM: ${{ matrix.os }} - - name: Build dist pkgs + - name: Build sdist and wheel pkgs run: | - tox -e deploy + tox -e build - - name: Check wheel + - name: Test using built wheel run: | tox -e check diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 2a298903..af361830 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,55 +1,68 @@ -name: Conda +name: CondaDev on: workflow_dispatch: - pull_request: push: branches: - master + pull_request: jobs: build: + name: Test on Python ${{ matrix.python-version }} and ${{ matrix.os }} + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - platform: [ubuntu-latest, windows-2019, macos-latest] - python-version: [3.8, '3.10'] - - runs-on: ${{ matrix.platform }} - - # The setup-miniconda action needs this to activate miniconda - defaults: - run: - shell: "bash -l {0}" + os: ['macos-11', 'ubuntu-22.04'] + python-version: ['3.8', '3.11'] + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + PYTHONIOENCODING: utf-8 steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Cache conda - uses: actions/cache@v4 - with: - path: ~/conda_pkgs_dir - key: ${{matrix.os}}-conda-pkgs-${{hashFiles('**/conda.recipe/meta.yaml')}} - - - name: Get conda - uses: conda-incubator/setup-miniconda@v3 - with: - python-version: ${{ matrix.python-version }} - channels: conda-forge - channel-priority: strict - use-only-tar-bz2: true - auto-activate-base: true + - uses: actions/checkout@v4 + with: + fetch-depth: 0 - - name: Prepare - run: conda install conda-build conda-verify + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} + channels: conda-forge + channel-priority: strict + use-only-tar-bz2: true - - name: Build - run: conda build 
conda.recipe + - name: Cache conda packages + id: cache + uses: actions/cache@v4 + env: + # Increase this value to reset cache and rebuild the env during the PR + CACHE_NUMBER: 3 + with: + path: /home/runner/conda_pkgs_dir + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('environment.devenv.yml') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - - name: Install - run: conda install -c ${CONDA_PREFIX}/conda-bld/ pyre2 + - name: Configure condadev environment + shell: bash -l {0} + env: + PY_VER: ${{ matrix.python-version }} + run: | + conda config --set always_yes yes --set changeps1 no + conda config --add channels conda-forge + conda install conda-devenv + conda devenv - - name: Test - run: python -m unittest discover -f -s tests/ + - name: Build and test + shell: bash -l {0} + env: + PY_VER: ${{ matrix.python-version }} + run: | + source activate pyre2 + python -m pip install .[test] -vv + python -m pytest -v . diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 702aa5c8..21855fbc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,17 +3,17 @@ name: Build on: workflow_dispatch: pull_request: - push: - branches: - - master + #push: + #branches: + #- master jobs: - build_wheels: - name: Build wheels on ${{ matrix.os }} for Python + cibw_wheels: runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: - os: [ubuntu-22.04, macos-latest, windows-latest] + os: [ubuntu-22.04, macos-11, windows-2019] steps: - uses: actions/checkout@v4 @@ -71,19 +71,19 @@ jobs: - uses: actions/setup-python@v5 name: Install Python with: - python-version: '3.7' + python-version: '3.8' - name: Build sdist run: | - pip install pep517 - python -m pep517.build -s . + pip install build + python -m build -s . 
- uses: actions/upload-artifact@v4 with: path: dist/*.tar.gz check_artifacts: - needs: [build_sdist, build_wheels] + needs: [build_sdist, cibw_wheels] defaults: run: shell: bash @@ -95,4 +95,4 @@ jobs: # note wheels should be in subdirectory <upload_name> - name: Check number of downloaded artifacts - run: ls -R + run: ls -l artifact diff --git a/environment.devenv.yml b/environment.devenv.yml new file mode 100644 index 00000000..dcdee670 --- /dev/null +++ b/environment.devenv.yml @@ -0,0 +1,14 @@ +name: pyre2 + +dependencies: + - python ==3.11 + - cmake >=3.18 + - ninja + - cython + - cxx-compiler + - pybind11 + - pip + - re2 + - pytest + - regex + - six diff --git a/requirements-cibw.txt b/requirements-cibw.txt index 932364dd..3d34eb3b 100644 --- a/requirements-cibw.txt +++ b/requirements-cibw.txt @@ -1 +1 @@ -cibuildwheel==1.7.4 +cibuildwheel==2.11.3 diff --git a/setup.cfg b/setup.cfg index 36d2928f..f8c4c6bc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,12 +14,12 @@ license_files = LICENSE classifiers = License :: OSI Approved :: BSD License Programming Language :: Cython - Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.8 Intended Audience :: Developers Topic :: Software Development :: Libraries :: Python Modules [options] -python_requires = >=3.6 +python_requires = >=3.8 setup_requires = setuptools_scm[toml] diff --git a/setup.py b/setup.py index 588ff332..814368ae 100755 --- a/setup.py +++ b/setup.py @@ -128,5 +128,5 @@ def build_extension(self, ext: CMakeExtension) -> None: setup( ext_modules=[CMakeExtension('re2')], cmdclass={'build_ext': CMakeBuild}, - zip_safe=False, + package_dir={'': 'src'}, ) From 7fea650c89fe60f77522ee70eb079017cef21a0f Mon Sep 17 00:00:00 2001 From: Steve Arnold <sarnold@vctlabs.com> Date: Tue, 9 Apr 2024 18:49:21 -0700 Subject: [PATCH 098/114] chg: dev: add/update cfgs, modernize cibw build workflow Signed-off-by: Steve Arnold <sarnold@vctlabs.com> --- .github/workflows/ci.yml | 6 +----- 
.github/workflows/main.yml | 42 ++++++++++++++++++-------------------- .pep8speaks.yml | 15 ++++++++++++++ pyproject.toml | 2 +- setup.cfg | 22 ++++++++++++++++++-- 5 files changed, 57 insertions(+), 30 deletions(-) create mode 100644 .pep8speaks.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1422423..95c25204 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,8 +52,4 @@ jobs: - name: Build sdist and wheel pkgs run: | - tox -e build - - - name: Test using built wheel - run: | - tox -e check + tox -e build,check diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 21855fbc..0bdccb53 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -25,38 +25,36 @@ jobs: with: python-version: '3.8' - - name: Prepare compiler environment for Windows - if: runner.os == 'Windows' - uses: ilammy/msvc-dev-cmd@v1 + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v3 with: - arch: amd64 - - - name: Install cibuildwheel - run: | - python -m pip install --upgrade pip - python -m pip install -r requirements-cibw.txt + platforms: all - name: Build wheels + uses: pypa/cibuildwheel@v2.17 env: - CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2010_x86_64:latest - CIBW_MANYLINUX_I686_IMAGE: quay.io/pypa/manylinux2010_i686:latest - CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* - CIBW_SKIP: "*-win32" + # configure cibuildwheel to build native archs ('auto'), and some + # emulated ones, plus cross-compile on macos + CIBW_ARCHS_LINUX: auto aarch64 + CIBW_ARCHS_MACOS: auto arm64 + CIBW_TEST_SKIP: "*_arm64 *universal2:arm64 *linux_i686" + CIBW_ARCHS_WINDOWS: auto64 + CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 + CIBW_MANYLINUX_I686_IMAGE: manylinux2010 + CIBW_BUILD: cp37-* cp38-* cp39-* cp310-* cp311-* + CIBW_SKIP: "*musllinux* cp311-*i686" CIBW_BEFORE_ALL_LINUX: > - yum -y -q --enablerepo=extras install epel-release - && yum install -y re2-devel ninja-build - 
CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel show {wheel} && auditwheel repair -w {dest_dir} {wheel}" + yum -y install epel-release && yum install -y re2-devel ninja-build CIBW_BEFORE_ALL_MACOS: > - brew install re2 pybind11 ninja - CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.09 - CIBW_REPAIR_WHEEL_COMMAND_MACOS: "pip uninstall -y delocate && pip install git+https://github.com/Chia-Network/delocate.git && delocate-listdeps {wheel} && delocate-wheel -w {dest_dir} -v {wheel}" + brew install re2 pybind11 + #CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.14 CIBW_BEFORE_ALL_WINDOWS: > vcpkg install re2:x64-windows && vcpkg integrate install CIBW_ENVIRONMENT_WINDOWS: 'CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' - CIBW_TEST_COMMAND: python -c "import re2" - run: | - python -m cibuildwheel --output-dir wheelhouse + CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: pytest -v {package}/tests/test_re.py - uses: actions/upload-artifact@v4 with: diff --git a/.pep8speaks.yml b/.pep8speaks.yml new file mode 100644 index 00000000..4f120b0d --- /dev/null +++ b/.pep8speaks.yml @@ -0,0 +1,15 @@ +scanner: + diff_only: True # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned. + linter: flake8 # Other option is pycodestyle + +no_blank_comment: False # If True, no comment is made on PR without any errors. +descending_issues_order: True # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file + +pycodestyle: # Same as scanner.linter value. 
Other option is flake8 + max-line-length: 90 # Default is 79 in PEP 8 + +flake8: + max-line-length: 90 # Default is 79 in PEP 8 + exclude: + - tests + - docs diff --git a/pyproject.toml b/pyproject.toml index 27348999..02531078 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "setuptools>=42", "setuptools_scm[toml]>=6.2", "Cython>=0.20", - "pybind11>=2.6.0", + "pybind11>=2.11.1", "ninja; sys_platform != 'Windows'", "cmake>=3.18", ] diff --git a/setup.cfg b/setup.cfg index f8c4c6bc..f4bf3678 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,7 +31,25 @@ test = perf = regex +[aliases] +test=pytest + +[check] +metadata = true +restructuredtext = true +strict = false + +[check-manifest] +ignore = + .gitattributes + .gitchangelog.rc + .gitignore + conda/** + [flake8] # these error codes interfere with Black -ignore = E203, E231, E501, W503, B950 -select = C,E,F,W,B,B9 +#ignore = E203, E231, E501, W503, B950, +ignore = E225,E226,E227,E402,E703,E999 +max-line-length = 90 +filename = *.pyx, *.px* +exclude = .git, .eggs, *.egg, .tox, build From 9e6daec3b312195c2d2ae19a4d1279f3ab382f4a Mon Sep 17 00:00:00 2001 From: Steve Arnold <sarnold@vctlabs.com> Date: Tue, 9 Apr 2024 21:43:56 -0700 Subject: [PATCH 099/114] chg: dev: update cmake and workflow files, delete unused bits * cleanup ci workflow, remove crufty makefile with deprecated setup.py commands * remove the package_dir bit from setup.py Signed-off-by: Steve Arnold <sarnold@vctlabs.com> --- .github/workflows/ci.yml | 2 +- .github/workflows/conda.yml | 8 ++++---- CMakeLists.txt | 2 +- MANIFEST.in | 8 -------- Makefile | 30 ------------------------------ cmake/modules/FindCython.cmake | 10 +++++----- setup.py | 1 - src/CMakeLists.txt | 2 +- tox.ini | 2 +- 9 files changed, 13 insertions(+), 52 deletions(-) delete mode 100644 MANIFEST.in delete mode 100644 Makefile diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 95c25204..5a21d080 100644 --- a/.github/workflows/ci.yml +++ 
b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04] - python-version: [3.8, 3.9, '3.10', '3.11'] + python-version: [3.9, '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index af361830..6312fc24 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -2,10 +2,10 @@ name: CondaDev on: workflow_dispatch: - push: - branches: - - master - pull_request: + #push: + #branches: + #- master + #pull_request: jobs: build: diff --git a/CMakeLists.txt b/CMakeLists.txt index d12bc608..87627f90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ endif() include(GNUInstallDirs) # get rid of FindPython old warnings, refactor FindCython module -set(CMP0148 NEW) +#set(CMP0148 NEW) find_package(pybind11 CONFIG) diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 305a4445..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,8 +0,0 @@ -global-include CMakeLists.txt *.cmake -include AUTHORS README.rst HISTORY CHANGELOG.rst LICENSE -graft src -graft tests -global-exclude *.py[cod] __pycache__ -recursive-exclude .tox * -recursive-exclude .github * -recursive-exclude vcpkg * diff --git a/Makefile b/Makefile deleted file mode 100644 index 3785bdce..00000000 --- a/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -install: - python3 setup.py install --user - -test: install - pytest - -install2: - python2 setup.py install --user - -test2: install2 - python2 -m pytest - -clean: - rm -rf build pyre2.egg-info &>/dev/null - rm -f *.so src/*.so src/re2.cpp src/*.html &>/dev/null - -distclean: clean - rm -rf .tox/ dist/ .pytest_cache/ - -valgrind: - python3-dbg setup.py install --user && \ - (cd tests && valgrind --tool=memcheck --suppressions=../valgrind-python.supp \ - --leak-check=full --show-leak-kinds=definite \ - python3-dbg test_re.py) - -valgrind2: - python2-dbg setup.py install --user && \ - (cd tests && valgrind --tool=memcheck 
--suppressions=../valgrind-python.supp \ - --leak-check=full --show-leak-kinds=definite \ - python2-dbg re2_test.py) diff --git a/cmake/modules/FindCython.cmake b/cmake/modules/FindCython.cmake index c53e2b83..83ac106e 100644 --- a/cmake/modules/FindCython.cmake +++ b/cmake/modules/FindCython.cmake @@ -2,7 +2,7 @@ # # This code sets the following variables: # -# CYTHON_EXECUTABLE +# Cython_EXECUTABLE # # See also UseCython.cmake @@ -27,18 +27,18 @@ find_package(Python) if( Python_FOUND ) get_filename_component( _python_path ${Python_EXECUTABLE} PATH ) - find_program( CYTHON_EXECUTABLE + find_program( Cython_EXECUTABLE NAMES cython cython.bat cython3 HINTS ${_python_path} ) else() - find_program( CYTHON_EXECUTABLE + find_program( Cython_EXECUTABLE NAMES cython cython.bat cython3 ) endif() include( FindPackageHandleStandardArgs ) -FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE ) +FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS Cython_EXECUTABLE ) -mark_as_advanced( CYTHON_EXECUTABLE ) +mark_as_advanced( Cython_EXECUTABLE ) diff --git a/setup.py b/setup.py index 814368ae..73c87fc9 100755 --- a/setup.py +++ b/setup.py @@ -128,5 +128,4 @@ def build_extension(self, ext: CMakeExtension) -> None: setup( ext_modules=[CMakeExtension('re2')], cmdclass={'build_ext': CMakeBuild}, - package_dir={'': 'src'}, ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8e0372d2..d28e325c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,7 +12,7 @@ file(GLOB cy_srcs *.pyx *.pxi *.h) # .pyx -> .cpp add_custom_command(OUTPUT ${cython_output} - COMMAND ${CYTHON_EXECUTABLE} + COMMAND ${Cython_EXECUTABLE} -a -3 --fast-fail --cplus -I ${re2_include_dir} diff --git a/tox.ini b/tox.ini index 616be0fb..67eb8056 100644 --- a/tox.ini +++ b/tox.ini @@ -151,4 +151,4 @@ deps = pip>=21.1 commands = - bash -c 'rm -rf *.egg-info re2*.so .coverage.* tests/__pycache__ dist/ build/' + bash -c 'rm -rf src/*.egg-info re2*.so src/re2*.so src/re2.cpp 
*coverage.* tests/__pycache__ dist/ build/' From 7a329427c711312ecf2bff7b8b091cb250cafe80 Mon Sep 17 00:00:00 2001 From: Steve Arnold <sarnold@vctlabs.com> Date: Fri, 12 Apr 2024 19:44:49 -0700 Subject: [PATCH 100/114] chg: dev: cleanup metadata and test imports, disable platform whl tests * check if find_package py3 works across all CI runners Signed-off-by: Steve Arnold <sarnold@vctlabs.com> --- .github/workflows/main.yml | 6 +++--- cmake/modules/FindCython.cmake | 2 +- setup.cfg | 2 +- tests/test_re.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0bdccb53..374e5acb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, macos-11, windows-2019] + os: [ubuntu-22.04, macos-11, windows-latest] steps: - uses: actions/checkout@v4 @@ -53,8 +53,8 @@ jobs: vcpkg install re2:x64-windows && vcpkg integrate install CIBW_ENVIRONMENT_WINDOWS: 'CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' - CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest -v {package}/tests/test_re.py + CIBW_TEST_REQUIRES: "" + CIBW_TEST_COMMAND: "" - uses: actions/upload-artifact@v4 with: diff --git a/cmake/modules/FindCython.cmake b/cmake/modules/FindCython.cmake index 83ac106e..fde6edde 100644 --- a/cmake/modules/FindCython.cmake +++ b/cmake/modules/FindCython.cmake @@ -24,7 +24,7 @@ # Use the Cython executable that lives next to the Python executable # if it is a local installation. 
-find_package(Python) +find_package(Python3) if( Python_FOUND ) get_filename_component( _python_path ${Python_EXECUTABLE} PATH ) find_program( Cython_EXECUTABLE diff --git a/setup.cfg b/setup.cfg index f4bf3678..7bf19496 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,7 +5,7 @@ author = Andreas van Cranenburgh author_email = andreas@unstable.nl maintainer = Steve Arnold maintainer_email = nerdboy@gentoo.org -description = Python wrapper for Google\'s RE2 using Cython +description = Python wrapper for Google RE2 library using Cython long_description = file: README.rst long_description_content_type = text/x-rst; charset=UTF-8 url = https://github.com/andreasvc/pyre2 diff --git a/tests/test_re.py b/tests/test_re.py index a5644148..567e54fa 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -689,9 +689,9 @@ def test_dealloc(self): def test_re_suite(): try: - from tests.re_utils import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR + from tests.re_utils import tests, SUCCEED, FAIL, SYNTAX_ERROR except ImportError: - from re_utils import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR + from re_utils import tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: print('\nRunning test_re_suite ...') From 816705b6bf49ea653d6ad6bddde4767981bc50b7 Mon Sep 17 00:00:00 2001 From: Steve Arnold <sarnold@vctlabs.com> Date: Fri, 12 Apr 2024 20:55:52 -0700 Subject: [PATCH 101/114] chg: dev: enable findpython policy, use matrix uploads * no epel pkgs for linux aarch64, enable PYBIND11_FINDPYTHON * set macos deployment target to 10.9 Signed-off-by: Steve Arnold <sarnold@vctlabs.com> --- .github/workflows/main.yml | 7 ++++--- CMakeLists.txt | 3 ++- cmake/modules/FindCython.cmake | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 374e5acb..56deffd4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, macos-11, 
windows-latest] + os: [ubuntu-20.04, macos-11, windows-latest] steps: - uses: actions/checkout@v4 @@ -36,7 +36,7 @@ jobs: env: # configure cibuildwheel to build native archs ('auto'), and some # emulated ones, plus cross-compile on macos - CIBW_ARCHS_LINUX: auto aarch64 + CIBW_ARCHS_LINUX: auto CIBW_ARCHS_MACOS: auto arm64 CIBW_TEST_SKIP: "*_arm64 *universal2:arm64 *linux_i686" CIBW_ARCHS_WINDOWS: auto64 @@ -48,7 +48,7 @@ jobs: yum -y install epel-release && yum install -y re2-devel ninja-build CIBW_BEFORE_ALL_MACOS: > brew install re2 pybind11 - #CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.14 + CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.9 CIBW_BEFORE_ALL_WINDOWS: > vcpkg install re2:x64-windows && vcpkg integrate install @@ -58,6 +58,7 @@ jobs: - uses: actions/upload-artifact@v4 with: + name: wheels-${{ matrix.os }} path: ./wheelhouse/*.whl build_sdist: diff --git a/CMakeLists.txt b/CMakeLists.txt index 87627f90..9b805e97 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,8 +21,9 @@ endif() include(GNUInstallDirs) # get rid of FindPython old warnings, refactor FindCython module -#set(CMP0148 NEW) +set(CMP0148 NEW) +set(PYBIND11_FINDPYTHON ON) find_package(pybind11 CONFIG) if(pybind11_FOUND) diff --git a/cmake/modules/FindCython.cmake b/cmake/modules/FindCython.cmake index fde6edde..83ac106e 100644 --- a/cmake/modules/FindCython.cmake +++ b/cmake/modules/FindCython.cmake @@ -24,7 +24,7 @@ # Use the Cython executable that lives next to the Python executable # if it is a local installation. 
-find_package(Python3) +find_package(Python) if( Python_FOUND ) get_filename_component( _python_path ${Python_EXECUTABLE} PATH ) find_program( Cython_EXECUTABLE From 621888a399f87dd0eb9a2abde82207d50f36e9ba Mon Sep 17 00:00:00 2001 From: Steve Arnold <sarnold@vctlabs.com> Date: Sat, 13 Apr 2024 15:18:40 -0700 Subject: [PATCH 102/114] chg: dev: try pkgconf on windows CI runner Signed-off-by: Steve Arnold <sarnold@vctlabs.com> --- .github/workflows/main.yml | 2 +- src/CMakeLists.txt | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 56deffd4..f25c147f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -50,7 +50,7 @@ jobs: brew install re2 pybind11 CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.9 CIBW_BEFORE_ALL_WINDOWS: > - vcpkg install re2:x64-windows + vcpkg install pkgconf re2:x64-windows && vcpkg integrate install CIBW_ENVIRONMENT_WINDOWS: 'CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' CIBW_TEST_REQUIRES: "" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d28e325c..18b42fc8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -29,12 +29,14 @@ target_compile_definitions( # here we get to jump through some hoops to find libre2 on the manylinux # docker CI images, etc -find_package(re2 CONFIG NAMES re2) +if(NOT MSVC) + find_package(re2 CONFIG NAMES re2) +endif() if(re2_FOUND) message(STATUS "System re2 found") target_link_libraries(${cython_module} PRIVATE re2::re2) -elseif(NOT MSVC) +else() message(STATUS "Trying PkgConfig") find_package(PkgConfig REQUIRED) pkg_check_modules(RE2 IMPORTED_TARGET re2) From df921a8681dace056dae0c42c132a9899333534f Mon Sep 17 00:00:00 2001 From: Stephen L Arnold <nerdboy@gentoo.org> Date: Thu, 5 Sep 2024 19:57:06 -0700 Subject: [PATCH 103/114] fix: dev: bump CMake to c++17 with extensions to build against re2-0.2024.07.02 Signed-off-by: Stephen L Arnold <nerdboy@gentoo.org> --- 
CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b805e97..eea96df3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,9 +4,9 @@ project(re2 LANGUAGES CXX C) option(PY_DEBUG "Set if python being linked is a Py_DEBUG build" OFF) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_EXTENSIONS ON) if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) set(CLANG_DEFAULT_CXX_STDLIB libc++) From c3ebb3056465cddf3da8e8725515f6c8eb431836 Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Thu, 5 Sep 2024 20:54:39 -0700 Subject: [PATCH 104/114] fix: dev: bump ubuntu and mac workflow runners * revert to macos-13 with the same version as target * In Theory this should get us full c++17 Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .github/workflows/main.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f25c147f..53d4ff36 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macos-11, windows-latest] + os: [ubuntu-22.04, windows-latest, macos-13] steps: - uses: actions/checkout@v4 @@ -23,7 +23,7 @@ jobs: - uses: actions/setup-python@v5 name: Install Python with: - python-version: '3.8' + python-version: '3.9' - name: Set up QEMU if: runner.os == 'Linux' @@ -32,7 +32,7 @@ jobs: platforms: all - name: Build wheels - uses: pypa/cibuildwheel@v2.17 + uses: pypa/cibuildwheel@v2.20 env: # configure cibuildwheel to build native archs ('auto'), and some # emulated ones, plus cross-compile on macos @@ -42,13 +42,14 @@ jobs: CIBW_ARCHS_WINDOWS: auto64 CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 CIBW_MANYLINUX_I686_IMAGE: manylinux2010 - CIBW_BUILD: cp37-* cp38-* cp39-* cp310-* cp311-* - CIBW_SKIP: "*musllinux* 
cp311-*i686" + CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* cp312-* + CIBW_SKIP: "*musllinux* *i686" CIBW_BEFORE_ALL_LINUX: > - yum -y install epel-release && yum install -y re2-devel ninja-build + yum -y update && yum -y install epel-release && yum install -y re2-devel ninja-build CIBW_BEFORE_ALL_MACOS: > brew install re2 pybind11 - CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.9 + # macos target should be 10.14 to get c++17 + CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=13.0 CIBW_BEFORE_ALL_WINDOWS: > vcpkg install pkgconf re2:x64-windows && vcpkg integrate install @@ -70,7 +71,7 @@ jobs: - uses: actions/setup-python@v5 name: Install Python with: - python-version: '3.8' + python-version: '3.9' - name: Build sdist run: | From 8fcfdedc68f4f31563fd4c9d376dbc2c469ed45a Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Sat, 7 Sep 2024 12:06:30 -0700 Subject: [PATCH 105/114] chg: swap out flake8 for cython-lint, update setup files, remove pep8 cfg Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .pep8speaks.yml | 15 --------------- pyproject.toml | 7 ++++++- setup.cfg | 8 -------- setup.py | 22 ++++++++++++++++------ tox.ini | 12 ++++++++++++ 5 files changed, 34 insertions(+), 30 deletions(-) delete mode 100644 .pep8speaks.yml diff --git a/.pep8speaks.yml b/.pep8speaks.yml deleted file mode 100644 index 4f120b0d..00000000 --- a/.pep8speaks.yml +++ /dev/null @@ -1,15 +0,0 @@ -scanner: - diff_only: True # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned. - linter: flake8 # Other option is pycodestyle - -no_blank_comment: False # If True, no comment is made on PR without any errors. -descending_issues_order: True # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file - -pycodestyle: # Same as scanner.linter value. 
Other option is flake8 - max-line-length: 90 # Default is 79 in PEP 8 - -flake8: - max-line-length: 90 # Default is 79 in PEP 8 - exclude: - - tests - - docs diff --git a/pyproject.toml b/pyproject.toml index 02531078..2b24a200 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "setuptools>=42", "setuptools_scm[toml]>=6.2", "Cython>=0.20", - "pybind11>=2.11.1", + "pybind11>=2.12", "ninja; sys_platform != 'Windows'", "cmake>=3.18", ] @@ -17,3 +17,8 @@ minversion = "6.0" testpaths = [ "tests", ] + +[tool.cython-lint] +max-line-length = 110 +ignore = ['E225','E226','E227','E402','E703','E999'] +# exclude = 'my_project/excluded_cython_file.pyx' diff --git a/setup.cfg b/setup.cfg index 7bf19496..b086a46a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,11 +45,3 @@ ignore = .gitchangelog.rc .gitignore conda/** - -[flake8] -# these error codes interfere with Black -#ignore = E203, E231, E501, W503, B950, -ignore = E225,E226,E227,E402,E703,E999 -max-line-length = 90 -filename = *.pyx, *.px* -exclude = .git, .eggs, *.egg, .tox, build diff --git a/setup.py b/setup.py index 73c87fc9..b89812b5 100755 --- a/setup.py +++ b/setup.py @@ -32,11 +32,17 @@ def build_extension(self, ext: CMakeExtension) -> None: ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name) extdir = ext_fullpath.parent.resolve() - # Using this requires trailing slash for auto-detection & inclusion of - # auxiliary "native" libs + # Set a sensible default build type for packaging + if "CMAKE_BUILD_OVERRIDE" not in os.environ: + cfg = "Debug" if self.debug else "RelWithDebInfo" + else: + cfg = os.environ.get("CMAKE_BUILD_OVERRIDE", "") - debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug - cfg = "Debug" if debug else "Release" + # Set a coverage flag if provided + if "WITH_COVERAGE" not in os.environ: + coverage = "OFF" + else: + coverage = os.environ.get("WITH_COVERAGE", "") # CMake lets you override the generator - we need to check this. 
# Can be set with Conda-Build, for example. @@ -50,7 +56,11 @@ def build_extension(self, ext: CMakeExtension) -> None: f"-DPython_EXECUTABLE={sys.executable}", f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm ] - build_args = [] + build_args = ["--verbose"] + + # Add CMake arguments set as environment variable + if "CMAKE_ARGS" in os.environ: + cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item] # CMake also lets you provide a toolchain file. # Can be set in CI build environments for example. @@ -68,7 +78,7 @@ def build_extension(self, ext: CMakeExtension) -> None: # 3.15+. if not cmake_generator or cmake_generator == "Ninja": try: - import ninja + import ninja # noqa: F401 ninja_executable_path = Path(ninja.BIN_DIR) / "ninja" cmake_args += [ diff --git a/tox.ini b/tox.ini index 67eb8056..92528038 100644 --- a/tox.ini +++ b/tox.ini @@ -142,6 +142,18 @@ commands = pip install pyre2 --force-reinstall --prefer-binary -f dist/ python -m unittest discover -f -s . 
+[testenv:style] +envdir = {toxworkdir}/tests + +passenv = + {[testenv:tests]passenv} + +deps = + pip>=23.1 + cython-lint + +commands = + cython-lint src/ [testenv:clean] skip_install = true allowlist_externals = From e975b7cf9692a715c9103c67bbaa6f7943687801 Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Sat, 7 Sep 2024 16:37:52 -0700 Subject: [PATCH 106/114] fix: cleanup tests and fix a raw string test, enable more win32 * split all runners into separate arch via matrix * macos does need macos-14 to get a proper arm64 build Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .github/workflows/main.yml | 32 +++++++++++++++++++++++--------- tests/re_utils.py | 5 +++-- tests/test_re.py | 17 +++++++++++------ 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 53d4ff36..1c2beded 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,23 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, windows-latest, macos-13] + include: + - os: "ubuntu-22.04" + arch: "x86_64" + - os: "ubuntu-22.04" + arch: "aarch64" + - os: "macos-13" + arch: "x86_64" + macosx_deployment_target: "13.0" + - os: "macos-14" + arch: "arm64" + macosx_deployment_target: "14.0" + - os: "windows-latest" + arch: "auto64" + triplet: "x64-windows" + - os: "windows-latest" + arch: "auto32" + triplet: "x86-windows" steps: - uses: actions/checkout@v4 @@ -36,10 +52,8 @@ jobs: env: # configure cibuildwheel to build native archs ('auto'), and some # emulated ones, plus cross-compile on macos - CIBW_ARCHS_LINUX: auto - CIBW_ARCHS_MACOS: auto arm64 + CIBW_ARCHS: ${{ matrix.arch }} CIBW_TEST_SKIP: "*_arm64 *universal2:arm64 *linux_i686" - CIBW_ARCHS_WINDOWS: auto64 CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 CIBW_MANYLINUX_I686_IMAGE: manylinux2010 CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* cp312-* @@ -48,10 +62,10 @@ jobs: yum -y update && yum -y install epel-release && yum 
install -y re2-devel ninja-build CIBW_BEFORE_ALL_MACOS: > brew install re2 pybind11 - # macos target should be 10.14 to get c++17 - CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=13.0 + # macos target should be at least 10.13 to get full c++17 + CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=${{ matrix.macosx_deployment_target }} CIBW_BEFORE_ALL_WINDOWS: > - vcpkg install pkgconf re2:x64-windows + vcpkg install pkgconf:${{ matrix.triplet }} re2:${{ matrix.triplet }} && vcpkg integrate install CIBW_ENVIRONMENT_WINDOWS: 'CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' CIBW_TEST_REQUIRES: "" @@ -59,7 +73,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: wheels-${{ matrix.os }} + name: wheels-${{ matrix.os }}-${{ matrix.arch }} path: ./wheelhouse/*.whl build_sdist: @@ -88,7 +102,7 @@ jobs: run: shell: bash name: Check artifacts are correct - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 diff --git a/tests/re_utils.py b/tests/re_utils.py index 348c3ce9..6ddecd9b 100644 --- a/tests/re_utils.py +++ b/tests/re_utils.py @@ -550,8 +550,9 @@ # lookbehind: split by : but not if it is escaped by -. ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ), - # escaping with \ as we know it - (r'(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ), + # escaping with \ as we know it => updated for py311+ + # by removing one backslash from each set of 3 + (r'(?<!\\):(.*?)(?<!\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ), # terminating with ' and escaping with ? 
as in edifact ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ), diff --git a/tests/test_re.py b/tests/test_re.py index 567e54fa..9fd541ed 100644 --- a/tests/test_re.py +++ b/tests/test_re.py @@ -1,14 +1,19 @@ from __future__ import print_function -try: - from test.test_support import verbose -except ImportError: - from test.support import verbose -import re2 as re -from re import Scanner + import os import sys import traceback from weakref import proxy + +import re2 as re +from re import Scanner + +try: + from test import support + from test.support import verbose +except ImportError: # import error on Windows + verbose = re.VERBOSE + if sys.version_info[0] > 2: unicode = str unichr = chr From 1465367ab228e1ceae8bb28f624bf2bb602fcffa Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Sat, 7 Sep 2024 22:12:24 -0700 Subject: [PATCH 107/114] chg: switch conda workflow to condadev environment Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .github/workflows/{conda.yml => conda-dev.yml} | 17 +++++++---------- environment.devenv.yml | 17 ++++++++++++++--- 2 files changed, 21 insertions(+), 13 deletions(-) rename .github/workflows/{conda.yml => conda-dev.yml} (87%) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda-dev.yml similarity index 87% rename from .github/workflows/conda.yml rename to .github/workflows/conda-dev.yml index 6312fc24..41e5da74 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda-dev.yml @@ -2,10 +2,10 @@ name: CondaDev on: workflow_dispatch: - #push: - #branches: - #- master - #pull_request: + push: + branches: + - master + pull_request: jobs: build: @@ -14,8 +14,8 @@ jobs: strategy: fail-fast: false matrix: - os: ['macos-11', 'ubuntu-22.04'] - python-version: ['3.8', '3.11'] + os: ['ubuntu-22.04', 'macos-13'] + python-version: ['3.9', '3.10', '3.11', '3.12'] env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python-version }} @@ -23,15 +23,12 @@ jobs: steps: - uses: 
actions/checkout@v4 - with: - fetch-depth: 0 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ matrix.python-version }} channels: conda-forge - channel-priority: strict use-only-tar-bz2: true - name: Cache conda packages @@ -39,7 +36,7 @@ jobs: uses: actions/cache@v4 env: # Increase this value to reset cache and rebuild the env during the PR - CACHE_NUMBER: 3 + CACHE_NUMBER: 0 with: path: /home/runner/conda_pkgs_dir key: diff --git a/environment.devenv.yml b/environment.devenv.yml index dcdee670..1e45b2b6 100644 --- a/environment.devenv.yml +++ b/environment.devenv.yml @@ -1,11 +1,22 @@ name: pyre2 +{% set python_version = os.environ.get("PY_VER", "3.11") %} + +channels: + - conda-forge + dependencies: - - python ==3.11 - - cmake >=3.18 + - cmake>=3.18 - ninja + - ccache + - clangxx_osx-64 # [osx] + - gxx_linux-64 # [linux] + - pybind11-abi + - pybind11-stubgen + - vs2019_win-64 # [win] + - pkgconfig # [win] + - python={{ python_version }} - cython - - cxx-compiler - pybind11 - pip - re2 From db22ef17fe8d73eb9871d2c529325a2d130c232f Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Sun, 8 Sep 2024 16:16:57 -0700 Subject: [PATCH 108/114] chg: dev: make sure conda-devenv installs specific pkgs with pip * this is essentially a workaround for non-pypi pkg cruft Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- environment.devenv.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/environment.devenv.yml b/environment.devenv.yml index 1e45b2b6..f961082c 100644 --- a/environment.devenv.yml +++ b/environment.devenv.yml @@ -9,6 +9,7 @@ dependencies: - cmake>=3.18 - ninja - ccache + - re2 - clangxx_osx-64 # [osx] - gxx_linux-64 # [linux] - pybind11-abi @@ -18,8 +19,9 @@ dependencies: - python={{ python_version }} - cython - pybind11 - - pip - - re2 - - pytest - - regex - - six + - pip: + # these two need to be newer than broken runner packages + - pytest + - regex + - 
urllib3 + - six From f254daa2e95de75122f42746a6ead3600592692d Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Sun, 8 Sep 2024 16:51:11 -0700 Subject: [PATCH 109/114] chg: dev: switch conda env to use mamba with newer workflow cmds Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .github/workflows/conda-dev.yml | 20 ++++++++++++-------- environment.devenv.yml | 12 ++++++------ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/.github/workflows/conda-dev.yml b/.github/workflows/conda-dev.yml index 41e5da74..12642de4 100644 --- a/.github/workflows/conda-dev.yml +++ b/.github/workflows/conda-dev.yml @@ -28,8 +28,11 @@ jobs: with: auto-update-conda: true python-version: ${{ matrix.python-version }} - channels: conda-forge - use-only-tar-bz2: true + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true + activate-environment: pyre2 + environment-file: environment.devenv.yml - name: Cache conda packages id: cache @@ -46,20 +49,21 @@ jobs: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- - name: Configure condadev environment - shell: bash -l {0} + shell: bash -el {0} env: PY_VER: ${{ matrix.python-version }} run: | conda config --set always_yes yes --set changeps1 no - conda config --add channels conda-forge - conda install conda-devenv - conda devenv + conda info + conda list + conda config --show-sources + conda config --show + printenv | sort - name: Build and test - shell: bash -l {0} + shell: bash -el {0} env: PY_VER: ${{ matrix.python-version }} run: | - source activate pyre2 python -m pip install .[test] -vv python -m pytest -v . 
diff --git a/environment.devenv.yml b/environment.devenv.yml index f961082c..e660e76c 100644 --- a/environment.devenv.yml +++ b/environment.devenv.yml @@ -19,9 +19,9 @@ dependencies: - python={{ python_version }} - cython - pybind11 - - pip: - # these two need to be newer than broken runner packages - - pytest - - regex - - urllib3 - - six + - pip + - pytest + - regex + # these two need to be newer than broken runner packages + - urllib3 + - six From ae58b0b531cf537905915efa8bf94d22dee45fd2 Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Sun, 8 Sep 2024 17:16:38 -0700 Subject: [PATCH 110/114] chg: dev: one more conda-devenv refactor based on latest docs * also cleanup the wheel artifact check, download to artifacts/ Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .github/workflows/conda-dev.yml | 21 ++++++++++----------- .github/workflows/main.yml | 7 ++++--- environment.devenv.yml | 9 ++------- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/.github/workflows/conda-dev.yml b/.github/workflows/conda-dev.yml index 12642de4..3e3cb9d3 100644 --- a/.github/workflows/conda-dev.yml +++ b/.github/workflows/conda-dev.yml @@ -25,14 +25,14 @@ jobs: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 + env: + PY_VER: ${{ matrix.python-version }} with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - mamba-version: "*" - channels: conda-forge,defaults - channel-priority: true activate-environment: pyre2 - environment-file: environment.devenv.yml + channels: conda-forge + allow-softlinks: true + channel-priority: flexible + show-channel-urls: true - name: Cache conda packages id: cache @@ -54,16 +54,15 @@ jobs: PY_VER: ${{ matrix.python-version }} run: | conda config --set always_yes yes --set changeps1 no - conda info - conda list - conda config --show-sources - conda config --show - printenv | sort + conda config --add channels conda-forge + conda install conda-devenv + conda devenv - 
name: Build and test shell: bash -el {0} env: PY_VER: ${{ matrix.python-version }} run: | + source activate pyre2 python -m pip install .[test] -vv python -m pytest -v . diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1c2beded..1ec3f0cf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -104,9 +104,10 @@ jobs: name: Check artifacts are correct runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 + with: + path: artifacts - # note wheels should be in subdirectory <upload_name> + # note wheels should be in subdirectories named <artifact_name> - name: Check number of downloaded artifacts - run: ls -l artifact + run: ls -l artifacts/* diff --git a/environment.devenv.yml b/environment.devenv.yml index e660e76c..95fd733c 100644 --- a/environment.devenv.yml +++ b/environment.devenv.yml @@ -1,10 +1,5 @@ name: pyre2 -{% set python_version = os.environ.get("PY_VER", "3.11") %} - -channels: - - conda-forge - dependencies: - cmake>=3.18 - ninja @@ -16,12 +11,12 @@ dependencies: - pybind11-stubgen - vs2019_win-64 # [win] - pkgconfig # [win] - - python={{ python_version }} + - python ={{ get_env("PY_VER", default="3.9") }} - cython - pybind11 - pip - pytest - regex - # these two need to be newer than broken runner packages + # these two need to be newer than broken runner packages, 3.12 only - urllib3 - six From 8d64c5f42359d31719581acf8e06188c8e20fd7f Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Sun, 8 Sep 2024 15:30:50 -0700 Subject: [PATCH 111/114] new: doc: add basic sphinx docs build using apidoc, update changelog * update .gitignore and .gitchangelog.rc and (re)generate new changelog * add sphinx docs build using apidoc extension and readme/changelog symlinks * rst apidoc modules are auto-generated and are in .gitignore along with the generated html dir * add dependencies to packaging and add docs/changes cmds to tox file. 
Includes a tox extension for shared tox environments; the new tox commands are an example of this => 4 cmds using one tox env Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .gitchangelog.rc | 20 +- .gitignore | 49 ++++- CHANGELOG.rst | 414 +++++++++++++++++++++++++++++++++++++- docs/Makefile | 19 ++ docs/make.bat | 36 ++++ docs/source/CHANGELOG.rst | 1 + docs/source/README.rst | 1 + docs/source/conf.py | 181 +++++++++++++++++ docs/source/index.rst | 24 +++ setup.cfg | 9 +- tox.ini | 33 ++- toxfile.py | 77 +++++++ 12 files changed, 845 insertions(+), 19 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 120000 docs/source/CHANGELOG.rst create mode 120000 docs/source/README.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 toxfile.py diff --git a/.gitchangelog.rc b/.gitchangelog.rc index c658c92a..4790956f 100644 --- a/.gitchangelog.rc +++ b/.gitchangelog.rc @@ -64,6 +64,7 @@ ignore_regexps = [ r'@wip', r'!wip', r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[p|P]kg:', r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[d|D]ev:', + r'^([cC]i)\s*:', r'^(.{3,3}\s*:)?\s*[fF]irst commit.?\s*$', r'^$', ## ignore commits with empty messages ] @@ -85,16 +86,17 @@ section_regexps = [ ('New', [ r'^[nN]ew\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', ]), + ('Features', [ + r'^([nN]ew|[fF]eat)\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), ('Changes', [ r'^[cC]hg\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', ]), - ('Fix', [ + ('Fixes', [ r'^[fF]ix\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', ]), - ('Other', None ## Match all lines ), - ] @@ -150,7 +152,9 @@ subject_process = (strip | ## ## Tags that will be used for the changelog must match this regexp. 
## -tag_filter_regexp = r'^[0-9]+\.[0-9]+(\.[0-9]+)?$' +#tag_filter_regexp = r'^v?[0-9]+\.[0-9]+(\.[0-9]+)?$' +#tag_filter_regexp = r'^[0-9]+\.[0-9]+(\.[0-9]+)?$' +tag_filter_regexp = r'.*?$' # accept funky tag strings ## ``unreleased_version_label`` is a string or a callable that outputs a string @@ -160,7 +164,9 @@ tag_filter_regexp = r'^[0-9]+\.[0-9]+(\.[0-9]+)?$' #unreleased_version_label = "(unreleased)" unreleased_version_label = lambda: swrap( ["git", "describe", "--tags"], -shell=False) + shell=False, +) + ## ``output_engine`` is a callable ## @@ -227,7 +233,7 @@ include_merge = True ## Outputs directly to standard output ## (This is the default) ## -## - FileInsertAtFirstRegexMatch(file, pattern, idx=lamda m: m.start()) +## - FileInsertAtFirstRegexMatch(file, pattern, idx=lamda m: m.start(), flags) ## ## Creates a callable that will parse given file for the given ## regex pattern and will insert the output in the file. @@ -242,7 +248,7 @@ include_merge = True ## take care of everything and might be more complex. Check the README ## for a complete copy-pastable example. 
## -# publish = FileInsertIntoFirstRegexMatch( +# publish = FileInsertAtFirstRegexMatch( # "CHANGELOG.rst", # r'/(?P<rev>[0-9]+\.[0-9]+(\.[0-9]+)?)\s+\([0-9]+-[0-9]{2}-[0-9]{2}\)\n--+\n/', # idx=lambda m: m.start(1) diff --git a/.gitignore b/.gitignore index 4d4ff6ee..16d42f88 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,54 @@ src/*.html tests/*.so tests/access.log *~ -*.so *.pyc *.swp *.egg-info + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Sphinx documentation +docs/_build/ +docs/source/api/ + diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0e1cb859..61ffc108 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,20 +1,172 @@ +Changelog +========= + + +v0.3.6-29-g1465367 +------------------ + +Changes +~~~~~~~ +- Switch conda workflow to condadev environment. [Stephen Arnold] +- Swap out flake8 for cython-lint, update setup files, remove pep8 cfg. + [Stephen Arnold] + +Fixes +~~~~~ +- Cleanup tests and fix a raw string test, enable more win32. [Stephen + Arnold] + + * split all runners into separate arch via matrix + * macos does need macos-14 to get a proper arm64 build +- Apply emptygroups fix and remove conda-only patch, also. [Stephen L + Arnold] + + * release workflow: restrict pypi upload to repo owner + * tox.ini: replace deprecated pep517 module, update deploy url + +Other +~~~~~ +- Fix #42. [Andreas van Cranenburgh] +- Include current notification level in cache key. [Andreas van + Cranenburgh] + + this prevents a cached regular expression being used that was created + with a different notification level. 
+ + For example, the following now generates the expected warning:: + + In [1]: import re2 + In [2]: re2.compile('a*+') + Out[2]: re.compile('a*+') + In [3]: re2.set_fallback_notification(re2.FALLBACK_WARNING) + In [4]: re2.compile('a*+') + <ipython-input-5-041122e221c7>:1: UserWarning: WARNING: Using re module. Reason: bad repetition operator: *+ + re2.compile('a*+') + Out[4]: re.compile('a*+') +- Support fallback to Python re for possessive quantifiers. [Andreas van + Cranenburgh] +- Document lack of support for possessive quantifiers and atomic groups. + [Andreas van Cranenburgh] +- Make tests pass on my system; if this behavior turns out to be + inconsistent across versions/platforms, maybe the test should be + disabled altogether. #27. [Andreas van Cranenburgh] +- Add NOFLAGS and RegexFlags constants; #41. [Andreas van Cranenburgh] +- Remove python versions for make valgrind. [Andreas van Cranenburgh] +- Merge pull request #33 from sarnold/conda-patch. [Andreas van + Cranenburgh] + + Conda patch for None vs empty string change +- Merge pull request #32 from JustAnotherArchivist/match-getitem. + [Andreas van Cranenburgh] + + Make Match objects subscriptable +- Add test for Match subscripting. [JustAnotherArchivist] +- Make Match objects subscriptable. [JustAnotherArchivist] + + Fixes #31 + + +v0.3.6 (2021-05-05) +------------------- +- Merge pull request #30 from sarnold/release-pr. [Andreas van + Cranenburgh] + + workflow updates +- Add missing sdist job and artifact check to workflows, bump version. + [Stephen L Arnold] +- Bump version again. [Andreas van Cranenburgh] +- Merge pull request #28 from sarnold/use-action. [Andreas van + Cranenburgh] + + Workflow cleanup +- Move pypi upload to end of release.yml, use gitchangelog action. 
+ [Stephen L Arnold] + + +v0.3.4 (2021-04-10) +------------------- + +Changes +~~~~~~~ +- Ci: update workflows and tox cfg (use tox for smoke test) [Stephen L + Arnold] +- Rename imported test helpers to avoid any discovery issues. [Stephen L + Arnold] + +Fixes +~~~~~ +- Apply test patch, cleanup tox and pytest args. [Stephen L Arnold] +- Handle invalid escape sequence warnings, revert path changes. [Stephen + L Arnold] + +Other +~~~~~ +- Bump version. [Andreas van Cranenburgh] +- Improve fix for #26. [Andreas van Cranenburgh] +- Add test for #26. [Andreas van Cranenburgh] +- Fix infinite loop on substitutions of empty matches; fixes #26. + [Andreas van Cranenburgh] +- Fix "make test" and "make test2" [Andreas van Cranenburgh] +- Merge pull request #25 from sarnold/last-moves. [Andreas van + Cranenburgh] + + Last moves +- Another one. [Andreas van Cranenburgh] +- Fix fix of conda patch. [Andreas van Cranenburgh] +- Fix conda patch. [Andreas van Cranenburgh] +- Fix "make test"; rename doctest files for autodetection. [Andreas van + Cranenburgh] +- Fix narrow unicode detection. [Andreas van Cranenburgh] +- Merge pull request #24 from sarnold/tst-cleanup. [Andreas van + Cranenburgh] + + Test cleanup +- Fix Python 2 compatibility. [Andreas van Cranenburgh] +- Use pytest; fixes #23. [Andreas van Cranenburgh] +- Tweak order of badges. [Andreas van Cranenburgh] +- Makefile: default to Python 3. [Andreas van Cranenburgh] +- Update README, fix Makefile. [Andreas van Cranenburgh] +- Merge pull request #22 from sarnold/missing-tests. [Andreas van + Cranenburgh] + + add missing tests to sdist package, update readme and ci worflows (#1) +- Fix pickle_test (tests.test_re.ReTests) ... 
ERROR (run tests with + nose) [Stephen L Arnold] +- Update changelog (and trigger ci rebuild) [Stephen L Arnold] +- Add missing tests to sdist package, update readme and ci worflows (#1) + [Steve Arnold] + + readme: update badges, merge install sections, fix some rendering issues + + v0.3.3 (2021-01-26) ------------------- +Changes +~~~~~~~ +- Add .gitchangelog.rc and generated CHANGELOG.rst (keep HISTORY) + [Stephen L Arnold] +- Ci: update wheel builds for Linux, Macos, and Windows. [Stephen L + Arnold] + +Fixes +~~~~~ +- Ci: make sure wheel path is correct for uploading. [Stephen L Arnold] + +Other +~~~~~ - Bump version. [Andreas van Cranenburgh] - Update README.rst. fixes #21. [Andreas van Cranenburgh] - Merge pull request #20 from freepn/new-bld. [Andreas van Cranenburgh] New cmake and pybind11 build setup -- Add .gitchangelog.rc and generated CHANGELOG.rst (keep HISTORY) - [Stephen L Arnold] -- Update wheel builds for Linux, Macos, and Windows. [Stephen L Arnold] v0.3.2 (2020-12-16) ------------------- - Bump version. [Andreas van Cranenburgh] -- Merge pull request #18 from freepn/github-ci. [Andreas van Cranenburgh] +- Merge pull request #18 from freepn/github-ci. [Andreas van + Cranenburgh] workaroud for manylinux dependency install error plus release automation @@ -28,7 +180,8 @@ v0.3.1 (2020-10-27) v0.3 (2020-10-27) ----------------- - Bump version. [Andreas van Cranenburgh] -- Merge pull request #14 from yoav-orca/master. [Andreas van Cranenburgh] +- Merge pull request #14 from yoav-orca/master. [Andreas van + Cranenburgh] Support building wheels automatically using github actions - Creating github actions for building wheels. [Yoav Alon] @@ -194,3 +347,254 @@ v0.3 (2020-10-27) - Add Python 3 support. [Tarashish Mishra] - Version bump. [Michael Axiak] + +release/0.2.22 (2015-05-15) +--------------------------- +- Version bump. [Michael Axiak] +- Merge pull request #22 from socketpair/release_gil_on_compile. 
+ [Michael Axiak] + + Release GIL during regex compilation. +- Release GIL during regex compilation. [Коренберг Марк] + + (src/re2.cpp is not regenerated in this commit) + + +release/0.2.21 (2015-05-14) +--------------------------- +- Release bump. [Michael Axiak] +- Merge pull request #18 from offlinehacker/master. [Michael Axiak] + + setup.py: Continue with default lib paths if not detected automatically +- Setup.py: Continue with default lib paths if not detected + automatically. [Jaka Hudoklin] +- Fix issue #11. [Michael Axiak] +- Remove spurious print statement. [Michael Axiak] +- Added version check in setup.py to prevent people from shooting + themselves in the foot trying to compile with an old cython version. + [Michael Axiak] + + +release/0.2.20 (2011-11-15) +--------------------------- +- Version bump to 0.2.20. [Michael Axiak] +- Version bump to 0.2.18 and use MANIFEST.in since python broke how + sdist works in 2.7.1 (but fixes it in 2.7.3...) [Michael Axiak] + + +release/0.2.16 (2011-11-08) +--------------------------- + +Fixes +~~~~~ +- Unmatched group span (-1,-1) caused exception in _convert_pos. [Israel + Tsadok] +- Last item in qualified split included the item before last. [Israel + Tsadok] +- Exception in callback would cause a memory leak. [Israel Tsadok] +- Group spans need to be translated to their relative decoded positions. + [Israel Tsadok] +- This is not what verbose means in this context. [Israel Tsadok] +- Findall used group(0) instead of group(1) when there was a group. + [Israel Tsadok] +- Dangling reference when _subn_callback breaks on limit. [Israel + Tsadok] +- Infinite loop in pathological case of findall(".*", "foo") [Israel + Tsadok] + +Other +~~~~~ +- Version bump to 0.2.16. [Michael Axiak] +- Merged itsadok's changes to fix treatment of \D and \W. Added tests to + reflection issue #4. [Michael Axiak] +- Fixed issue #5, support \W, \S and \D. [Israel Tsadok] +- Fixed issue #3, changed code to work with new re2 api. 
[Michael Axiak] +- Merge branch 'itsadok-master' [Michael Axiak] +- Merge branch 'master' of https://github.com/itsadok/pyre2 into + itsadok-master. [Michael Axiak] +- Failing tests for pos and endpos. [Israel Tsadok] +- Set default notification to FALLBACK_QUIETLY, as per the + documentation. [Israel Tsadok] +- Get rid of deprecation warning. [Israel Tsadok] +- Fix hang on findall('', 'x') [Israel Tsadok] +- Allow weak reference to Pattern object. [Israel Tsadok] +- Allow named groups in span(), convert all unicode positions in one + scan. [Israel Tsadok] +- Added failing test for named groups. [Israel Tsadok] +- Fix lastgroup and lastindex. [Israel Tsadok] +- Verify that flags do not get silently ignored with compiled patterns. + [Israel Tsadok] +- Had to cheat on a test, since we can't support arbitrary bytes. + [Israel Tsadok] +- Fix lastindex. [Israel Tsadok] +- Pass some more tests - added pos, endpos, regs, re attributes to Match + object. [Israel Tsadok] +- Added max_mem parameter, bumped version to 0.2.13. [Michael Axiak] +- Remove spurious get_authors() call. [Michael Axiak] +- Added Alex to the authors file. [Michael Axiak] +- Version bumped to 0.2.11. [Michael Axiak] +- Added difference in version to changelist, added AUTHORS parsing to + setup.py. [Michael Axiak] +- Added check for array, added synonym 'error' for RegexError to help + pass more python tests. [Michael Axiak] +- Made make test run a little nicer. [Michael Axiak] +- Added note about copyright assignment to authors. [Michael Axiak] +- Fix test_re_match. [Israel Tsadok] +- Fix test_bug_1140. [Israel Tsadok] +- Update readme. [Israel Tsadok] +- Preprocess pattern to match re2 quirks. Fixes several bugs. [Israel + Tsadok] +- Re2 doesn't like when you escape non-ascii chars. [Israel Tsadok] +- Merge remote branch 'moreati/master' [Israel Tsadok] +- Merge from axiak/HEAD. [Alex Willmer] +- Merge from axiak/master. [Alex Willmer] +- Pass ErrorBadEscape patterns to re.compile(). 
Have re.compile() accept + SRE objects. [Alex Willmer] +- Ignore .swp files. [Alex Willmer] +- Remove superfluous differences to axiak/master. [Alex Willmer] +- Merge remote branch 'upstream/master' [Alex Willmer] +- Merge changes from axiak master. [Alex Willmer] +- Fix previous additions to setup.py. [Alex Willmer] +- Add url to setup.py. [Alex Willmer] +- Remove #! line from re.py module, since it isn't a script. [Alex + Willmer] +- Add classifers and long description to setup.py. [Alex Willmer] +- Add MANIFEST.in. [Alex Willmer] +- Merge branch 'master' of git://github.com/facebook/pyre2. [Alex + Willmer] + + Conflicts: + re2.py +- Ignore byte compiled python files. [Alex Willmer] +- Add copyright, license, and three convenience functions to re2.py. + [Alex Willmer] +- Switch re2.h include to angle brackets. [Alex Willmer] +- Handle \n in replace template, since re2 doesn't. [Israel Tsadok] +- Import escape function as it from re. [Israel Tsadok] +- The unicode char classes never worked. TIL testing is important. + [Israel Tsadok] +- Make unicode stickyness rules like in re module. [Israel Tsadok] +- Return an iterator from finditer. [Israel Tsadok] +- Have re.compile() accept SRE objects. (copied from moreati's fork) + [Israel Tsadok] +- Fix var name mixup. [Israel Tsadok] +- Added self to authors. [Israel Tsadok] +- Fix memory leak. [Israel Tsadok] +- Added re unit test from python2.6. [Israel Tsadok] +- Make match group allocation more RAII style. [Israel Tsadok] +- Use None for unused groups in split. [Israel Tsadok] +- Change split to match sre.c implementation, and handle empty matches. + [Israel Tsadok] +- Match delete[] to new[] calls. [Israel Tsadok] +- Added group property to match re API. [Israel Tsadok] +- Use appropriate char classes in case of re.UNICODE. [Israel Tsadok] +- Fall back on re in case of back references. [Israel Tsadok] +- Use unmangled pattern in case of fallback to re. [Israel Tsadok] +- Simplistic cache, copied from re.py. 
[Israel Tsadok] +- Fix some memory leaks. [Israel Tsadok] +- Allow multiple arguments to group() [Israel Tsadok] +- Allow multiple arguments to group() [Israel Tsadok] +- Fixed path issues. [Michael Axiak] +- Added flags to pattern object, bumped version number. [Michael Axiak] +- Change import path to fix include dirs. [Michael Axiak] +- Updated manifest and changelog. [Michael Axiak] +- Added .expand() to group objects. [Michael Axiak] +- Removed the excessive thanks. [Michael Axiak] +- Added regex module to performance tests. [Michael Axiak] +- Pattern objects should be able to return the input pattern. [Alec + Berryman] + + I'm just storing the original object passed in (found it could be str or + unicode - thanks, unicode tests!). It could be calculated if important + to save space. +- Further findall fix: one-match finds. [Alec Berryman] + + I read the spec more carefully this time. +- Added changelist file for future releases. [Michael Axiak] +- Added alec to AUTHORS. Bumped potential version number. Fixed findall + support to work without closures. [Michael Axiak] +- Make pyre2 64-bit safe. [Alec Berryman] + + Now compiles on a 64-bit system; previously, complained that you might + have a string size that couldn't fit into an int. Py_ssize_t is + required instead of plain size_t because Python's is signed. +- Fix findall behavior to match re. [Alec Berryman] + + findall is to return a list of strings or tuples of strings, while + finditer is to return an iterator of match objects; previously both were + returning lists of match objects. findall was fixed, but finditer is + still returning a list. + + New tests. +- Makefile: test target. [Alec Berryman] +- Note Cython 0.13 dependency. [Alec Berryman] +- Moved to my own license. I can do that since I started the code from + scratch. [Michael Axiak] +- Fix formatting of table. [Michael Axiak] +- Added number of trials, fixed some wording. [Michael Axiak] +- Added missing files to MANIFEST. 
[Michael Axiak] +- Added performance testing, bumped version number. [Michael Axiak] +- Added readme data. [Michael Axiak] +- Updated installation language. [Michael Axiak] +- Incremented version number. [Michael Axiak] +- Fixed split regression with missing unicode translation. [Michael + Axiak] +- Maybe utf8 works? [Michael Axiak] +- Got unicode to past broken test. [Michael Axiak] +- Added debug message for issue with decoding. [Michael Axiak] +- Added more utf8 stuff. [Michael Axiak] +- Delay group construction for faster match testing. [Michael Axiak] +- Got utf8 support to work. [Michael Axiak] +- Starting to add unicode support... BROKEN. [Michael Axiak] +- Added fallback and notification. [Michael Axiak] +- Added runtime library path to fix /usr/local/ bug for installation of + re2 library. [Michael Axiak] +- Fixed setup and updated readme. [Michael Axiak] +- Added more setup.py antics. [Michael Axiak] +- Added ancillary stuff. [Michael Axiak] +- Added sub, subn, findall, finditer, split. Bam. [Michael Axiak] +- Added finditer along with tests. [Michael Axiak] +- Fix rst. [Michael Axiak] +- Added contact link to readme. [Michael Axiak] +- Fixed formatting for readme. [Michael Axiak] +- Added lastindex, lastgroup, and updated README file. [Michael Axiak] +- Added some simple tests. Got match() to work, named groups to work. + spanning to work. pretty much at the same level as pyre2 proper. + [Michael Axiak] +- Added more things to gitignore, another compile. [Michael Axiak] +- Move code to src directory for better sanity. [Michael Axiak] +- Updated re2 module to use Cython instead, got matching working almost + completely with minimal flag support. [Michael Axiak] +- Suppress logging of failed pattern compilation. [David Reiss] + + This isn't necessary in Python since errors are reported by exceptions. + Use a slightly more verbose form to allow easier setting of more options + later. +- Don't define tp_new for regexps and matches. 
[David Reiss] + + It turns out that these are not required to use PyObject_New. They are + only required to allow Python code to call the type to create a new + instance, which I don't want to allow. Remove them. +- Fix a segfault by initializing attr_dict to NULL. [David Reiss] + + I thought PyObject_New would call my tp_new, which is set to + PyType_GenericNew, which NULLs out all user-defined fields (actually + memset, but the docs say NULL). However, this appears to not be the + case. Explicitly NULL out attr_dict in create_regexp to avoid segfaults + when compiling a bad pattern. Do it for create_match too for future + safety, even though it is not required with the current code. +- Add struct tags for easier debugging with gdb. [David Reiss] + + See http://sourceware.org/ml/archer/2009-q1/msg00085.html for a little + more information. +- Add some convenience functions to re2.py. [Alex Willmer] +- Add a copyright and license comment to re2.py. [David Reiss] + + This is to prepare for adding some not-totally-trivial code to it. +- Use PEP-8-compliant 4-space indent in re2.py. [David Reiss] +- Use angle-brackets for the re2.h include. [Alex Willmer] +- Add build information to the README. [David Reiss] +- Add contact info to README. [David Reiss] +- Initial commit of pyre2. [David Reiss] + + diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..abc576af --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..d806eb7f --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=MAVConn + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/source/CHANGELOG.rst b/docs/source/CHANGELOG.rst new file mode 120000 index 00000000..bfa394db --- /dev/null +++ b/docs/source/CHANGELOG.rst @@ -0,0 +1 @@ +../../CHANGELOG.rst \ No newline at end of file diff --git a/docs/source/README.rst b/docs/source/README.rst new file mode 120000 index 00000000..c768ff7d --- /dev/null +++ b/docs/source/README.rst @@ -0,0 +1 @@ +../../README.rst \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..acd4b358 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,181 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys + +if sys.version_info < (3, 8): + from importlib_metadata import version +else: + from importlib.metadata import version + + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) + +# -- Project information ----------------------------------------------------- + +project = 'pyre2' +copyright = '2024, Andreas van Cranenburgh' +author = 'Andreas van Cranenburgh' + +# The full version, including alpha/beta/rc tags +release = version('pyre2') +# The short X.Y version. +version = '.'.join(release.split('.')[:2]) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx_git', + 'sphinxcontrib.apidoc', + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.viewcode', +] + +apidoc_module_dir = '../../src/' +apidoc_output_dir = 'api' +apidoc_excluded_paths = ['tests'] +apidoc_separate_modules = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. 
+master_doc = 'index' + +# Brief project description +# +description = 'Python RE2 wrapper for linear-time regular expressions' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = 'en' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +#html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. 
+# +# This is required for the alabaster theme +# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +html_sidebars = { + '**': [ + 'relations.html', # needs 'show_related': True theme option to display + 'searchbox.html', + ] +} + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'pyre2doc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'pyre2.tex', 'pyre2 Documentation', + [author], 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'pyre2', 'pyre2 Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'pyre2', 'pyre2 Documentation', + [author], 'pyre2', description, + 'Miscellaneous'), +] + diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..9d0fde97 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,24 @@ +Welcome to the Pyre2 documentation! +=================================== + +.. 
git_commit_detail:: + :branch: + :commit: + :sha_length: 10 + :uncommitted: + :untracked: + +.. toctree:: + :caption: Contents: + :maxdepth: 3 + + README + api/modules + CHANGELOG + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` diff --git a/setup.cfg b/setup.cfg index b086a46a..0bd1af2f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,15 +25,18 @@ setup_requires = setuptools_scm[toml] [options.extras_require] +doc = + sphinx + sphinx_git + sphinx_rtd_theme + sphinxcontrib-apidoc + test = pytest perf = regex -[aliases] -test=pytest - [check] metadata = true restructuredtext = true diff --git a/tox.ini b/tox.ini index 92528038..428c1cb4 100644 --- a/tox.ini +++ b/tox.ini @@ -80,9 +80,8 @@ setenv = deps = {[base]deps} - cython - -r requirements-dev.txt - -e . + #-r requirements-dev.txt + -e .[test] commands = # this is deprecated => _DeprecatedInstaller warning from setuptools @@ -108,6 +107,34 @@ deps = commands = python tests/performance.py +[testenv:{docs,ldocs,cdocs}] +# these tox env cmds share a virtual env using the following plugin +# https://github.com/masenf/tox-ignore-env-name-mismatch +envdir = {toxworkdir}/docs +runner = ignore_env_name_mismatch +skip_install = true + +description = + docs: Build the docs using sphinx + ldocs: Lint the docs (mainly link checking) + cdocs: Clean the docs build artifacts + changes: Generate full or partial changelog; use git delta syntax for changes-since + +allowlist_externals = + make + bash + +deps = + {[base]deps} + gitchangelog @ https://github.com/sarnold/gitchangelog/releases/download/3.2.0/gitchangelog-3.2.0.tar.gz + -e .[doc] # using editable here is the "best" equivalent to build_ext --inplace + +commands = + docs: make -C docs html + ldocs: make -C docs linkcheck + cdocs: make -C docs clean + changes: bash -c 'gitchangelog {posargs} > CHANGELOG.rst' + [testenv:build] passenv = pythonLocation diff --git a/toxfile.py b/toxfile.py new file mode 100644 index 00000000..ae19a7b6 --- 
/dev/null +++ b/toxfile.py @@ -0,0 +1,77 @@ +""" +https://github.com/masenf/tox-ignore-env-name-mismatch + +MIT License +Copyright (c) 2023 Masen Furer +""" +from contextlib import contextmanager +from typing import Any, Iterator, Optional, Sequence, Tuple + +from tox.plugin import impl +from tox.tox_env.api import ToxEnv +from tox.tox_env.info import Info +from tox.tox_env.python.virtual_env.runner import VirtualEnvRunner +from tox.tox_env.register import ToxEnvRegister + + +class FilteredInfo(Info): + """Subclass of Info that optionally filters specific keys during compare().""" + + def __init__( + self, + *args: Any, + filter_keys: Optional[Sequence[str]] = None, + filter_section: Optional[str] = None, + **kwargs: Any, + ): + """ + :param filter_keys: key names to pop from value + :param filter_section: if specified, only pop filter_keys when the compared section matches + + All other args and kwargs are passed to super().__init__ + """ + self.filter_keys = filter_keys + self.filter_section = filter_section + super().__init__(*args, **kwargs) + + @contextmanager + def compare( + self, + value: Any, + section: str, + sub_section: Optional[str] = None, + ) -> Iterator[Tuple[bool, Optional[Any]]]: + """Perform comparison and update cached info after filtering `value`.""" + if self.filter_section is None or section == self.filter_section: + try: + value = value.copy() + except AttributeError: # pragma: no cover + pass + else: + for fkey in self.filter_keys or []: + value.pop(fkey, None) + with super().compare(value, section, sub_section) as rv: + yield rv + + +class IgnoreEnvNameMismatchVirtualEnvRunner(VirtualEnvRunner): + """EnvRunner that does NOT save the env name as part of the cached info.""" + + @staticmethod + def id() -> str: + return "ignore_env_name_mismatch" + + @property + def cache(self) -> Info: + """Return a modified Info class that does NOT pass "name" key to `Info.compare`.""" + return FilteredInfo( + self.env_dir, + filter_keys=["name"], + 
filter_section=ToxEnv.__name__, + ) + + +@impl +def tox_register_tox_env(register: ToxEnvRegister) -> None: + """tox4 entry point: add IgnoreEnvNameMismatchVirtualEnvRunner to registry.""" + register.add_run_env(IgnoreEnvNameMismatchVirtualEnvRunner) From 4a38e786ad3eef866b21c279969faf943b744d2d Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Tue, 10 Sep 2024 14:20:19 -0700 Subject: [PATCH 112/114] chg: add gh workflow for sphinx build/deploy * cleanup docs config, remove dicey sphinx_git extension * switch readme badge, download wheel artifacts to single directory Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .github/workflows/main.yml | 3 ++- .github/workflows/sphinx.yml | 49 ++++++++++++++++++++++++++++++++++++ README.rst | 4 +-- docs/source/conf.py | 2 -- docs/source/index.rst | 7 ------ setup.cfg | 1 - 6 files changed, 53 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/sphinx.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1ec3f0cf..e8f56f0a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -106,8 +106,9 @@ jobs: steps: - uses: actions/download-artifact@v4 with: + merge-multiple: true path: artifacts # note wheels should be in subdirectories named <artifact_name> - name: Check number of downloaded artifacts - run: ls -l artifacts/* + run: ls -l artifacts/ diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml new file mode 100644 index 00000000..02a9dbf8 --- /dev/null +++ b/.github/workflows/sphinx.yml @@ -0,0 +1,49 @@ +name: Docs +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + +jobs: + build: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: '3.9' + + - name: Add python requirements + run: | + python -m pip install --upgrade pip + pip install tox + + - name: Install Ubuntu build deps + run: | + sudo 
apt-get -qq update + sudo apt-get install -y libre2-dev + + - name: Build docs + run: | + tox -e ldocs,docs + + - uses: actions/upload-artifact@v4 + with: + name: ApiDocsHTML + path: "docs/_build/html/" + + - name: set nojekyll for github + run: | + sudo touch docs/_build/html/.nojekyll + + - name: Deploy docs to gh-pages + if: ${{ github.event_name == 'push' }} + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: docs/_build/html/ diff --git a/README.rst b/README.rst index e939dfdc..8379036f 100644 --- a/README.rst +++ b/README.rst @@ -18,8 +18,8 @@ :target: https://badge.fury.io/py/pyre2 :alt: Pypi version -.. image:: https://github.com/andreasvc/pyre2/workflows/Conda/badge.svg - :target: https://github.com/andreasvc/pyre2/actions?query=workflow:Conda +.. image:: https://github.com/andreasvc/pyre2/actions/workflows/conda-dev.yml/badge.svg + :target: https://github.com/andreasvc/pyre2/actions/workflows/conda-dev.yml :alt: Conda CI Status .. image:: https://img.shields.io/github/license/andreasvc/pyre2 diff --git a/docs/source/conf.py b/docs/source/conf.py index acd4b358..3b7cbf45 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -42,9 +42,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx_git', 'sphinxcontrib.apidoc', - 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', diff --git a/docs/source/index.rst b/docs/source/index.rst index 9d0fde97..00061805 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,13 +1,6 @@ Welcome to the Pyre2 documentation! =================================== -.. git_commit_detail:: - :branch: - :commit: - :sha_length: 10 - :uncommitted: - :untracked: - .. 
toctree:: :caption: Contents: :maxdepth: 3 diff --git a/setup.cfg b/setup.cfg index 0bd1af2f..020d563f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,7 +27,6 @@ setup_requires = [options.extras_require] doc = sphinx - sphinx_git sphinx_rtd_theme sphinxcontrib-apidoc From 772faa1b9621caff74bb9c1fa62871bb0870918e Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Tue, 10 Sep 2024 14:52:55 -0700 Subject: [PATCH 113/114] fix: dev: update release workflow for new platform wheels * also cleanup sphinx workflow Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .github/workflows/release.yml | 79 +++++++++++++++++++++-------------- .github/workflows/sphinx.yml | 9 ++++ 2 files changed, 57 insertions(+), 31 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3fd96e07..bc9c7ea3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,7 +12,23 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, macos-latest, windows-latest] + include: + - os: "ubuntu-22.04" + arch: "x86_64" + - os: "ubuntu-22.04" + arch: "aarch64" + - os: "macos-13" + arch: "x86_64" + macosx_deployment_target: "13.0" + - os: "macos-14" + arch: "arm64" + macosx_deployment_target: "14.0" + - os: "windows-latest" + arch: "auto64" + triplet: "x64-windows" + - os: "windows-latest" + arch: "auto32" + triplet: "x86-windows" steps: - uses: actions/checkout@v4 @@ -22,43 +38,41 @@ jobs: - uses: actions/setup-python@v5 name: Install Python with: - python-version: '3.8' + python-version: '3.9' - - name: Prepare compiler environment for Windows - if: runner.os == 'Windows' - uses: ilammy/msvc-dev-cmd@v1 + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v3 with: - arch: amd64 - - - name: Install cibuildwheel - run: | - python -m pip install --upgrade pip - python -m pip install -r requirements-cibw.txt + platforms: all - name: Build wheels + uses: pypa/cibuildwheel@v2.20 env: - 
CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2010_x86_64:latest - CIBW_MANYLINUX_I686_IMAGE: quay.io/pypa/manylinux2010_i686:latest - CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* - CIBW_SKIP: "*-win32" + # configure cibuildwheel to build native archs ('auto'), and some + # emulated ones, plus cross-compile on macos + CIBW_ARCHS: ${{ matrix.arch }} + CIBW_TEST_SKIP: "*_arm64 *universal2:arm64 *linux_i686" + CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 + CIBW_MANYLINUX_I686_IMAGE: manylinux2010 + CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* cp312-* + CIBW_SKIP: "*musllinux* *i686" CIBW_BEFORE_ALL_LINUX: > - yum -y -q --enablerepo=extras install epel-release - && yum install -y re2-devel ninja-build - CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel show {wheel} && auditwheel repair -w {dest_dir} {wheel}" + yum -y update && yum -y install epel-release && yum install -y re2-devel ninja-build CIBW_BEFORE_ALL_MACOS: > - brew install re2 pybind11 ninja - CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.09 - CIBW_REPAIR_WHEEL_COMMAND_MACOS: "pip uninstall -y delocate && pip install git+https://github.com/Chia-Network/delocate.git && delocate-listdeps {wheel} && delocate-wheel -w {dest_dir} -v {wheel}" + brew install re2 pybind11 + # macos target should be at least 10.13 to get full c++17 + CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=${{ matrix.macosx_deployment_target }} CIBW_BEFORE_ALL_WINDOWS: > - vcpkg install re2:x64-windows + vcpkg install pkgconf:${{ matrix.triplet }} re2:${{ matrix.triplet }} && vcpkg integrate install CIBW_ENVIRONMENT_WINDOWS: 'CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake' - CIBW_TEST_COMMAND: python -c "import re2" - run: | - python -m cibuildwheel --output-dir wheelhouse + CIBW_TEST_REQUIRES: "" + CIBW_TEST_COMMAND: "" - uses: actions/upload-artifact@v4 with: + name: wheels-${{ matrix.os }}-${{ matrix.arch }} path: ./wheelhouse/*.whl build_sdist: @@ -70,12 +84,12 @@ jobs: - uses: actions/setup-python@v5 name: Install Python 
with: - python-version: '3.8' + python-version: '3.9' - name: Build sdist run: | - pip install pep517 - python -m pep517.build -s . + pip install build + python -m build -s . - uses: actions/upload-artifact@v4 with: @@ -99,10 +113,13 @@ jobs: - uses: actions/setup-python@v5 name: Install Python with: - python-version: 3.7 + python-version: 3.9 - # download all artifacts to project dir + # download all artifacts to artifacts dir - uses: actions/download-artifact@v4 + with: + merge-multiple: true + path: artifacts - name: Generate changes file uses: sarnold/gitchangelog-action@master @@ -121,7 +138,7 @@ jobs: draft: false prerelease: false # uncomment below to upload wheels to github releases - files: dist/cibw-wheels/pyre2*.whl + files: artifacts/pyre2* - uses: pypa/gh-action-pypi-publish@master if: ${{ github.actor == github.repository_owner && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml index 02a9dbf8..6d290bdc 100644 --- a/.github/workflows/sphinx.yml +++ b/.github/workflows/sphinx.yml @@ -5,6 +5,8 @@ on: push: branches: - master +permissions: + contents: write jobs: build: @@ -15,6 +17,11 @@ jobs: with: fetch-depth: 0 + - name: Install Ubuntu build deps + run: | + sudo apt-get -qq update + sudo apt-get install -y libre2-dev + - uses: actions/setup-python@v5 with: python-version: '3.9' @@ -46,4 +53,6 @@ jobs: if: ${{ github.event_name == 'push' }} uses: JamesIves/github-pages-deploy-action@v4 with: + branch: gh-pages folder: docs/_build/html/ + single-commit: true From ba4931b499c09af3741b14f5abf5eb01e1e2c736 Mon Sep 17 00:00:00 2001 From: Stephen Arnold <nerdboy@gentoo.org> Date: Tue, 10 Sep 2024 16:55:04 -0700 Subject: [PATCH 114/114] chg: dev: minor workflow changes to kickstart deployment Signed-off-by: Stephen Arnold <nerdboy@gentoo.org> --- .github/workflows/sphinx.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sphinx.yml 
b/.github/workflows/sphinx.yml index 6d290bdc..46811883 100644 --- a/.github/workflows/sphinx.yml +++ b/.github/workflows/sphinx.yml @@ -5,6 +5,7 @@ on: push: branches: - master + permissions: contents: write @@ -47,7 +48,7 @@ jobs: - name: set nojekyll for github run: | - sudo touch docs/_build/html/.nojekyll + touch docs/_build/html/.nojekyll - name: Deploy docs to gh-pages if: ${{ github.event_name == 'push' }}