You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
When building the latest stable release (5.0.2) on Fedora, test_strings.py::test_string_split fails with re.error: global flags not at the start of the expression at position 1.
After it fails, you can use mock -r fedora-rawhide-x86_64 --shell --enable-network to get an interactive shell inside the build environment to troubleshoot (use dnf install to get any additional packages you might need).
Output Given
=================================== FAILURES ===================================
______________________________ test_string_split _______________________________
def test_string_split():
for str_expr, str_expected in (
('StringSplit["a bbb cccc aa d"]', "{a, bbb, cccc, aa, d}"),
('StringSplit["a--bbb---ccc--dddd", "--"]', "{a, bbb, -ccc, dddd}"),
('StringSplit["the cat in the hat"]', "{the, cat, in, the, hat}"),
('StringSplit["192.168.0.1", "."]', "{192, 168, 0, 1}"),
('StringSplit["123 2.3 4 6", WhitespaceCharacter ..]', "{123, 2.3, 4, 6}"),
(
'StringSplit[StringSplit["11:12:13//21:22:23//31:32:33", "//"], ":"]',
"{{11, 12, 13}, {21, 22, 23}, {31, 32, 33}}",
),
(
'StringSplit["A tree, an apple, four pears. And more: two sacks", RegularExpression["\\W+"]]',
"{A, tree, an, apple, four, pears, And, more, two, sacks}",
),
(
'StringSplit["primes: 2 two 3 three 5 five ...", Whitespace ~~ RegularExpression["\\d"] ~~ Whitespace]',
"{primes:, two, three, five ...}",
),
('StringSplit["a-b:c-d:e-f-g", {":", "-"}]', "{a, b, c, d, e, f, g}"),
('StringSplit["a-b:c-d:e-f-g", ":" | "-"]', "{a, b, c, d, e, f, g}"),
(
'StringSplit[{"a:b:c:d", "listable:element"}, ":"]',
"{{a, b, c, d}, {listable, element}}",
),
(
'StringSplit["cat Cat hat CAT", "c", IgnoreCase -> True]',
"{at , at hat , AT}",
),
(
'StringSplit["This is a sentence, which goes on.", Except[WordCharacter] ..]',
"{This, is, a, sentence, which, goes, on}",
)
# # FIXME: these forms are not implemented yet:
# ('StringSplit["11a22b3", _?LetterQ]', '{11, 22, 3}'),
# ('StringSplit["a b::c d::e f g", "::" -> "--"]'), '{a, b, --, c d, --, e f g}'),
# ('StringSplit["a--b c--d e", x : "--" :> x]', {a, --, b c, --, d e}),
# ('StringSplit[":a:b:c:", ":", All]', '{"", "a", "b", "c", ""}'),
):
> check_evaluation(
str_expr,
str_expected,
to_string_expr=True,
hold_expected=True,
to_string_expected=True,
)
test/test_strings.py:70:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test/helper.py:65: in check_evaluation
result = evaluate_value(str_expr)
test/helper.py:15: in evaluate_value
return session.evaluate(str_expr).value
mathics/session.py:75: in evaluate
self.last_result = expr.evaluate(self.evaluation)
mathics/core/expression.py:454: in evaluate
expr, reevaluate = expr.rewrite_apply_eval_step(evaluation)
mathics/core/expression.py:1041: in rewrite_apply_eval_step
eval_elements()
mathics/core/expression.py:1027: in eval_elements
eval_range(range(len(elements)))
mathics/core/expression.py:995: in eval_range
new_value = element.evaluate(evaluation)
mathics/core/expression.py:454: in evaluate
expr, reevaluate = expr.rewrite_apply_eval_step(evaluation)
mathics/core/expression.py:1201: in rewrite_apply_eval_step
result = rule.apply(new, evaluation, fully=False)
mathics/core/rules.py:82: in apply
self.pattern.match(yield_match, expression, {}, evaluation, fully=fully)
mathics/core/pattern.py:351: in match
self.head.match(yield_head, expression.get_head(), vars, evaluation)
mathics/core/pattern.py:193: in match_symbol
yield_func(vars, None)
mathics/core/pattern.py:341: in yield_head
self.get_pre_choices(
mathics/core/pattern.py:489: in get_pre_choices
yield_func(vars)
mathics/core/pattern.py:318: in yield_choice
self.match_element(
mathics/core/pattern.py:702: in match_element
self.get_wrappings(
mathics/core/pattern.py:518: in get_wrappings
yield_func(items[0])
mathics/core/pattern.py:690: in yield_wrapping
element.match(
mathics/builtin/patterns.py:1023: in match
self.pattern.match(yield_func, expression, new_vars, evaluation)
mathics/builtin/patterns.py:1228: in match
yield_func(vars, None)
mathics/core/pattern.py:670: in match_yield
self.match_element(
mathics/core/pattern.py:702: in match_element
self.get_wrappings(
mathics/core/pattern.py:518: in get_wrappings
yield_func(items[0])
mathics/core/pattern.py:690: in yield_wrapping
element.match(
mathics/builtin/patterns.py:1023: in match
self.pattern.match(yield_func, expression, new_vars, evaluation)
mathics/builtin/patterns.py:1228: in match
yield_func(vars, None)
mathics/core/pattern.py:670: in match_yield
self.match_element(
mathics/core/pattern.py:702: in match_element
self.get_wrappings(
mathics/core/pattern.py:529: in get_wrappings
yield_func(sequence)
mathics/core/pattern.py:690: in yield_wrapping
element.match(
mathics/builtin/patterns.py:1646: in match
yield_func(new_vars, None)
mathics/core/pattern.py:687: in match_yield
yield_func(new_vars, items_rest)
mathics/core/pattern.py:663: in element_yield
yield_func(
mathics/core/pattern.py:663: in element_yield
yield_func(
mathics/core/rules.py:54: in yield_match
new_expression = self.do_replace(expression, vars, options, evaluation)
mathics/core/rules.py:213: in do_replace
return self.function(evaluation=evaluation, options=options, **vars_noctx)
mathics/builtin/string/operations.py:1043: in apply
result = [t for s in result for t in mathics_split(re_patt, s, flags=flags)]
mathics/builtin/string/operations.py:1043: in <listcomp>
result = [t for s in result for t in mathics_split(re_patt, s, flags=flags)]
mathics/builtin/atomic/strings.py:322: in mathics_split
indices = list((m.start(), m.end()) for m in re.finditer(patt, string, flags))
/usr/lib64/python3.11/re/__init__.py:223: in finditer
return _compile(pattern, flags).finditer(string)
/usr/lib64/python3.11/re/__init__.py:294: in _compile
p = _compiler.compile(pattern, flags)
/usr/lib64/python3.11/re/_compiler.py:743: in compile
p = _parser.parse(p, flags)
/usr/lib64/python3.11/re/_parser.py:980: in parse
p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0)
/usr/lib64/python3.11/re/_parser.py:455: in _parse_sub
itemsappend(_parse(source, state, verbose, nested + 1,
/usr/lib64/python3.11/re/_parser.py:863: in _parse
p = _parse_sub(source, state, sub_verbose, nested + 1)
/usr/lib64/python3.11/re/_parser.py:455: in _parse_sub
itemsappend(_parse(source, state, verbose, nested + 1,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
source = <re._parser.Tokenizer object at 0x7fce1a893c50>
state = <re._parser.State object at 0x7fce1a91a790>, verbose = 0, nested = 3
first = False
def _parse(source, state, verbose, nested, first=False):
# parse a simple pattern
subpattern = SubPattern(state)
# precompute constants into local variables
subpatternappend = subpattern.append
sourceget = source.get
sourcematch = source.match
_len = len
_ord = ord
while True:
this = source.next
if this is None:
break # end of pattern
if this in "|)":
break # end of subpattern
sourceget()
if verbose:
# skip whitespace and comments
if this in WHITESPACE:
continue
if this == "#":
while True:
this = sourceget()
if this is None or this == "\n":
break
continue
if this[0] == "\\":
code = _escape(source, this, state)
subpatternappend(code)
elif this not in SPECIAL_CHARS:
subpatternappend((LITERAL, _ord(this)))
elif this == "[":
here = source.tell() - 1
# character set
set = []
setappend = set.append
## if sourcematch(":"):
## pass # handle character classes
if source.next == '[':
import warnings
warnings.warn(
'Possible nested set at position %d' % source.tell(),
FutureWarning, stacklevel=nested + 6
)
negate = sourcematch("^")
# check remaining characters
while True:
this = sourceget()
if this is None:
raise source.error("unterminated character set",
source.tell() - here)
if this == "]" and set:
break
elif this[0] == "\\":
code1 = _class_escape(source, this)
else:
if set and this in '-&~|' and source.next == this:
import warnings
warnings.warn(
'Possible set %s at position %d' % (
'difference' if this == '-' else
'intersection' if this == '&' else
'symmetric difference' if this == '~' else
'union',
source.tell() - 1),
FutureWarning, stacklevel=nested + 6
)
code1 = LITERAL, _ord(this)
if sourcematch("-"):
# potential range
that = sourceget()
if that is None:
raise source.error("unterminated character set",
source.tell() - here)
if that == "]":
if code1[0] is IN:
code1 = code1[1][0]
setappend(code1)
setappend((LITERAL, _ord("-")))
break
if that[0] == "\\":
code2 = _class_escape(source, that)
else:
if that == '-':
import warnings
warnings.warn(
'Possible set difference at position %d' % (
source.tell() - 2),
FutureWarning, stacklevel=nested + 6
)
code2 = LITERAL, _ord(that)
if code1[0] != LITERAL or code2[0] != LITERAL:
msg = "bad character range %s-%s" % (this, that)
raise source.error(msg, len(this) + 1 + len(that))
lo = code1[1]
hi = code2[1]
if hi < lo:
msg = "bad character range %s-%s" % (this, that)
raise source.error(msg, len(this) + 1 + len(that))
setappend((RANGE, (lo, hi)))
else:
if code1[0] is IN:
code1 = code1[1][0]
setappend(code1)
set = _uniq(set)
# XXX: <fl> should move set optimization to compiler!
if _len(set) == 1 and set[0][0] is LITERAL:
# optimization
if negate:
subpatternappend((NOT_LITERAL, set[0][1]))
else:
subpatternappend(set[0])
else:
if negate:
set.insert(0, (NEGATE, None))
# charmap optimization can't be added here because
# global flags still are not known
subpatternappend((IN, set))
elif this in REPEAT_CHARS:
# repeat previous item
here = source.tell()
if this == "?":
min, max = 0, 1
elif this == "*":
min, max = 0, MAXREPEAT
elif this == "+":
min, max = 1, MAXREPEAT
elif this == "{":
if source.next == "}":
subpatternappend((LITERAL, _ord(this)))
continue
min, max = 0, MAXREPEAT
lo = hi = ""
while source.next in DIGITS:
lo += sourceget()
if sourcematch(","):
while source.next in DIGITS:
hi += sourceget()
else:
hi = lo
if not sourcematch("}"):
subpatternappend((LITERAL, _ord(this)))
source.seek(here)
continue
if lo:
min = int(lo)
if min >= MAXREPEAT:
raise OverflowError("the repetition number is too large")
if hi:
max = int(hi)
if max >= MAXREPEAT:
raise OverflowError("the repetition number is too large")
if max < min:
raise source.error("min repeat greater than max repeat",
source.tell() - here)
else:
raise AssertionError("unsupported quantifier %r" % (char,))
# figure out which item to repeat
if subpattern:
item = subpattern[-1:]
else:
item = None
if not item or item[0][0] is AT:
raise source.error("nothing to repeat",
source.tell() - here + len(this))
if item[0][0] in _REPEATCODES:
raise source.error("multiple repeat",
source.tell() - here + len(this))
if item[0][0] is SUBPATTERN:
group, add_flags, del_flags, p = item[0][1]
if group is None and not add_flags and not del_flags:
item = p
if sourcematch("?"):
# Non-Greedy Match
subpattern[-1] = (MIN_REPEAT, (min, max, item))
elif sourcematch("+"):
# Possessive Match (Always Greedy)
subpattern[-1] = (POSSESSIVE_REPEAT, (min, max, item))
else:
# Greedy Match
subpattern[-1] = (MAX_REPEAT, (min, max, item))
elif this == ".":
subpatternappend((ANY, None))
elif this == "(":
start = source.tell() - 1
capture = True
atomic = False
name = None
add_flags = 0
del_flags = 0
if sourcematch("?"):
# options
char = sourceget()
if char is None:
raise source.error("unexpected end of pattern")
if char == "P":
# python extensions
if sourcematch("<"):
# named group: skip forward to end of name
name = source.getuntil(">", "group name")
source.checkgroupname(name, 1, nested)
elif sourcematch("="):
# named backreference
name = source.getuntil(")", "group name")
source.checkgroupname(name, 1, nested)
gid = state.groupdict.get(name)
if gid is None:
msg = "unknown group name %r" % name
raise source.error(msg, len(name) + 1)
if not state.checkgroup(gid):
raise source.error("cannot refer to an open group",
len(name) + 1)
state.checklookbehindgroup(gid, source)
subpatternappend((GROUPREF, gid))
continue
else:
char = sourceget()
if char is None:
raise source.error("unexpected end of pattern")
raise source.error("unknown extension ?P" + char,
len(char) + 2)
elif char == ":":
# non-capturing group
capture = False
elif char == "#":
# comment
while True:
if source.next is None:
raise source.error("missing ), unterminated comment",
source.tell() - start)
if sourceget() == ")":
break
continue
elif char in "=!<":
# lookahead assertions
dir = 1
if char == "<":
char = sourceget()
if char is None:
raise source.error("unexpected end of pattern")
if char not in "=!":
raise source.error("unknown extension ?<" + char,
len(char) + 2)
dir = -1 # lookbehind
lookbehindgroups = state.lookbehindgroups
if lookbehindgroups is None:
state.lookbehindgroups = state.groups
p = _parse_sub(source, state, verbose, nested + 1)
if dir < 0:
if lookbehindgroups is None:
state.lookbehindgroups = None
if not sourcematch(")"):
raise source.error("missing ), unterminated subpattern",
source.tell() - start)
if char == "=":
subpatternappend((ASSERT, (dir, p)))
else:
subpatternappend((ASSERT_NOT, (dir, p)))
continue
elif char == "(":
# conditional backreference group
condname = source.getuntil(")", "group name")
if condname.isidentifier():
source.checkgroupname(condname, 1, nested)
condgroup = state.groupdict.get(condname)
if condgroup is None:
msg = "unknown group name %r" % condname
raise source.error(msg, len(condname) + 1)
else:
try:
condgroup = int(condname)
if condgroup < 0:
raise ValueError
except ValueError:
msg = "bad character in group name %r" % condname
raise source.error(msg, len(condname) + 1) from None
if not condgroup:
raise source.error("bad group number",
len(condname) + 1)
if condgroup >= MAXGROUPS:
msg = "invalid group reference %d" % condgroup
raise source.error(msg, len(condname) + 1)
if condgroup not in state.grouprefpos:
state.grouprefpos[condgroup] = (
source.tell() - len(condname) - 1
)
if not (condname.isdecimal() and condname.isascii()):
import warnings
warnings.warn(
"bad character in group name %s at position %d" %
(repr(condname) if source.istext else ascii(condname),
source.tell() - len(condname) - 1),
DeprecationWarning, stacklevel=nested + 6
)
state.checklookbehindgroup(condgroup, source)
item_yes = _parse(source, state, verbose, nested + 1)
if source.match("|"):
item_no = _parse(source, state, verbose, nested + 1)
if source.next == "|":
raise source.error("conditional backref with more than two branches")
else:
item_no = None
if not source.match(")"):
raise source.error("missing ), unterminated subpattern",
source.tell() - start)
subpatternappend((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
continue
elif char == ">":
# non-capturing, atomic group
capture = False
atomic = True
elif char in FLAGS or char == "-":
# flags
flags = _parse_flags(source, state, char)
if flags is None: # global flags
if not first or subpattern:
> raise source.error('global flags not at the start '
'of the expression',
source.tell() - start)
E re.error: global flags not at the start of the expression at position 1
/usr/lib64/python3.11/re/_parser.py:841: error
----------------------------- Captured stdout call -----------------------------
Wed Jan 11 08:27:03 2023
('{a, bbb, cccc, aa, d}', '{a, bbb, cccc, aa, d}')
Wed Jan 11 08:27:03 2023
('{a, bbb, -ccc, dddd}', '{a, bbb, -ccc, dddd}')
Wed Jan 11 08:27:03 2023
('{the, cat, in, the, hat}', '{the, cat, in, the, hat}')
Wed Jan 11 08:27:03 2023
('{192, 168, 0, 1}', '{192, 168, 0, 1}')
=============================== warnings summary ===============================
../../../../usr/lib/python3.11/site-packages/pint/registry.py:539
../../../../usr/lib/python3.11/site-packages/pint/registry.py:539
/usr/lib/python3.11/site-packages/pint/registry.py:539: DeprecationWarning: read_binary is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
rbytes = importlib_resources.read_binary(__package__, file)
mathics/core/parser/convert.py:31: 8 warnings
test/test_context.py: 1 warning
test/test_structure.py: 1 warning
test/builtin/test_assignment.py: 5 warnings
test/builtin/colors/test_colors.py: 3 warnings
test/builtin/numbers/test_linalg.py: 1 warning
test/core/test_arithmetic.py: 6 warnings
test/package/test_combinatorica.py: 1 warning
/builddir/build/BUILD/Mathics3-5.0.2/mathics/core/parser/convert.py:31: DeprecationWarning: invalid escape sequence '\!'
return s.encode("raw_unicode_escape").decode("unicode_escape")
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED test/test_strings.py::test_string_split - re.error: global flags not a...
= 1 failed, 1104 passed, 25 skipped, 7 xfailed, 28 warnings in 89.85s (0:01:29) =
Expected behavior
The test should not fail
Your Environment
I can repro this reliably on Fedora Rawhide, which currently ships with Python 3.11.1 and Cython 0.29.32 (though I've verified this fails in the same way with and without Cython).
Workarounds
Disabling the test with --deselect=test/test_strings.py::test_string_split works, but that just papers over the problem.
This is still failing in the same way with 6.0.4 on Fedora Linux 38, which ships with Python 3.11.7. I can't test on more recent versions as Fedora switched to 3.12 and that's not supported yet (#932).
The merge that addresses this issue went in on May 24, 2023. It is not in any of the 6.x branches. It is only in current master, which will be in the upcoming 7.0 release.
To see this addressed, you'd have to build from current master sources, not any previously-released sources.
After the upcoming 7.0 release (which drops Python 3.6 support), we'll start to address Python 3.12 (and probably drop Python 3.7 support).
Description
When building the latest stable release (5.0.2) on Fedora,
test_strings.py::test_string_split
fails with re.error: global flags not at the start of the expression at position 1.
How to Reproduce
On a system with mock installed:
After it fails, you can use
mock -r fedora-rawhide-x86_64 --shell --enable-network
to get an interactive shell inside the build environment to troubleshoot (use dnf install
to get any additional packages you might need).
Output Given
Expected behavior
The test should not fail
Your Environment
I can repro this reliably on Fedora Rawhide, which currently ships with Python 3.11.1 and Cython 0.29.32 (though I've verified this fails in the same way with and without Cython).
Workarounds
Disabling the test with
--deselect=test/test_strings.py::test_string_split
works, but that just papers over the problem.
Priority
It's blocking the inclusion of Mathics in Fedora.
Additional context
This came up during the Fedora package review for Mathics.
The text was updated successfully, but these errors were encountered: