Fix Read{,List}[] with TokenWord and improve ...
* Read{,List}[] with TokenWord needs to return two tokens sometimes
* Adjust error messages so that the correct caller name (Read or ReadList) is used.
* Simplify logic in eval/files_io/read.py
* Add data_dir variable that can be used in testing.
rocky committed Sep 29, 2024
1 parent a551242 commit 38de43b
Showing 7 changed files with 140 additions and 50 deletions.
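For context, here is a minimal sketch of the behavior this commit fixes, run through the test suite's MathicsSession helper (the session API call and the exact printed form are assumptions; the input string and expected tokens mirror the new lisp1.m test added below):

from mathics.session import MathicsSession

session = MathicsSession(character_encoding="ASCII")
# "'(1)" is the content of the new test/data/lisp1.m file. With TokenWords,
# ReadList should now return all four tokens instead of losing the word that
# precedes a token word.
result = session.evaluate(
    """ReadList[StringToStream["'(1)"], Word, TokenWords -> {"(", ")", "[", "]", "'"}]"""
)
print(result)  # expected to correspond to {', (, 1, )}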
76 changes: 62 additions & 14 deletions mathics/builtin/files_io/files.py
@@ -45,6 +45,7 @@
MathicsOpen,
channel_to_stream,
close_stream,
parse_read_options,
read_name_and_stream,
)
from mathics.eval.makeboxes import do_format, format_element
@@ -703,7 +704,9 @@ def eval_default(self, exprs, filename, evaluation):
def validate_read_type(name: str, typ, evaluation: Evaluation):
"""
Validate a Read option type, and give a message if
the type is invalid. For Expession[Hold]
the type is invalid. For Expression[Hold], we convert it to
SymbolHoldExpression. String names like "Byte" are
converted to Symbols in the return.
"""
if hasattr(typ, "head") and typ.head == SymbolHold:
if not hasattr(typ, "elements"):
@@ -715,6 +718,26 @@ def validate_read_type(name: str, typ, evaluation: Evaluation):
return None

return SymbolHoldExpression

if isinstance(typ, String):
typ = Symbol(typ.value)
elif not isinstance(typ, Symbol):
evaluation.message(name, "readf", typ)
return None

if typ.short_name not in (
"Byte",
"Character",
"Expression",
"Number",
"Real",
"Record",
"String",
"Word",
):
evaluation.message(name, "readf", typ)
return None

return typ


@@ -791,11 +814,11 @@ class Read(Builtin):
= 5
#> Close[stream];
Reading a comment however will return the empty list:
Reading a comment, which is not an expression, will return 'Hold[Null]':
>> stream = StringToStream["(* ::Package:: *)"];
>> Read[stream, Hold[Expression]]
= {}
= Hold[Null]
#> Close[stream];
@@ -868,14 +891,26 @@ def eval(self, stream, types, evaluation: Evaluation, options: dict):
if new_type is None:
return
checked_types.append(new_type)
check_types = tuple(checked_types)
checked_types = tuple(checked_types)
else:
new_type = validate_read_type("Read", types, evaluation)
if new_type is None:
return
checked_types = (new_type,)

return eval_Read(name, n, checked_types, stream, evaluation, options)
result = eval_Read("Read", n, checked_types, stream, evaluation, options)
if isinstance(result, list):
if isinstance(types, ListExpression):
assert len(result) == len(
types.elements
), "internal error: eval_Read() should have a return for each type"
else:
assert (
len(result) == 1
), f"internal error: eval_Read() should return at most 1 element; got {result}"
return result[0]

return from_python(result)


class ReadList(Read):
@@ -982,7 +1017,10 @@ class ReadList(Read):
>> InputForm[%]
= {123, abc}
"""
messages = {"opstl": "Value of option `1` should be a string or a list of strings."}
messages = {
"opstl": "Value of option `1` should be a string or a list of strings.",
"readf": "`1` is not a valid format specification.",
}
options = {
"NullRecords": "False",
"NullWords": "False",
@@ -998,17 +1036,16 @@ class ReadList(Read):
def eval(self, file, types, evaluation: Evaluation, options: dict):
"ReadList[file_, types_, OptionsPattern[ReadList]]"

py_options = parse_read_options(options)
# Options
# TODO: Implement extra options
# py_options = parse_read_options(options)
# null_records = py_options['NullRecords']
# null_words = py_options['NullWords']
# record_separators = py_options['RecordSeparators']
# token_words = py_options['TokenWords']
# word_separators = py_options['WordSeparators']

result = []
name, n, stream = read_name_and_stream(file, evaluation)

# FIXME: DRY better with Read[].
# Validate types parameter and store the
@@ -1020,30 +1057,41 @@ def eval(self, file, types, evaluation: Evaluation, options: dict):
if new_type is None:
return
checked_types.append(new_type)
check_types = tuple(checked_types)
checked_types = tuple(checked_types)
else:
new_type = validate_read_type("ReadList", types, evaluation)
if new_type is None:
return
checked_types = (new_type,)

name, n, stream = read_name_and_stream(file, evaluation)

if name is None:
return
elif name == SymbolFailed:
return SymbolFailed

while True:
tmp = eval_Read(name, n, checked_types, stream, evaluation, options)
next_elt = eval_Read(
"ReadList", n, checked_types, stream, evaluation, options
)

if tmp is None:
if next_elt is None:
return

if tmp is SymbolFailed:
if next_elt is SymbolFailed:
return

if tmp is SymbolEndOfFile:
if next_elt is SymbolEndOfFile:
break
result.append(tmp)

if isinstance(next_elt, list) and py_options["TokenWords"]:
# FIXME: This might not be correct in all cases.
# We probably need a more reliable way to indicate whether next_elt
# was returned from TokenWord parsing or not.
result += next_elt
else:
result.append(next_elt)
return from_python(result)

def eval_n(self, file, types, n: Integer, evaluation: Evaluation, options: dict):
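A quick, self-contained illustration of why Read.eval() above unwraps a single-element result but keeps the whole list when a list of types was given (a sketch; the printed forms are assumptions based on standard Read behavior):

from mathics.session import MathicsSession

session = MathicsSession(character_encoding="ASCII")
# A single type yields a single element; a list of types yields a list,
# which is why eval() returns result[0] only in the single-type case.
print(session.evaluate('Read[StringToStream["a 1"], Word]'))            # expected: a
print(session.evaluate('Read[StringToStream["a 1"], {Word, Number}]'))  # expected: {a, 1}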
34 changes: 25 additions & 9 deletions mathics/eval/files_io/files.py
@@ -128,13 +128,14 @@ def eval_Read(
name: str, n: int, types: tuple, stream, evaluation: Evaluation, options: dict
):
"""
Evaluation method for Read[] and ReadList[]
Evaluation method for Read[] and ReadList[]. `name` will be either "Read" or
"ReadList" and is used in error messages
"""
types = to_mathics_list(*types)

for typ in types.elements:
if typ not in READ_TYPES:
evaluation.message("Read", "readf", typ)
evaluation.message(name, "readf", typ)
return SymbolFailed

separators = read_get_separators(options, evaluation)
@@ -199,7 +200,7 @@ def eval_Read(

if expr is SymbolEndOfFile:
evaluation.message(
"Read", "readt", tmp, to_expression("InputSteam", name, n)
name, "readt", tmp, to_expression("InputSteam", name, n)
)
return SymbolFailed
elif isinstance(expr, BaseElement):
@@ -219,7 +220,7 @@ def eval_Read(
tmp = float(tmp)
except ValueError:
evaluation.message(
"Read", "readn", to_expression("InputSteam", name, n)
name, "readn", to_expression("InputSteam", name, n)
)
return SymbolFailed
result.append(tmp)
@@ -231,7 +232,7 @@ def eval_Read(
tmp = float(tmp)
except ValueError:
evaluation.message(
"Read", "readn", to_expression("InputSteam", name, n)
name, "readn", to_expression("InputSteam", name, n)
)
return SymbolFailed
result.append(tmp)
@@ -243,16 +244,31 @@ def eval_Read(
raise EOFError
result.append(tmp.rstrip("\n"))
elif typ is Symbol("Word"):
result.append(next(read_word))
# next() for word tokens can return one or two words:
# the next word in the stream and, possibly, a following
# TokenWord match. Therefore, test for this and do list-like
# appending here.

# THINK ABOUT: We might need to reconsider/refactor
# other cases to allow for multiple words as well. And
# for uniformity, we may want to redo the generators to
# always return *lists* instead of either a single
# word or a list (which is always at most two words?)
words = next(read_word)
if not isinstance(words, list):
words = [words]
result += words

except EOFError:
return SymbolEndOfFile
except UnicodeDecodeError:
evaluation.message("General", "ucdec")
evaluation.message(name, "ucdec")

if isinstance(result, Symbol):
return result
if len(result) == 1:
return from_python(*result)
if isinstance(result, list):
if len(result) == 0 and SymbolHoldExpression in types:
return Expression(SymbolHold, SymbolNull)
return [from_python(part) for part in result]

return from_python(result)
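To make the "one or two words" contract used above concrete, here is a small toy sketch (not the Mathics implementation; all names are made up) of a word reader that yields a [word, token_word] pair when a token word terminates a pending word, together with the flattening the caller then performs:

def toy_word_reader(text, token_words, separators=(" ", "\n")):
    # Yields plain words, lone token words, or a [word, token_word] pair
    # when a token word immediately follows an unfinished word.
    word = ""
    for ch in text:
        if ch in separators:
            if word:
                yield word
                word = ""
            continue
        if ch in token_words:
            if word:
                yield [word, ch]
                word = ""
            else:
                yield ch
            continue
        word += ch
    if word:
        yield word

flat = []
for item in toy_word_reader("'(1)", {"(", ")", "[", "]", "'"}):
    flat += item if isinstance(item, list) else [item]
print(flat)  # ["'", "(", "1", ")"] -- matching the expected {', (, 1, )}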
31 changes: 10 additions & 21 deletions mathics/eval/files_io/read.py
@@ -159,9 +159,9 @@ def parse_read_options(options) -> dict:
string_quotes=False
)
assert isinstance(record_separators, list)
assert all(
isinstance(s, str) and s[0] == s[-1] == '"' for s in record_separators
)
# assert all(
# isinstance(s, str) and s[0] == s[-1] == '"' for s in record_separators
# )
record_separators = [s[1:-1] for s in record_separators]
result["RecordSeparators"] = record_separators

@@ -171,8 +171,6 @@
string_quotes=False
)
assert isinstance(word_separators, list)
assert all(isinstance(s, str) and s[0] == s[-1] == '"' for s in word_separators)
word_separators = [s[1:-1] for s in word_separators]
result["WordSeparators"] = word_separators

# NullRecords
@@ -190,7 +188,6 @@
# TokenWords
if "System`TokenWords" in keys:
token_words = options["System`TokenWords"].to_python(string_quotes=False)
assert token_words == []
result["TokenWords"] = token_words

return result
@@ -385,9 +382,7 @@ def read_from_stream(
else:
yield word
continue
last_word = word
word = ""
yield last_word
yield word
break

if tmp in word_separators:
@@ -396,30 +391,24 @@
if stream.io.seekable():
stream.io.seek(stream.io.tell() - 1)
word += some_token_word_prefix
last_word = word
word = ""
some_token_word_prefix = ""
yield last_word
yield word
break

if accepted is not None and tmp not in accepted:
word += some_token_word_prefix
last_word = word
word = ""
some_token_word_prefix = ""
yield last_word
yield word
break

some_token_word_prefix += tmp
for token_word in token_words:
if token_word == some_token_word_prefix:
if word:
# Start here
last_word = word
word = ""
some_token_word_prefix = ""
yield last_word
yield token_word
yield [word, token_word]
else:
yield token_word
some_token_word_prefix = ""
break
else:
word += some_token_word_prefix
6 changes: 0 additions & 6 deletions test/builtin/files_io/test_files.py
@@ -278,12 +278,6 @@ def test_close():
# "{Read, InputStream, String, {Real}}",
# "",
# ),
(
r'stream = StringToStream["\"abc123\""];ReadList[stream, "Invalid"]//{#1[[0]],#1[[2]]}&',
("Invalid is not a valid format specification.",),
"{ReadList, Invalid}",
"",
),
("Close[stream];", None, "Null", ""),
(
'ReadList[StringToStream["a 1 b 2"], {Word, Number}, 1]',
38 changes: 38 additions & 0 deletions test/builtin/files_io/test_read.py
@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
"""
Unit tests of builtins from mathics/builtin/files_io/files.py
"""
from test.helper import check_evaluation, data_dir

import pytest


@pytest.mark.parametrize(
("str_expr", "msgs", "str_expected", "fail_msg"),
[
(
f'ReadList["{data_dir}/lisp1.m", '
"""Word, TokenWords->{"(", ")", "[", "]", "'"}]""",
None,
"{', (, 1, )}",
None,
),
(
r'stream = StringToStream["\"abc123\""];ReadList[stream, "Invalid"]//{#1[[0]],#1[[2]]}&',
("Invalid is not a valid format specification.",),
"{ReadList, Invalid}",
"",
),
],
)
def test_read_list(str_expr, msgs, str_expected, fail_msg):
"""ReadList[] tests."""
check_evaluation(
str_expr,
str_expected,
to_string_expr=True,
to_string_expected=True,
hold_expected=True,
failure_message=fail_msg,
expected_messages=msgs,
)
1 change: 1 addition & 0 deletions test/data/lisp1.m
@@ -0,0 +1 @@
'(1)
4 changes: 4 additions & 0 deletions test/helper.py
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import os.path as osp
import time
from typing import Optional

@@ -12,6 +13,9 @@
# the lowest common denominator available on all systems.
session = MathicsSession(character_encoding="ASCII")

# Set up a data path that can be used in testing
data_dir = osp.normpath(osp.join(osp.dirname(__file__), "data"))


def reset_session(add_builtin=True, catch_interrupt=False):
"""
