Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Read{,List}[] with TokenWord and improve ... #1104

Merged
merged 1 commit into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 62 additions & 14 deletions mathics/builtin/files_io/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
MathicsOpen,
channel_to_stream,
close_stream,
parse_read_options,
read_name_and_stream,
)
from mathics.eval.makeboxes import do_format, format_element
Expand Down Expand Up @@ -703,7 +704,9 @@ def eval_default(self, exprs, filename, evaluation):
def validate_read_type(name: str, typ, evaluation: Evaluation):
"""
Validate a Read option type, and give a message if
the type is invalid. For Expession[Hold]
the type is invalid. For Hold[Expression], we convert it to
SymbolHoldExpression. String names like "Byte" are
converted to Symbols in the return.
"""
if hasattr(typ, "head") and typ.head == SymbolHold:
if not hasattr(typ, "elements"):
Expand All @@ -715,6 +718,26 @@ def validate_read_type(name: str, typ, evaluation: Evaluation):
return None

return SymbolHoldExpression

if isinstance(typ, String):
typ = Symbol(typ.value)
elif not isinstance(typ, Symbol):
evaluation.message(name, "readf", typ)
return None

if typ.short_name not in (
"Byte",
"Character",
"Expression",
"Number",
"Real",
"Record",
"String",
"Word",
):
evaluation.message(name, "readf", typ)
return None

return typ


Expand Down Expand Up @@ -791,11 +814,11 @@ class Read(Builtin):
= 5
#> Close[stream];

Reading a comment however will return the empty list:
Reading a comment, a non-expression, will return 'Hold[Null]':
>> stream = StringToStream["(* ::Package:: *)"];

>> Read[stream, Hold[Expression]]
= {}
= Hold[Null]

#> Close[stream];

Expand Down Expand Up @@ -868,14 +891,26 @@ def eval(self, stream, types, evaluation: Evaluation, options: dict):
if new_type is None:
return
checked_types.append(new_type)
check_types = tuple(checked_types)
checked_types = tuple(checked_types)
else:
new_type = validate_read_type("Read", types, evaluation)
if new_type is None:
return
checked_types = (new_type,)

return eval_Read(name, n, checked_types, stream, evaluation, options)
result = eval_Read("Read", n, checked_types, stream, evaluation, options)
if isinstance(result, list):
if isinstance(types, ListExpression):
assert len(result) == len(
types.elements
), "internal error: eval_Read() should have a return for each type"
else:
assert (
len(result) == 1
), f"internal error: eval_Read() should return at most 1 element; got {result}"
return result[0]

return from_python(result)


class ReadList(Read):
Expand Down Expand Up @@ -982,7 +1017,10 @@ class ReadList(Read):
>> InputForm[%]
= {123, abc}
"""
messages = {"opstl": "Value of option `1` should be a string or a list of strings."}
messages = {
"opstl": "Value of option `1` should be a string or a list of strings.",
"readf": "`1` is not a valid format specification.",
}
options = {
"NullRecords": "False",
"NullWords": "False",
Expand All @@ -998,17 +1036,16 @@ class ReadList(Read):
def eval(self, file, types, evaluation: Evaluation, options: dict):
"ReadList[file_, types_, OptionsPattern[ReadList]]"

py_options = parse_read_options(options)
# Options
# TODO: Implement extra options
# py_options = parse_read_options(options)
# null_records = py_options['NullRecords']
# null_words = py_options['NullWords']
# record_separators = py_options['RecordSeparators']
# token_words = py_options['TokenWords']
# word_separators = py_options['WordSeparators']

result = []
name, n, stream = read_name_and_stream(file, evaluation)

# FIXME: DRY better with Read[].
# Validate types parameter and store the
Expand All @@ -1020,30 +1057,41 @@ def eval(self, file, types, evaluation: Evaluation, options: dict):
if new_type is None:
return
checked_types.append(new_type)
check_types = tuple(checked_types)
checked_types = tuple(checked_types)
else:
new_type = validate_read_type("ReadList", types, evaluation)
if new_type is None:
return
checked_types = (new_type,)

name, n, stream = read_name_and_stream(file, evaluation)

if name is None:
return
elif name == SymbolFailed:
return SymbolFailed

while True:
tmp = eval_Read(name, n, checked_types, stream, evaluation, options)
next_elt = eval_Read(
"ReadList", n, checked_types, stream, evaluation, options
)

if tmp is None:
if next_elt is None:
return

if tmp is SymbolFailed:
if next_elt is SymbolFailed:
return

if tmp is SymbolEndOfFile:
if next_elt is SymbolEndOfFile:
break
result.append(tmp)

if isinstance(next_elt, list) and py_options["TokenWords"]:
# FIXME: This might not be correct in all cases.
# we probably need a more positive way to indicate whether next_elt
# was returned from TokenWord parsing or not.
result += next_elt
else:
result.append(next_elt)
return from_python(result)

def eval_n(self, file, types, n: Integer, evaluation: Evaluation, options: dict):
Expand Down
1 change: 1 addition & 0 deletions mathics/core/systemsymbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@
SymbolVariance = Symbol("System`Variance")
SymbolWhitespace = Symbol("System`Whitespace")
SymbolWhitespaceCharacter = Symbol("System`WhitespaceCharacter")
SymbolWord = Symbol("System`Word")
SymbolWordBoundary = Symbol("System`WordBoundary")
SymbolWordCharacter = Symbol("System`WordCharacter")
SymbolXor = Symbol("System`Xor")
42 changes: 32 additions & 10 deletions mathics/eval/files_io/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
SymbolHoldExpression,
SymbolPath,
SymbolReal,
SymbolWord,
)
from mathics.core.util import canonic_filename
from mathics.eval.files_io.read import (
Expand Down Expand Up @@ -128,13 +129,14 @@ def eval_Read(
name: str, n: int, types: tuple, stream, evaluation: Evaluation, options: dict
):
"""
Evaluation method for Read[] and ReadList[]
Evaluation method for Read[] and ReadList[]. `name` will be either "Read" or
"ReadList" and is used in error messages.
"""
types = to_mathics_list(*types)

for typ in types.elements:
if typ not in READ_TYPES:
evaluation.message("Read", "readf", typ)
evaluation.message(name, "readf", typ)
return SymbolFailed

separators = read_get_separators(options, evaluation)
Expand Down Expand Up @@ -199,7 +201,7 @@ def eval_Read(

if expr is SymbolEndOfFile:
evaluation.message(
"Read", "readt", tmp, to_expression("InputSteam", name, n)
name, "readt", tmp, to_expression("InputSteam", name, n)
)
return SymbolFailed
elif isinstance(expr, BaseElement):
Expand All @@ -219,7 +221,7 @@ def eval_Read(
tmp = float(tmp)
except ValueError:
evaluation.message(
"Read", "readn", to_expression("InputSteam", name, n)
name, "readn", to_expression("InputSteam", name, n)
)
return SymbolFailed
result.append(tmp)
Expand All @@ -231,7 +233,7 @@ def eval_Read(
tmp = float(tmp)
except ValueError:
evaluation.message(
"Read", "readn", to_expression("InputSteam", name, n)
name, "readn", to_expression("InputSteam", name, n)
)
return SymbolFailed
result.append(tmp)
Expand All @@ -242,17 +244,37 @@ def eval_Read(
if len(tmp) == 0:
raise EOFError
result.append(tmp.rstrip("\n"))
elif typ is Symbol("Word"):
result.append(next(read_word))
elif typ is SymbolWord:
# next() for word tokens can return one or two words:
# the next word in the list and a following TokenWord
# match. Therefore, test for this and do list-like
# appending here.

# THINK ABOUT: We might need to reconsider/refactor
# other cases to allow for multiple words as well. And
# for uniformity, we may want to redo the generators to
# always return *lists* instead of either a
# word or a list (which is always at most two words?)
words = next(read_word)
if not isinstance(words, list):
words = [words]
result += words

except EOFError:
return SymbolEndOfFile
except UnicodeDecodeError:
evaluation.message("General", "ucdec")
evaluation.message(name, "ucdec")

if isinstance(result, Symbol):
return result
if len(result) == 1:
return from_python(*result)
if isinstance(result, list):
result_len = len(result)
if result_len == 0:
if SymbolHoldExpression in types:
return Expression(SymbolHold, SymbolNull)
elif result_len == 2 and SymbolWord in types:
return [from_python(part) for part in result]
elif result_len == 1:
result = result[0]

return from_python(result)
31 changes: 10 additions & 21 deletions mathics/eval/files_io/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ def parse_read_options(options) -> dict:
string_quotes=False
)
assert isinstance(record_separators, list)
assert all(
isinstance(s, str) and s[0] == s[-1] == '"' for s in record_separators
)
# assert all(
# isinstance(s, str) and s[0] == s[-1] == '"' for s in record_separators
# )
record_separators = [s[1:-1] for s in record_separators]
result["RecordSeparators"] = record_separators

Expand All @@ -171,8 +171,6 @@ def parse_read_options(options) -> dict:
string_quotes=False
)
assert isinstance(word_separators, list)
assert all(isinstance(s, str) and s[0] == s[-1] == '"' for s in word_separators)
word_separators = [s[1:-1] for s in word_separators]
result["WordSeparators"] = word_separators

# NullRecords
Expand All @@ -190,7 +188,6 @@ def parse_read_options(options) -> dict:
# TokenWords
if "System`TokenWords" in keys:
token_words = options["System`TokenWords"].to_python(string_quotes=False)
assert token_words == []
result["TokenWords"] = token_words

return result
Expand Down Expand Up @@ -385,9 +382,7 @@ def read_from_stream(
else:
yield word
continue
last_word = word
word = ""
yield last_word
yield word
break

if tmp in word_separators:
Expand All @@ -396,30 +391,24 @@ def read_from_stream(
if stream.io.seekable():
stream.io.seek(stream.io.tell() - 1)
word += some_token_word_prefix
last_word = word
word = ""
some_token_word_prefix = ""
yield last_word
yield word
break

if accepted is not None and tmp not in accepted:
word += some_token_word_prefix
last_word = word
word = ""
some_token_word_prefix = ""
yield last_word
yield word
break

some_token_word_prefix += tmp
for token_word in token_words:
if token_word == some_token_word_prefix:
if word:
# Start here
last_word = word
word = ""
some_token_word_prefix = ""
yield last_word
yield token_word
yield [word, token_word]
else:
yield token_word
some_token_word_prefix = ""
break
else:
word += some_token_word_prefix
Expand Down
6 changes: 0 additions & 6 deletions test/builtin/files_io/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,12 +278,6 @@ def test_close():
# "{Read, InputStream, String, {Real}}",
# "",
# ),
(
r'stream = StringToStream["\"abc123\""];ReadList[stream, "Invalid"]//{#1[[0]],#1[[2]]}&',
("Invalid is not a valid format specification.",),
"{ReadList, Invalid}",
"",
),
("Close[stream];", None, "Null", ""),
(
'ReadList[StringToStream["a 1 b 2"], {Word, Number}, 1]',
Expand Down
Loading