diff --git a/mathics/builtin/files_io/files.py b/mathics/builtin/files_io/files.py index 4bc33fbb2..3248ba826 100644 --- a/mathics/builtin/files_io/files.py +++ b/mathics/builtin/files_io/files.py @@ -45,6 +45,7 @@ MathicsOpen, channel_to_stream, close_stream, + parse_read_options, read_name_and_stream, ) from mathics.eval.makeboxes import do_format, format_element @@ -703,7 +704,9 @@ def eval_default(self, exprs, filename, evaluation): def validate_read_type(name: str, typ, evaluation: Evaluation): """ Validate a Read option type, and give a message if - the type is invalid. For Expession[Hold] + the type is invalid. For Expession[Hold], we convert it to + SymbolHoldExpression, String names are like "Byte" are + converted to Symbols in the return. """ if hasattr(typ, "head") and typ.head == SymbolHold: if not hasattr(typ, "elements"): @@ -715,6 +718,26 @@ def validate_read_type(name: str, typ, evaluation: Evaluation): return None return SymbolHoldExpression + + if isinstance(typ, String): + typ = Symbol(typ.value) + elif not isinstance(typ, Symbol): + evaluation.message(name, "readf", typ) + return None + + if typ.short_name not in ( + "Byte", + "Character", + "Expression", + "Number", + "Real", + "Record", + "String", + "Word", + ): + evaluation.message(name, "readf", typ) + return None + return typ @@ -791,11 +814,11 @@ class Read(Builtin): = 5 #> Close[stream]; - Reading a comment however will return the empty list: + Reading a comment, a non-expression, will return 'Hold[Null]' >> stream = StringToStream["(* ::Package:: *)"]; >> Read[stream, Hold[Expression]] - = {} + = Hold[Null] #> Close[stream]; @@ -868,14 +891,26 @@ def eval(self, stream, types, evaluation: Evaluation, options: dict): if new_type is None: return checked_types.append(new_type) - check_types = tuple(checked_types) + checked_types = tuple(checked_types) else: new_type = validate_read_type("Read", types, evaluation) if new_type is None: return checked_types = (new_type,) - return eval_Read(name, n, checked_types, stream, evaluation, options) + result = eval_Read("Read", n, checked_types, stream, evaluation, options) + if isinstance(result, list): + if isinstance(types, ListExpression): + assert len(result) == len( + types.elements + ), "internal error: eval_Read() should have a return for each type" + else: + assert ( + len(result) == 1 + ), f"internal error: eval_Read() should return at most 1 element; got {result}" + return result[0] + + return from_python(result) class ReadList(Read): @@ -982,7 +1017,10 @@ class ReadList(Read): >> InputForm[%] = {123, abc} """ - messages = {"opstl": "Value of option `1` should be a string or a list of strings."} + messages = { + "opstl": "Value of option `1` should be a string or a list of strings.", + "readf": "`1` is not a valid format specification.", + } options = { "NullRecords": "False", "NullWords": "False", @@ -998,9 +1036,9 @@ class ReadList(Read): def eval(self, file, types, evaluation: Evaluation, options: dict): "ReadList[file_, types_, OptionsPattern[ReadList]]" + py_options = parse_read_options(options) # Options # TODO: Implement extra options - # py_options = parse_read_options(options) # null_records = py_options['NullRecords'] # null_words = py_options['NullWords'] # record_separators = py_options['RecordSeparators'] @@ -1008,7 +1046,6 @@ def eval(self, file, types, evaluation: Evaluation, options: dict): # word_separators = py_options['WordSeparators'] result = [] - name, n, stream = read_name_and_stream(file, evaluation) # FIXME: DRY better with Read[]. # Validate types parameter and store the @@ -1020,30 +1057,41 @@ def eval(self, file, types, evaluation: Evaluation, options: dict): if new_type is None: return checked_types.append(new_type) - check_types = tuple(checked_types) + checked_types = tuple(checked_types) else: new_type = validate_read_type("ReadList", types, evaluation) if new_type is None: return checked_types = (new_type,) + name, n, stream = read_name_and_stream(file, evaluation) + if name is None: return elif name == SymbolFailed: return SymbolFailed while True: - tmp = eval_Read(name, n, checked_types, stream, evaluation, options) + next_elt = eval_Read( + "ReadList", n, checked_types, stream, evaluation, options + ) - if tmp is None: + if next_elt is None: return - if tmp is SymbolFailed: + if next_elt is SymbolFailed: return - if tmp is SymbolEndOfFile: + if next_elt is SymbolEndOfFile: break - result.append(tmp) + + if isinstance(next_elt, list) and py_options["TokenWords"]: + # FIXME: This might not be correct in all cases. + # we probably need a more positive way to indicate whether next_elt + # was returned from TokenWord parsing or not. + result += next_elt + else: + result.append(next_elt) return from_python(result) def eval_n(self, file, types, n: Integer, evaluation: Evaluation, options: dict): diff --git a/mathics/core/systemsymbols.py b/mathics/core/systemsymbols.py index 30a6eed06..c4349bf22 100644 --- a/mathics/core/systemsymbols.py +++ b/mathics/core/systemsymbols.py @@ -273,6 +273,7 @@ SymbolVariance = Symbol("System`Variance") SymbolWhitespace = Symbol("System`Whitespace") SymbolWhitespaceCharacter = Symbol("System`WhitespaceCharacter") +SymbolWord = Symbol("System`Word") SymbolWordBoundary = Symbol("System`WordBoundary") SymbolWordCharacter = Symbol("System`WordCharacter") SymbolXor = Symbol("System`Xor") diff --git a/mathics/eval/files_io/files.py b/mathics/eval/files_io/files.py index 4fc901c79..4aeb19ab2 100644 --- a/mathics/eval/files_io/files.py +++ b/mathics/eval/files_io/files.py @@ -26,6 +26,7 @@ SymbolHoldExpression, SymbolPath, SymbolReal, + SymbolWord, ) from mathics.core.util import canonic_filename from mathics.eval.files_io.read import ( @@ -128,13 +129,14 @@ def eval_Read( name: str, n: int, types: tuple, stream, evaluation: Evaluation, options: dict ): """ - Evaluation method for Read[] and ReadList[] + Evaluation method for Read[] and ReadList[]. `name` will be either "Read" or + "ReadList" and is used in error messages """ types = to_mathics_list(*types) for typ in types.elements: if typ not in READ_TYPES: - evaluation.message("Read", "readf", typ) + evaluation.message(name, "readf", typ) return SymbolFailed separators = read_get_separators(options, evaluation) @@ -199,7 +201,7 @@ def eval_Read( if expr is SymbolEndOfFile: evaluation.message( - "Read", "readt", tmp, to_expression("InputSteam", name, n) + name, "readt", tmp, to_expression("InputSteam", name, n) ) return SymbolFailed elif isinstance(expr, BaseElement): @@ -219,7 +221,7 @@ def eval_Read( tmp = float(tmp) except ValueError: evaluation.message( - "Read", "readn", to_expression("InputSteam", name, n) + name, "readn", to_expression("InputSteam", name, n) ) return SymbolFailed result.append(tmp) @@ -231,7 +233,7 @@ def eval_Read( tmp = float(tmp) except ValueError: evaluation.message( - "Read", "readn", to_expression("InputSteam", name, n) + name, "readn", to_expression("InputSteam", name, n) ) return SymbolFailed result.append(tmp) @@ -242,17 +244,37 @@ def eval_Read( if len(tmp) == 0: raise EOFError result.append(tmp.rstrip("\n")) - elif typ is Symbol("Word"): - result.append(next(read_word)) + elif typ is SymbolWord: + # next() for word tokens can return one or two words: + # the next word in the list and a following TokenWord + # match. Therefore, test for this and do list-like + # appending here. + + # THINK ABOUT: We might need to reconsider/refactor + # other cases to allow for multiple words as well. And + # for uniformity, we may want to redo the generators to + # always return *lists* instead instead of either a + # word or a list (which is always at most two words?) + words = next(read_word) + if not isinstance(words, list): + words = [words] + result += words except EOFError: return SymbolEndOfFile except UnicodeDecodeError: - evaluation.message("General", "ucdec") + evaluation.message(name, "ucdec") if isinstance(result, Symbol): return result - if len(result) == 1: - return from_python(*result) + if isinstance(result, list): + result_len = len(result) + if result_len == 0: + if SymbolHoldExpression in types: + return Expression(SymbolHold, SymbolNull) + elif result_len == 2 and SymbolWord in types: + return [from_python(part) for part in result] + elif result_len == 1: + result = result[0] return from_python(result) diff --git a/mathics/eval/files_io/read.py b/mathics/eval/files_io/read.py index 64178225b..4a75756a9 100644 --- a/mathics/eval/files_io/read.py +++ b/mathics/eval/files_io/read.py @@ -159,9 +159,9 @@ def parse_read_options(options) -> dict: string_quotes=False ) assert isinstance(record_separators, list) - assert all( - isinstance(s, str) and s[0] == s[-1] == '"' for s in record_separators - ) + # assert all( + # isinstance(s, str) and s[0] == s[-1] == '"' for s in record_separators + # ) record_separators = [s[1:-1] for s in record_separators] result["RecordSeparators"] = record_separators @@ -171,8 +171,6 @@ def parse_read_options(options) -> dict: string_quotes=False ) assert isinstance(word_separators, list) - assert all(isinstance(s, str) and s[0] == s[-1] == '"' for s in word_separators) - word_separators = [s[1:-1] for s in word_separators] result["WordSeparators"] = word_separators # NullRecords @@ -190,7 +188,6 @@ def parse_read_options(options) -> dict: # TokenWords if "System`TokenWords" in keys: token_words = options["System`TokenWords"].to_python(string_quotes=False) - assert token_words == [] result["TokenWords"] = token_words return result @@ -385,9 +382,7 @@ def read_from_stream( else: yield word continue - last_word = word - word = "" - yield last_word + yield word break if tmp in word_separators: @@ -396,30 +391,24 @@ def read_from_stream( if stream.io.seekable(): stream.io.seek(stream.io.tell() - 1) word += some_token_word_prefix - last_word = word - word = "" some_token_word_prefix = "" - yield last_word + yield word break if accepted is not None and tmp not in accepted: word += some_token_word_prefix - last_word = word - word = "" some_token_word_prefix = "" - yield last_word + yield word break some_token_word_prefix += tmp for token_word in token_words: if token_word == some_token_word_prefix: if word: - # Start here - last_word = word - word = "" - some_token_word_prefix = "" - yield last_word - yield token_word + yield [word, token_word] + else: + yield token_word + some_token_word_prefix = "" break else: word += some_token_word_prefix diff --git a/test/builtin/files_io/test_files.py b/test/builtin/files_io/test_files.py index d339adb05..7042a728b 100644 --- a/test/builtin/files_io/test_files.py +++ b/test/builtin/files_io/test_files.py @@ -278,12 +278,6 @@ def test_close(): # "{Read, InputStream, String, {Real}}", # "", # ), - ( - r'stream = StringToStream["\"abc123\""];ReadList[stream, "Invalid"]//{#1[[0]],#1[[2]]}&', - ("Invalid is not a valid format specification.",), - "{ReadList, Invalid}", - "", - ), ("Close[stream];", None, "Null", ""), ( 'ReadList[StringToStream["a 1 b 2"], {Word, Number}, 1]', diff --git a/test/builtin/files_io/test_read.py b/test/builtin/files_io/test_read.py new file mode 100644 index 000000000..3b529b515 --- /dev/null +++ b/test/builtin/files_io/test_read.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +""" +Unit tests from builtins/files_io/files.py +""" +from test.helper import check_evaluation, data_dir + +import pytest + + +@pytest.mark.parametrize( + ("str_expr", "msgs", "str_expected", "fail_msg"), + [ + ( + f'ReadList["{data_dir}/lisp1.m", ' + """Word, TokenWords->{"(", ")", "[", "]", "'"}]""", + None, + "{', (, 1, )}", + None, + ), + ( + r'stream = StringToStream["\"abc123\""];ReadList[stream, "Invalid"]//{#1[[0]],#1[[2]]}&', + ("Invalid is not a valid format specification.",), + "{ReadList, Invalid}", + "", + ), + ], +) +def test_read_list(str_expr, msgs, str_expected, fail_msg): + """ReadList[] tests.""" + check_evaluation( + str_expr, + str_expected, + to_string_expr=True, + to_string_expected=True, + hold_expected=True, + failure_message=fail_msg, + expected_messages=msgs, + ) diff --git a/test/data/lisp1.m b/test/data/lisp1.m new file mode 100644 index 000000000..46ccdd81c --- /dev/null +++ b/test/data/lisp1.m @@ -0,0 +1 @@ +'(1) diff --git a/test/helper.py b/test/helper.py index e42a844f6..b428f880d 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import os.path as osp import time from typing import Optional @@ -12,6 +13,9 @@ # the lowest common denominator available on all systems. session = MathicsSession(character_encoding="ASCII") +# Set up a data path that can be used in testing +data_dir = osp.normpath(osp.join(osp.dirname(__file__), "data")) + def reset_session(add_builtin=True, catch_interrupt=False): """