From 6cd5f7ee49e15e6d5bba536b77e771a309a72d4d Mon Sep 17 00:00:00 2001 From: Andrey Kislyuk Date: Sun, 27 Aug 2017 12:40:47 -0700 Subject: [PATCH] Handle multi-document streams. Fixes #6 --- README.rst | 6 +++--- test/test.py | 1 + yq/__init__.py | 20 ++++++++++++++------ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index f2adee9..cf362c8 100644 --- a/README.rst +++ b/README.rst @@ -13,8 +13,8 @@ Before using ``yq``, you also have to install its dependency, ``jq``. See the `j Synopsis -------- -``yq``'s mode of operation is simple: it transcodes YAML on standard input to JSON (using ``yaml.safe_load`` to avoid -dangerous vulnerabilities in YAML/PyYAML design) and pipes it to ``jq``:: +``yq``'s mode of operation is simple: it transcodes YAML on standard input to JSON (using the key-order-preserving +equivalent of ``yaml.safe_load_all`` to avoid dangerous vulnerabilities in YAML/PyYAML design) and pipes it to ``jq``:: cat input.yml | yq .foo.bar @@ -23,7 +23,7 @@ Or specify the filename directly:: yq .foo.bar input.yml By default, no transcoding of ``jq`` output is done. Specify the ``--yaml-output``/``-y`` option to transcode it back -into YAML (using ``yaml.safe_dump``):: +into YAML (using the key-order-preserving equivalent of ``yaml.safe_dump_all``):: cat input.yml | yq -y .foo.bar diff --git a/test/test.py b/test/test.py index 634761b..99586a8 100755 --- a/test/test.py +++ b/test/test.py @@ -36,6 +36,7 @@ def test_yq(self): self.assertEqual(self.run_yq('{"понедельник": 1}', ['.["понедельник"]']), "") self.assertEqual(self.run_yq('{"понедельник": 1}', ["-y", '.["понедельник"]']), "1\n...\n") self.assertEqual(self.run_yq("- понедельник\n- вторник\n", ["-y", "."]), "- понедельник\n- вторник\n") + self.assertEqual(self.run_yq("---\na: b\n---\nc: d", ["-y", "."]), "a: b\n---\nc: d\n") def test_yq_err(self): err = 'yq: Error running jq: ScannerError: while scanning for the next token\nfound character \'%\' that cannot start any token\n in "", line 1, column 3.' diff --git a/yq/__init__.py b/yq/__init__.py index bd2aa8e..e773e9c 100755 --- a/yq/__init__.py +++ b/yq/__init__.py @@ -41,6 +41,12 @@ def construct_mapping(loader, node): def represent_dict_order(dumper, data): return dumper.represent_mapping("tag:yaml.org,2002:map", data.items()) +def decode_docs(jq_output, json_decoder): + while jq_output: + doc, pos = json_decoder.raw_decode(jq_output) + jq_output = jq_output[pos+1:] + yield doc + OrderedLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping) OrderedDumper.add_representer(OrderedDict, represent_dict_order) @@ -69,14 +75,16 @@ def main(args=None): try: input_stream = args.file[0] if args.file else sys.stdin + input_docs = yaml.load_all(input_stream, Loader=OrderedLoader) if args.yaml_output: - input_payload = yaml.load(input_stream, Loader=OrderedLoader) - out, err = jq.communicate(json.dumps(input_payload, cls=JSONDateTimeEncoder)) - out = json.loads(out, object_pairs_hook=OrderedDict) - yaml.dump(out, stream=sys.stdout, Dumper=OrderedDumper, width=args.width, - allow_unicode=True, default_flow_style=False) + input_payload = "\n".join(json.dumps(doc, cls=JSONDateTimeEncoder) for doc in input_docs) + jq_out, jq_err = jq.communicate(input_payload) + json_decoder = json.JSONDecoder(object_pairs_hook=OrderedDict) + yaml.dump_all(decode_docs(jq_out, json_decoder), stream=sys.stdout, Dumper=OrderedDumper, width=args.width, + allow_unicode=True, default_flow_style=False) else: - json.dump(yaml.load(input_stream, Loader=OrderedLoader), jq.stdin, cls=JSONDateTimeEncoder) + for doc in input_docs: + json.dump(doc, jq.stdin, cls=JSONDateTimeEncoder) jq.stdin.close() jq.wait() input_stream.close()