From 21d7638526e2b0825a8cae62159b876e57164648 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 18 Feb 2024 12:43:16 -0800 Subject: [PATCH 1/4] first feature commit --- nbstripout/_nbstripout.py | 36 ++++++++++++++++--- tests/test_end_to_end.py | 74 +++++++++++++++++++++++++++++++++------ 2 files changed, 94 insertions(+), 16 deletions(-) diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 77e7944..4287bfa 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -331,27 +331,39 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False): return 1 def process_notebook(input_stream, output_stream, args, extra_keys, filename='input from stdin'): + any_change = False if args.mode == 'zeppelin': nb = json.load(input_stream, object_pairs_hook=collections.OrderedDict) + nb_str_orig = json.dumps(nb, indent=2) nb_stripped = strip_zeppelin_output(nb) + + nb_str_stripped = json.dumps(nb_stripped, indent=2) + if nb_str_orig != nb_str_stripped: + any_change = True + if args.dry_run: output_stream.write(f'Dry run: would have stripped {filename}\n') - return + return any_change if output_stream.seekable(): output_stream.seek(0) output_stream.truncate() json.dump(nb_stripped, output_stream, indent=2) output_stream.write('\n') output_stream.flush() - return + return any_change + with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) nb = nbformat.read(input_stream, as_version=nbformat.NO_CONVERT) + nb_start_str = json.dumps(nb, indent=2) nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells, args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size)) + nb_end_str = json.dumps(nb, indent=2) + if nb_start_str != nb_end_str: + any_change = True if args.dry_run: output_stream.write(f'Dry run: would have stripped {filename}\n') @@ -363,7 +375,7 @@ def process_notebook(input_stream, output_stream, args, 
extra_keys, filename='in warnings.simplefilter("ignore", category=UserWarning) nbformat.write(nb, output_stream) output_stream.flush() - + return any_change def main(): parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter) @@ -383,6 +395,8 @@ def main(): 'repository and configuration summary if installed') task.add_argument('--version', action='store_true', help='Print version') + parser.add_argument("--verify", action="store_true", + help="Return a non-zero exit code if any files were changed, Implies --dry-run") parser.add_argument('--keep-count', action='store_true', help='Do not strip the execution count/prompt number') parser.add_argument('--keep-output', action='store_true', @@ -428,6 +442,11 @@ def main(): args = parser.parse_args() git_config = ['git', 'config'] + if args.verify: + if not args.dry_run: + print("Running in verify mode, setting --dry-run") + args.dry_run = True + if args._system: git_config.append('--system') install_location = INSTALL_LOCATION_SYSTEM @@ -483,6 +502,7 @@ def main(): input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') if sys.stdin else None output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='') + any_change = False for filename in args.files: if not (args.force or filename.endswith('.ipynb') or filename.endswith('.zpln')): continue @@ -490,7 +510,9 @@ def main(): try: with io.open(filename, 'r+', encoding='utf8', newline='') as f: out = output_stream if args.textconv or args.dry_run else f - process_notebook(f, out, args, extra_keys, filename) + any_local_change = process_notebook(f, out, args, extra_keys, filename) + any_change = any_change or any_local_change + except nbformat.reader.NotJSONError: print(f"No valid notebook detected in '{filename}'", file=sys.stderr) raise SystemExit(1) @@ -504,7 +526,11 @@ def main(): if not args.files and input_stream: try: - process_notebook(input_stream, output_stream, args, extra_keys) + any_local_change = 
process_notebook(input_stream, output_stream, args, extra_keys) + any_change = any_change or any_local_change except nbformat.reader.NotJSONError: print('No valid notebook detected on stdin', file=sys.stderr) raise SystemExit(1) + + if args.verify and any_change: + raise SystemExit(1) diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index 725c53d..d8430e5 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -1,6 +1,7 @@ import os from pathlib import Path import re +import json from subprocess import run, PIPE # Note: typing.Pattern is deprecated, for removal in 3.13 in favour of re.Pattern introduced in 3.8 from typing import List, Union, Pattern @@ -53,49 +54,100 @@ def nbstripout_exe(): @pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES) -def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str]): +@pytest.mark.parametrize("verify", (True, False)) +def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str], verify: bool): with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f: expected = f.read() + expected_str = json.dumps(json.loads(expected), indent=2) with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: - pc = run([nbstripout_exe()] + args, stdin=f, stdout=PIPE, universal_newlines=True) + input_str = json.dumps(json.loads(f.read()), indent=2) + + with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + args = [nbstripout_exe()] + args + if verify: + args.append("--verify") + pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True) output = pc.stdout - assert output == expected + if verify: + # When using stdin, the dry flag is disregarded. 
+ if input_str != expected_str: + assert pc.returncode == 1 + else: + assert pc.returncode == 0 + else: + assert output == expected + assert pc.returncode == 0 @pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES) -def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path): +@pytest.mark.parametrize("verify", (True, False)) +def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path, verify: bool): with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f: expected = f.read() + expected_str = json.dumps(json.loads(expected), indent=2) p = tmp_path / input_file with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: p.write_text(f.read()) - pc = run([nbstripout_exe(), p] + args, stdout=PIPE, universal_newlines=True) - assert not pc.stdout and p.read_text() == expected + with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: + input_str = json.dumps(json.loads(f.read()), indent=2) + + args = [nbstripout_exe(), p] + args + if verify: + args.append("--verify") + pc = run(args, stdout=PIPE, universal_newlines=True) + + output = pc.stdout.strip() + if verify: + if expected_str != input_str.strip(): + assert pc.returncode == 1 + + # Since verify implies --dry-run, we make sure the file is not modified + # In other words, that the output == input, INSTEAD of output == expected + output.strip() == input_str.strip() + else: + output_file_str = json.dumps(json.loads(p.read_text()), indent=2) + assert pc.returncode == 0 + assert output_file_str == expected_str @pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES) -def test_dry_run_stdin(input_file: str, extra_args: List[str]): +@pytest.mark.parametrize("verify", (True, False)) +def test_dry_run_stdin(input_file: str, extra_args: List[str], verify: bool): expected = "Dry run: would have stripped input from stdin\n" with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f: - pc = run([nbstripout_exe(), "--dry-run"] + extra_args, stdin=f, 
stdout=PIPE, universal_newlines=True) + args = [nbstripout_exe(), "--dry-run"] + extra_args + if verify: + args.append("--verify") + pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True) output = pc.stdout + exit_code = pc.returncode assert output == expected + if verify: + assert exit_code == 1 + else: + assert exit_code == 0 @pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES) -def test_dry_run_args(input_file: str, extra_args: List[str]): +@pytest.mark.parametrize("verify", (True, False)) +def test_dry_run_args(input_file: str, extra_args: List[str], verify: bool): expected_regex = re.compile(f"Dry run: would have stripped .*[/\\\\]{input_file}\n") - - pc = run([nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args, stdout=PIPE, universal_newlines=True) + args = [nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args + if verify: + args.append("--verify") + pc = run(args, stdout=PIPE, universal_newlines=True) output = pc.stdout + exit_code = pc.returncode assert expected_regex.match(output) + if verify: + assert exit_code == 1 @pytest.mark.parametrize("input_file, expected_errs, extra_args", ERR_OUTPUT_CASES) From e6ca67558b5e977ca0ed16390f3b9ec852708878 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Mon, 18 Mar 2024 02:28:44 -0700 Subject: [PATCH 2/4] Update nbstripout/_nbstripout.py Co-authored-by: Florian Rathgeber --- nbstripout/_nbstripout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 4287bfa..f751b60 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -396,7 +396,7 @@ def main(): task.add_argument('--version', action='store_true', help='Print version') parser.add_argument("--verify", action="store_true", - help="Return a non-zero exit code if any files were changed, Implies --dry-run") + help="Return a non-zero exit code if any files were changed, Implies --dry-run") parser.add_argument('--keep-count', action='store_true', help='Do not strip the execution count/prompt number') parser.add_argument('--keep-output', action='store_true', From 5eb68f09283ba9e8d72e4d3ac385a5375de3b715 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 18 Mar 2024 02:29:42 -0700 Subject: [PATCH 3/4] Update nbstripout/_nbstripout.py Co-authored-by: Florian Rathgeber --- nbstripout/_nbstripout.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index f751b60..075c3ee 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -442,10 +442,8 @@ def main(): args = parser.parse_args() git_config = ['git', 'config'] - if args.verify: - if not args.dry_run: - print("Running in verify mode, setting --dry-run") - args.dry_run = True + if args.verify and not args.dry_run: + args.dry_run = True if args._system: git_config.append('--system') From 9fb0d0fa0ae0e73f1ba9e185526aed0470d8049a Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Mon, 18 Mar 2024 02:43:02 -0700 Subject: [PATCH 4/4] Update nbstripout/_nbstripout.py Co-authored-by: Florian Rathgeber --- nbstripout/_nbstripout.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 075c3ee..04abaa8 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -508,8 +508,8 @@ def main(): try: with io.open(filename, 'r+', encoding='utf8', newline='') as f: out = output_stream if args.textconv or args.dry_run else f - any_local_change = process_notebook(f, out, args, extra_keys, filename) - any_change = any_change or any_local_change + if process_notebook(f, out, args, extra_keys, filename): + any_change = True except nbformat.reader.NotJSONError: print(f"No valid notebook detected in '{filename}'", file=sys.stderr)