kynan · jspaezp · Feb 18, 2024 · Mar 18, 2024 · Mar 18, 2024 · Mar 18, 2024
diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py
@@ -331,27 +331,39 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False):
         return 1
 
 def process_notebook(input_stream, output_stream, args, extra_keys, filename='input from stdin'):
+    any_change = False
     if args.mode == 'zeppelin':
         nb = json.load(input_stream, object_pairs_hook=collections.OrderedDict)
+        nb_str_orig = json.dumps(nb, indent=2)
         nb_stripped = strip_zeppelin_output(nb)
+
+        nb_str_stripped = json.dumps(nb_stripped, indent=2)
+        if nb_str_orig != nb_str_stripped:
+            any_change = True
+
         if args.dry_run:
             output_stream.write(f'Dry run: would have stripped {filename}\n')
-            return
+            return any_change
         if output_stream.seekable():
             output_stream.seek(0)
             output_stream.truncate()
         json.dump(nb_stripped, output_stream, indent=2)
         output_stream.write('\n')
         output_stream.flush()
-        return
+        return any_change
+
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", category=UserWarning)
         nb = nbformat.read(input_stream, as_version=nbformat.NO_CONVERT)
 
+    nb_start_str = json.dumps(nb, indent=2)
     nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id,
                         extra_keys, args.drop_empty_cells,
                         args.drop_tagged_cells.split(), args.strip_init_cells,
                         _parse_size(args.max_size))
+    nb_end_str = json.dumps(nb, indent=2)
+    if nb_start_str != nb_end_str:
+        any_change = True
 
     if args.dry_run:
         output_stream.write(f'Dry run: would have stripped {filename}\n')
@@ -363,7 +375,7 @@ def process_notebook(input_stream, output_stream, args, extra_keys, filename='in
             warnings.simplefilter("ignore", category=UserWarning)
             nbformat.write(nb, output_stream)
         output_stream.flush()
-
+    return any_change
 
 def main():
     parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter)
@@ -383,6 +395,8 @@ def main():
                       'repository and configuration summary if installed')
     task.add_argument('--version', action='store_true',
                       help='Print version')
+    parser.add_argument("--verify", action="store_true",
+                        help="Return a non-zero exit code if any files were changed, Implies --dry-run")
     parser.add_argument('--keep-count', action='store_true',
                         help='Do not strip the execution count/prompt number')
     parser.add_argument('--keep-output', action='store_true',
@@ -428,6 +442,9 @@ def main():
     args = parser.parse_args()
     git_config = ['git', 'config']
 
+    if args.verify and not args.dry_run:
+        args.dry_run = True
+
     if args._system:
         git_config.append('--system')
         install_location = INSTALL_LOCATION_SYSTEM
@@ -483,14 +500,17 @@ def main():
     input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') if sys.stdin else None
     output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='')
 
+    any_change = False
     for filename in args.files:
         if not (args.force or filename.endswith('.ipynb') or filename.endswith('.zpln')):
             continue
 
         try:
             with io.open(filename, 'r+', encoding='utf8', newline='') as f:
                 out = output_stream if args.textconv or args.dry_run else f
-                process_notebook(f, out, args, extra_keys, filename)
+                if process_notebook(f, out, args, extra_keys, filename):
+                    any_change = True
+
         except nbformat.reader.NotJSONError:
             print(f"No valid notebook detected in '{filename}'", file=sys.stderr)
             raise SystemExit(1)
@@ -504,7 +524,11 @@ def main():
 
     if not args.files and input_stream:
         try:
-            process_notebook(input_stream, output_stream, args, extra_keys)
+            any_local_change = process_notebook(input_stream, output_stream, args, extra_keys)
+            any_change = any_change or any_local_change
-            any_local_change = process_notebook(input_stream, output_stream, args, extra_keys)
-            any_change = any_change or any_local_change
+            if process_notebook(input_stream, output_stream, args, extra_keys):
+                any_change = True
-            any_local_change = process_notebook(input_stream, output_stream, args, extra_keys)
-            any_change = any_change or any_local_change
+            if process_notebook(input_stream, output_stream, args, extra_keys):
+                any_change = True
         except nbformat.reader.NotJSONError:
             print('No valid notebook detected on stdin', file=sys.stderr)
             raise SystemExit(1)
+
+    if args.verify and any_change:
+        raise SystemExit(1)
diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py
@@ -1,6 +1,7 @@
 import os
 from pathlib import Path
 import re
+import json
 from subprocess import run, PIPE
 # Note: typing.Pattern is deprecated, for removal in 3.13 in favour of re.Pattern introduced in 3.8
 from typing import List, Union, Pattern
@@ -53,49 +54,100 @@ def nbstripout_exe():
 
 
 @pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES)
-def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str]):
+@pytest.mark.parametrize("verify", (True, False))
+def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str], verify: bool):
     with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f:
         expected = f.read()
+        expected_str = json.dumps(json.loads(expected), indent=2)
 
     with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
-        pc = run([nbstripout_exe()] + args, stdin=f, stdout=PIPE, universal_newlines=True)
+        input_str = json.dumps(json.loads(f.read()), indent=2)
+
+    with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
+        args = [nbstripout_exe()] + args
+        if verify:
+            args.append("--verify")
+        pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True)
         output = pc.stdout
 
-    assert output == expected
+    if verify:
+        # When using stin, the dry flag is disregarded.
+        if input_str != expected_str:
+            assert pc.returncode == 1
+        else:
+            assert pc.returncode == 0
+    else:
+        assert output == expected
+        assert pc.returncode == 0
 
 
 @pytest.mark.parametrize("input_file, expected_file, args", TEST_CASES)
-def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path):
+@pytest.mark.parametrize("verify", (True, False))
+def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path, verify: bool):
     with open(NOTEBOOKS_FOLDER / expected_file, mode="r") as f:
         expected = f.read()
+        expected_str = json.dumps(json.loads(expected), indent=2)
 
     p = tmp_path / input_file
     with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
         p.write_text(f.read())
-    pc = run([nbstripout_exe(), p] + args, stdout=PIPE, universal_newlines=True)
 
-    assert not pc.stdout and p.read_text() == expected
+    with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
+        input_str = json.dumps(json.loads(f.read()), indent=2)
+
+    args = [nbstripout_exe(), p] + args
+    if verify:
+        args.append("--verify")
+    pc = run(args, stdout=PIPE, universal_newlines=True)
+
+    output = pc.stdout.strip()
+    if verify:
+        if expected_str != input_str.strip():
+            assert pc.returncode == 1
+
+        # Since verify implies --dry-run, we make sure the file is not modified
+        # In other words, that the output == input, INSTEAD of output == expected
+        output.strip() == input_str.strip()
+    else:
+        output_file_str = json.dumps(json.loads(p.read_text()), indent=2)
+        assert pc.returncode == 0
+        assert output_file_str == expected_str
 
 
 @pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES)
-def test_dry_run_stdin(input_file: str, extra_args: List[str]):
+@pytest.mark.parametrize("verify", (True, False))
+def test_dry_run_stdin(input_file: str, extra_args: List[str], verify: bool):
     expected = "Dry run: would have stripped input from stdin\n"
 
     with open(NOTEBOOKS_FOLDER / input_file, mode="r") as f:
-        pc = run([nbstripout_exe(), "--dry-run"] + extra_args, stdin=f, stdout=PIPE, universal_newlines=True)
+        args = [nbstripout_exe(), "--dry-run"] + extra_args
+        if verify:
+            args.append("--verify")
+        pc = run(args, stdin=f, stdout=PIPE, universal_newlines=True)
         output = pc.stdout
+        exit_code = pc.returncode
 
     assert output == expected
+    if verify:
+        assert exit_code == 1
+    else:
+        assert exit_code == 0
 
 
 @pytest.mark.parametrize("input_file, extra_args", DRY_RUN_CASES)
-def test_dry_run_args(input_file: str, extra_args: List[str]):
+@pytest.mark.parametrize("verify", (True, False))
+def test_dry_run_args(input_file: str, extra_args: List[str], verify: bool):
     expected_regex = re.compile(f"Dry run: would have stripped .*[/\\\\]{input_file}\n")
-
-    pc = run([nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args, stdout=PIPE, universal_newlines=True)
+    args = [nbstripout_exe(), str(NOTEBOOKS_FOLDER / input_file), "--dry-run", ] + extra_args
+    if verify:
+        args.append("--verify")
+    pc = run(args, stdout=PIPE, universal_newlines=True)
     output = pc.stdout
+    exit_code = pc.returncode
 
     assert expected_regex.match(output)
+    if verify:
+        assert exit_code == 1
 
 
 @pytest.mark.parametrize("input_file, expected_errs, extra_args", ERR_OUTPUT_CASES)