From 37305ce5e6ef1b91f6058eff5b128064f76fbe45 Mon Sep 17 00:00:00 2001
From: Simon Bray <32272674+simonbray@users.noreply.github.com>
Date: Thu, 23 Nov 2023 20:48:52 +0100
Subject: [PATCH] Fix two bugs with `split_file_to_collection` (#1358)

* fix two bugs

* allow chunksize to be greater than the number of records in the input file without failing

* allow an empty file to be split without an error

* bump version number
---
 .../split_file_to_collection/split_file_to_collection.py  | 3 ++-
 .../split_file_to_collection/split_file_to_collection.xml | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/text_processing/split_file_to_collection/split_file_to_collection.py b/tools/text_processing/split_file_to_collection/split_file_to_collection.py
index 675e15451d..6580fecf2b 100755
--- a/tools/text_processing/split_file_to_collection/split_file_to_collection.py
+++ b/tools/text_processing/split_file_to_collection/split_file_to_collection.py
@@ -224,6 +224,7 @@ def split_by_record(args, in_file, out_dir, top, ftype):
         for i in range(top):
             f.readline()
         n_records = 0
+        last_line_matched = False
         for line in f:
             if (num == 0 and re.match(sep, line) is not None) or (
                 num > 0 and n_records % num == 0
@@ -241,7 +242,7 @@
     if chunksize == 0:  # i.e. no chunking
         n_per_file = n_records // numnew
     else:
-        numnew = n_records // chunksize
+        numnew = max(n_records // chunksize, 1)  # should not be less than 1
        n_per_file = chunksize

 # make new files
diff --git a/tools/text_processing/split_file_to_collection/split_file_to_collection.xml b/tools/text_processing/split_file_to_collection/split_file_to_collection.xml
index 56c4248d99..354df3f590 100644
--- a/tools/text_processing/split_file_to_collection/split_file_to_collection.xml
+++ b/tools/text_processing/split_file_to_collection/split_file_to_collection.xml
@@ -1,4 +1,4 @@
-
+
     to dataset collection