From 2603b055c5e9c935e0824acef744af5f16abbaaa Mon Sep 17 00:00:00 2001 From: Georg Osang Date: Mon, 8 May 2023 15:44:39 +0200 Subject: [PATCH] Support for multiple input files --- README.md | 4 +-- main.py | 35 ++++++++++++++----------- parsers/creation/contentindexparser.py | 23 +++++++++------- tests/input/example1/content_index1.csv | 3 +++ tests/input/example1/content_index2.csv | 3 +++ tests/test_contentindexparser.py | 28 +++++++++++++------- 6 files changed, 59 insertions(+), 37 deletions(-) create mode 100644 tests/input/example1/content_index1.csv create mode 100644 tests/input/example1/content_index2.csv diff --git a/README.md b/README.md index a52f77d..71b4bda 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,12 @@ In the future this should also include a rewrite of https://github.com/geoo89/ra ## Console tool ``` -main.py {create_flows,flow_to_sheet} input output --format {csv,xlsx,google_sheets} [--datamodels DATAMODELS] +main.py {create_flows,flow_to_sheet} input1 input2 ... -o output --format {csv,xlsx,google_sheets} [--datamodels DATAMODELS] ``` Example: ``` -main.py create_flows tests/input/example1/content_index.csv out.json --format=csv --datamodels=tests.input.example1.nestedmodel +main.py create_flows tests/input/example1/content_index.csv -o out.json --format=csv --datamodels=tests.input.example1.nestedmodel ``` `main.py -h` for more details. 
diff --git a/main.py b/main.py index 6d8fadc..ab4c59c 100644 --- a/main.py +++ b/main.py @@ -13,19 +13,19 @@ def main(): description = 'Generate RapidPro JSON from Spreadsheet(s).\n\n'\ 'Example usage: \n'\ - 'create_flows tests/input/example1/content_index.csv out.json --format=csv --datamodels=tests.input.example1.nestedmodel' + 'create_flows tests/input/example1/content_index.csv --output=out.json --format=csv --datamodels=tests.input.example1.nestedmodel' parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter) parser.add_argument('command', choices=["create_flows", "flow_to_sheet"], help='create_flows: Create flows as defined in the input content index sheet.\n'\ - ' input: Content index sheet defining flows to be created.\n'\ + ' input: Content index sheet(s) defining flows to be created.\n'\ ' output: RapidPro JSON file for writing output flows.\n'\ 'flow_to_sheet: Convert input file into a set of sheets encoding the flows.\n' ' input: RapidPro JSON file to read the flows from.\n'\ ' output: File to write the output sheets to.\n') - parser.add_argument('input', help='Filename, or sheet_id for google sheets (https://docs.google.com/spreadsheets/d/[spreadsheet_id]/edit)') - parser.add_argument('output', help='Filename') - parser.add_argument('--format', required=True, choices=["csv", "xlsx", "google_sheets"], help='Sheet format for reading/writing.') + parser.add_argument('input', nargs='+', help='Filename, or sheet_id for google sheets (https://docs.google.com/spreadsheets/d/[spreadsheet_id]/edit)') + parser.add_argument('-o', '--output', required=True, help='Filename') + parser.add_argument('-f', '--format', required=True, choices=["csv", "xlsx", "google_sheets"], help='Sheet format for reading/writing.') parser.add_argument('--datamodels', help='Module defining models for data sheets. E.g. 
if the definitions reside in ./myfolder/mysubfolder/mymodelsfile.py, then this argument should be myfolder.mysubfolder.mymodelsfile') args = parser.parse_args() @@ -33,17 +33,22 @@ def main(): print(f"Command {args.command} currently unsupported.") return - if args.format == 'csv': - sheet_reader = CSVSheetReader(args.input) - elif args.format == 'xlsx': - sheet_reader = XLSXSheetReader(args.input) - elif args.format == 'google_sheets': - sheet_reader = GoogleSheetReader(args.input) - else: - print(f"Format {args.format} currently unsupported.") - return + for index, infile in enumerate(args.input): + if args.format == 'csv': + sheet_reader = CSVSheetReader(infile) + elif args.format == 'xlsx': + sheet_reader = XLSXSheetReader(infile) + elif args.format == 'google_sheets': + sheet_reader = GoogleSheetReader(infile) + else: + print(f"Format {args.format} currently unsupported.") + return + + if index == 0: + ci_parser = ContentIndexParser(sheet_reader, args.datamodels) + else: + ci_parser.add_content_index(sheet_reader) - ci_parser = ContentIndexParser(sheet_reader, args.datamodels) output = ci_parser.parse_all_flows() json.dump(output.render(), open(args.output, 'w'), indent=4) diff --git a/parsers/creation/contentindexparser.py b/parsers/creation/contentindexparser.py index 06c0130..72c9a17 100644 --- a/parsers/creation/contentindexparser.py +++ b/parsers/creation/contentindexparser.py @@ -20,16 +20,19 @@ def __init__(self, table, argument_definitions): class ContentIndexParser: def __init__(self, sheet_reader, user_data_model_module_name=None): - self.sheet_reader = sheet_reader self.template_sheets = {} # values: tablib tables self.data_sheets = {} # values: OrderedDicts of RowModels self.flow_definition_rows = [] # list of ContentIndexRowModel if user_data_model_module_name: self.user_models_module = importlib.import_module(user_data_model_module_name) - main_sheet = self.sheet_reader.get_main_sheet() - self.process_content_index_table(main_sheet, 
"content_index") + main_sheet = sheet_reader.get_main_sheet() + self.process_content_index_table(sheet_reader, main_sheet, "content_index") - def process_content_index_table(self, content_index_table, content_index_name): + def add_content_index(self, sheet_reader): + main_sheet = sheet_reader.get_main_sheet() + self.process_content_index_table(sheet_reader, main_sheet, "content_index") + + def process_content_index_table(self, sheet_reader, content_index_table, content_index_name): # content_index_table is in tablib table format row_parser = RowParser(ContentIndexRowModel, CellParser()) sheet_parser = SheetParser(row_parser, content_index_table) @@ -43,26 +46,26 @@ def process_content_index_table(self, content_index_table, content_index_name): if not len(row.sheet_name) == 1: LOGGER.critical('For content_index rows, exactly one sheet_name has to be specified') sheet_name = row.sheet_name[0] - sheet = self.sheet_reader.get_sheet(sheet_name) + sheet = sheet_reader.get_sheet(sheet_name) with logging_context(f"{sheet_name}"): - self.process_content_index_table(sheet, sheet_name) + self.process_content_index_table(sheet_reader, sheet, sheet_name) elif row.type == 'data_sheet': if not len(row.sheet_name) >= 1: LOGGER.critical('For data_sheet rows, at least one sheet_name has to be specified') - self.process_data_sheet(row.sheet_name, row.new_name, row.data_model) + self.process_data_sheet(sheet_reader, row.sheet_name, row.new_name, row.data_model) elif row.type in ['template_definition', 'create_flow']: if not len(row.sheet_name) == 1: LOGGER.critical('For template_definition/create_flow rows, exactly one sheet_name has to be specified') sheet_name = row.sheet_name[0] if sheet_name not in self.template_sheets: - sheet = self.sheet_reader.get_sheet(sheet_name) + sheet = sheet_reader.get_sheet(sheet_name) self.template_sheets[sheet_name] = TemplateSheet(sheet, row.template_argument_definitions) if row.type == 'create_flow': 
self.flow_definition_rows.append((logging_prefix, row)) else: LOGGER.error(f'invalid type: "{row.type}"') - def process_data_sheet(self, sheet_names, new_name, data_model_name): + def process_data_sheet(self, sheet_reader, sheet_names, new_name, data_model_name): if not hasattr(self, 'user_models_module'): LOGGER.critical(f'If there are data sheets, a user_data_model_module_name has to be provided (as commandline argument)') return @@ -77,7 +80,7 @@ def process_data_sheet(self, sheet_names, new_name, data_model_name): content = OrderedDict() for sheet_name in sheet_names: with logging_context(sheet_name): - data_table = self.sheet_reader.get_sheet(sheet_name) + data_table = sheet_reader.get_sheet(sheet_name) try: user_model = getattr(self.user_models_module, data_model_name) except AttributeError: diff --git a/tests/input/example1/content_index1.csv b/tests/input/example1/content_index1.csv new file mode 100644 index 0000000..cb844ac --- /dev/null +++ b/tests/input/example1/content_index1.csv @@ -0,0 +1,3 @@ +type,sheet_name,data_sheet,data_row_id,new_name,data_model,status +create_flow,my_template,nesteddata,row1,,, +data_sheet,nesteddata,,,,NestedRowModel, diff --git a/tests/input/example1/content_index2.csv b/tests/input/example1/content_index2.csv new file mode 100644 index 0000000..c1467e5 --- /dev/null +++ b/tests/input/example1/content_index2.csv @@ -0,0 +1,3 @@ +type,sheet_name,data_sheet,data_row_id,new_name,data_model,status +create_flow,my_template,nesteddata,row2,,, +create_flow,my_basic_flow,,,,, diff --git a/tests/test_contentindexparser.py b/tests/test_contentindexparser.py index b0c2472..e285bec 100644 --- a/tests/test_contentindexparser.py +++ b/tests/test_contentindexparser.py @@ -19,23 +19,31 @@ def compare_messages(self, render_output, flow_name, messages_exp, context=None) if not flow_found: self.assertTrue(False, msg=f'Flow with name "{flow_name}" does not exist in output.') - def test_example1_csv(self): - # Same test as test_generate_flows 
in parsers/creation/tests/test_contentindexparser - # but with csvs - sheet_reader = CSVSheetReader('tests/input/example1/content_index.csv') - ci_parser = ContentIndexParser(sheet_reader, 'tests.input.example1.nestedmodel') + def check_example1(self, ci_parser): container = ci_parser.parse_all_flows() render_output = container.render() self.compare_messages(render_output, 'my_basic_flow', ['Some text']) self.compare_messages(render_output, 'my_template - row1', ['Value1', 'Happy1 and Sad1']) self.compare_messages(render_output, 'my_template - row2', ['Value2', 'Happy2 and Sad2']) + def test_example1_csv(self): + # Same test as test_generate_flows in parsers/creation/tests/test_contentindexparser + # but with csvs + sheet_reader = CSVSheetReader('tests/input/example1/content_index.csv') + ci_parser = ContentIndexParser(sheet_reader, 'tests.input.example1.nestedmodel') + self.check_example1(ci_parser) + + def test_example1_split_csv(self): + # Same test as test_example1_csv, but with the content index split + # across content_index1.csv and content_index2.csv + sheet_reader = CSVSheetReader('tests/input/example1/content_index1.csv') + ci_parser = ContentIndexParser(sheet_reader, 'tests.input.example1.nestedmodel') + sheet_reader = CSVSheetReader('tests/input/example1/content_index2.csv') + ci_parser.add_content_index(sheet_reader) + self.check_example1(ci_parser) + def test_example1_xlsx(self): # Same test as above sheet_reader = XLSXSheetReader('tests/input/example1/content_index.xlsx') ci_parser = ContentIndexParser(sheet_reader, 'tests.input.example1.nestedmodel') - container = ci_parser.parse_all_flows() - render_output = container.render() - self.compare_messages(render_output, 'my_basic_flow', ['Some text']) - self.compare_messages(render_output, 'my_template - row1', ['Value1', 'Happy1 and Sad1']) - self.compare_messages(render_output, 'my_template - row2', ['Value2', 'Happy2 and Sad2']) + self.check_example1(ci_parser)