Skip to content

Commit

Permalink
Merge branch 'multi-input' of github.com:IDEMSInternational/rapidpro-flow-toolkit into main
Browse files Browse the repository at this point in the history

Support for multiple input files
  • Loading branch information
geoo89 committed Jun 27, 2023
2 parents 69205eb + 2603b05 commit d77976b
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 43 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ In the future this should also include a rewrite of https://github.com/geoo89/ra

## Console tool
```
main.py {create_flows,flow_to_sheet} input output --format {csv,xlsx,google_sheets} [--datamodels DATAMODELS]
main.py {create_flows,flow_to_sheet} input1 input2 ... -o output --format {csv,xlsx,google_sheets} [--datamodels DATAMODELS]
```

Example:
```
main.py create_flows tests/input/example1/content_index.csv out.json --format=csv --datamodels=tests.input.example1.nestedmodel
main.py create_flows tests/input/example1/content_index.csv -o out.json --format=csv --datamodels=tests.input.example1.nestedmodel
```

`main.py -h` for more details.
Expand Down
35 changes: 19 additions & 16 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@
def main():
description = 'Generate RapidPro JSON from Spreadsheet(s).\n\n'\
'Example usage: \n'\
'create_flows tests/input/example1/content_index.csv out.json --format=csv --datamodels=tests.input.example1.nestedmodel'
'create_flows tests/input/example1/content_index.csv --output=out.json --format=csv --datamodels=tests.input.example1.nestedmodel'
parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('command',
choices=["create_flows", "flow_to_sheet"],
help='create_flows: Create flows as defined in the input content index sheet.\n'\
' input: Content index sheet defining flows to be created.\n'\
' input: Content index sheet(s) defining flows to be created.\n'\
' output: RapidPro JSON file for writing output flows.\n'\
'flow_to_sheet: Convert input file into a set of sheets encoding the flows.\n'
' input: RapidPro JSON file to read the flows from.\n'\
' output: File to write the output sheets to.\n')
parser.add_argument('input', help='Filename, or sheet_id for google sheets (https://docs.google.com/spreadsheets/d/[spreadsheet_id]/edit)')
parser.add_argument('output', help='Filename')
parser.add_argument('--format', required=True, choices=["csv", "xlsx", "google_sheets"], help='Sheet format for reading/writing.')
parser.add_argument('input', nargs='+', help='Filename, or sheet_id for google sheets (https://docs.google.com/spreadsheets/d/[spreadsheet_id]/edit)')
parser.add_argument('-o', '--output', required=True, help='Filename')
parser.add_argument('-f', '--format', required=True, choices=["csv", "xlsx", "google_sheets"], help='Sheet format for reading/writing.')
parser.add_argument('--datamodels', help='Module defining models for data sheets. E.g. if the definitions reside in ./myfolder/mysubfolder/mymodelsfile.py, then this argument should be myfolder.mysubfolder.mymodelsfile')
parser.add_argument('--tags', nargs='*', help='Tags to filter the content index sheet. A sequence of lists, with each list starting with an integer (tag position) followed by tags to include for this position. Example: 1 foo bar 2 baz means: only include rows if tags:1 is empty, foo or bar, and tags:2 is empty or baz.')
args = parser.parse_args()
Expand All @@ -35,18 +35,21 @@ def main():
print(f"Command {args.command} currently unsupported.")
return

if args.format == 'csv':
sheet_reader = CSVSheetReader(args.input)
elif args.format == 'xlsx':
sheet_reader = XLSXSheetReader(args.input)
elif args.format == 'google_sheets':
sheet_reader = GoogleSheetReader(args.input)
else:
print(f"Format {args.format} currently unsupported.")
return

tag_matcher = TagMatcher(args.tags)
ci_parser = ContentIndexParser(sheet_reader, args.datamodels, tag_matcher=tag_matcher)
for index, infile in enumerate(args.input):
if args.format == 'csv':
sheet_reader = CSVSheetReader(infile)
elif args.format == 'xlsx':
sheet_reader = XLSXSheetReader(infile)
elif args.format == 'google_sheets':
sheet_reader = GoogleSheetReader(infile)
else:
print(f"Format {args.format} currently unsupported.")
return
if index == 0:
ci_parser = ContentIndexParser(sheet_reader, args.datamodels, tag_matcher=tag_matcher)
else:
ci_parser.add_content_index(sheet_reader)
output = ci_parser.parse_all()
json.dump(output.render(), open(args.output, 'w'), indent=4)

Expand Down
45 changes: 26 additions & 19 deletions parsers/creation/contentindexparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,21 @@ def __init__(self, table, argument_definitions):
class ContentIndexParser:

def __init__(self, sheet_reader, user_data_model_module_name=None, tag_matcher=TagMatcher()):
self.sheet_reader = sheet_reader
self.tag_matcher = tag_matcher
self.template_sheets = {} # values: tablib tables
self.data_sheets = {} # values: OrderedDicts of RowModels
self.flow_definition_rows = [] # list of ContentIndexRowModel
self.campaign_definition_rows = [] # list of ContentIndexRowModel
self.campaign_parsers = [] # list of CampaignParser
if user_data_model_module_name:
self.user_models_module = importlib.import_module(user_data_model_module_name)
main_sheet = self.sheet_reader.get_main_sheet()
self.process_content_index_table(main_sheet, "content_index")
main_sheet = sheet_reader.get_main_sheet()
self.process_content_index_table(sheet_reader, main_sheet, "content_index")

def process_content_index_table(self, content_index_table, content_index_name):
def add_content_index(self, sheet_reader):
main_sheet = sheet_reader.get_main_sheet()
self.process_content_index_table(sheet_reader, main_sheet, "content_index")

def process_content_index_table(self, sheet_reader, content_index_table, content_index_name):
# content_index_table is in tablib table format
row_parser = RowParser(ContentIndexRowModel, CellParser())
sheet_parser = SheetParser(row_parser, content_index_table)
Expand All @@ -50,30 +53,31 @@ def process_content_index_table(self, content_index_table, content_index_name):
if not len(row.sheet_name) == 1:
LOGGER.critical('For content_index rows, exactly one sheet_name has to be specified')
sheet_name = row.sheet_name[0]
sheet = self.sheet_reader.get_sheet(sheet_name)
sheet = sheet_reader.get_sheet(sheet_name)
with logging_context(f"{sheet_name}"):
self.process_content_index_table(sheet, sheet_name)
self.process_content_index_table(sheet_reader, sheet, sheet_name)
elif row.type == 'data_sheet':
if not len(row.sheet_name) >= 1:
LOGGER.critical('For data_sheet rows, at least one sheet_name has to be specified')
self.process_data_sheet(row.sheet_name, row.new_name, row.data_model)
self.process_data_sheet(sheet_reader, row.sheet_name, row.new_name, row.data_model)
elif row.type in ['template_definition', 'create_flow']:
if not len(row.sheet_name) == 1:
LOGGER.critical('For template_definition/create_flow rows, exactly one sheet_name has to be specified')
sheet_name = row.sheet_name[0]
if sheet_name not in self.template_sheets:
sheet = self.sheet_reader.get_sheet(sheet_name)
sheet = sheet_reader.get_sheet(sheet_name)
self.template_sheets[sheet_name] = TemplateSheet(sheet, row.template_argument_definitions)
if row.type == 'create_flow':
self.flow_definition_rows.append((logging_prefix, row))
elif row.type == 'create_campaign':
if not len(row.sheet_name) == 1:
LOGGER.critical('For create_campaign rows, exactly one sheet_name has to be specified')
self.campaign_definition_rows.append((logging_prefix, row))
campaign_parser = self.create_campaign_parser(sheet_reader, row)
self.campaign_parsers.append((logging_prefix, campaign_parser))
else:
LOGGER.error(f'invalid type: "{row.type}"')

def process_data_sheet(self, sheet_names, new_name, data_model_name):
def process_data_sheet(self, sheet_reader, sheet_names, new_name, data_model_name):
if not hasattr(self, 'user_models_module'):
LOGGER.critical(f'If there are data sheets, a user_data_model_module_name has to be provided (as commandline argument)')
return
Expand All @@ -88,7 +92,7 @@ def process_data_sheet(self, sheet_names, new_name, data_model_name):
content = OrderedDict()
for sheet_name in sheet_names:
with logging_context(sheet_name):
data_table = self.sheet_reader.get_sheet(sheet_name)
data_table = sheet_reader.get_sheet(sheet_name)
try:
user_model = getattr(self.user_models_module, data_model_name)
except AttributeError:
Expand Down Expand Up @@ -124,15 +128,18 @@ def parse_all(self):
self.parse_all_campaigns(rapidpro_container)
return rapidpro_container

def create_campaign_parser(self, sheet_reader, row):
    """Build a CampaignParser from a create_campaign content-index row.

    Reads the campaign's event sheet (first entry of row.sheet_name) via the
    given sheet_reader, parses its rows into CampaignEventRowModel instances,
    and wraps them in a CampaignParser named row.new_name (falling back to the
    sheet name) for the group row.group.
    """
    name = row.sheet_name[0]
    table = sheet_reader.get_sheet(name)
    event_rows = SheetParser(
        RowParser(CampaignEventRowModel, CellParser()), table
    ).parse_all()
    campaign_name = row.new_name or name
    return CampaignParser(campaign_name, row.group, event_rows)

def parse_all_campaigns(self, rapidpro_container):
for logging_prefix, row in self.campaign_definition_rows:
sheet_name = row.sheet_name[0]
for logging_prefix, campaign_parser in self.campaign_parsers:
sheet_name = campaign_parser.campaign.name
with logging_context(f'{logging_prefix} | {sheet_name}'):
sheet = self.sheet_reader.get_sheet(sheet_name)
row_parser = RowParser(CampaignEventRowModel, CellParser())
sheet_parser = SheetParser(row_parser, sheet)
rows = sheet_parser.parse_all()
campaign_parser = CampaignParser(row.new_name or sheet_name, row.group, rows)
campaign = campaign_parser.parse()
rapidpro_container.add_campaign(campaign)

Expand Down
3 changes: 3 additions & 0 deletions tests/input/example1/content_index1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
type,sheet_name,data_sheet,data_row_id,new_name,data_model,status
create_flow,my_template,nesteddata,row1,,,
data_sheet,nesteddata,,,,NestedRowModel,
4 changes: 4 additions & 0 deletions tests/input/example1/content_index2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"type","sheet_name","data_sheet","data_row_id","new_name","data_model","status","group"
"create_flow","my_template","nesteddata","row2",,,,
"create_flow","my_basic_flow",,,,,,
"create_campaign","my_campaign",,,,,,"My Group"
22 changes: 16 additions & 6 deletions tests/test_contentindexparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,29 @@ def compare_to_expected(self, render_output):
self.assertEqual(render_output["campaigns"][0]["events"][0]["flow"]["name"], 'my_basic_flow')
self.assertEqual(render_output["campaigns"][0]["events"][0]["flow"]["uuid"], render_output["flows"][2]["uuid"])

def check_example1(self, ci_parser):
    """Render all flows/campaigns from ci_parser and check the expected output."""
    rendered = ci_parser.parse_all().render()
    self.compare_to_expected(rendered)

def test_example1_csv(self):
# Same test as test_generate_flows in parsers/creation/tests/test_contentindexparser
# but with csvs
sheet_reader = CSVSheetReader('tests/input/example1/content_index.csv')
ci_parser = ContentIndexParser(sheet_reader, 'tests.input.example1.nestedmodel')
container = ci_parser.parse_all()
render_output = container.render()
self.compare_to_expected(render_output)
self.check_example1(ci_parser)

def test_example1_split_csv(self):
    # Same scenario as test_generate_flows in parsers/creation/tests/
    # test_contentindexparser, but with the content index split across
    # two CSV files added one after the other.
    first_reader = CSVSheetReader('tests/input/example1/content_index1.csv')
    ci_parser = ContentIndexParser(first_reader, 'tests.input.example1.nestedmodel')
    second_reader = CSVSheetReader('tests/input/example1/content_index2.csv')
    ci_parser.add_content_index(second_reader)
    self.check_example1(ci_parser)

def test_example1_xlsx(self):
# Same test as above
sheet_reader = XLSXSheetReader('tests/input/example1/content_index.xlsx')
ci_parser = ContentIndexParser(sheet_reader, 'tests.input.example1.nestedmodel')
container = ci_parser.parse_all()
render_output = container.render()
self.compare_to_expected(render_output)
self.check_example1(ci_parser)

0 comments on commit d77976b

Please sign in to comment.