Skip to content

Commit

Permalink
json_input: If json_dict is not a dict, return a useful error/warning
Browse files Browse the repository at this point in the history
  • Loading branch information
Bjwebb committed Jun 30, 2024
1 parent 441dd52 commit 27b2cb4
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 53 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]

## [0.24.3]
## [0.25.0]

### Fixed

- Ignore null characters in the input CSV file when reading configuration from the header rows
https://github.com/OpenDataServices/flatten-tool/pull/446

### Changed

- If `json_dict` is not a dict, return a useful error/warning https://github.com/OpenDataServices/flatten-tool/issues/442

## [0.24.2] - 2024-06-12

### Fixed
Expand Down
9 changes: 9 additions & 0 deletions flattentool/json_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import zc.zlibstorage
import ZODB.FileStorage

from flattentool.exceptions import DataErrorWarning
from flattentool.i18n import _
from flattentool.input import path_search
from flattentool.schema import make_sub_sheet_name
Expand Down Expand Up @@ -310,6 +311,14 @@ def parse(self):
# This is particularly useful for IATI XML, in order to not
# fall over on empty activity, e.g. <iati-activity/>
continue

if not isinstance(json_dict, dict):
warn(
_(f"The value at index {num} is not a JSON object"),
DataErrorWarning,
)
continue

self.parse_json_dict(json_dict, sheet=self.main_sheet)
# only persist every 2000 objects. persisting more often slows down storing.
# 2000 top level objects normally not too much to store in memory.
Expand Down
114 changes: 62 additions & 52 deletions flattentool/locale/en/LC_MESSAGES/flatten-tool.po
Original file line number Diff line number Diff line change
@@ -1,232 +1,242 @@
# Translations template for flattentool.
# Copyright (C) 2020 ORGANIZATION
# Copyright (C) 2024 ORGANIZATION
# This file is distributed under the same license as the flattentool
# project.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: flattentool 0.14.0\n"
"Project-Id-Version: flattentool 0.24.1\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2020-10-16 08:52+0000\n"
"POT-Creation-Date: 2024-06-12 12:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.8.0\n"
"Generated-By: Babel 2.15.0\n"

#: flattentool/input.py:49
#: flattentool/input.py:70
msgid ""
"Non-numeric value \"{}\" found in number column, returning as string "
"instead."
msgstr ""

#: flattentool/input.py:60
#: flattentool/input.py:81
msgid ""
"Non-integer value \"{}\" found in integer column, returning as string "
"instead."
msgstr ""

#: flattentool/input.py:74
#: flattentool/input.py:95
msgid "Unrecognised value for boolean: \"{}\", returning as string instead"
msgstr ""

#: flattentool/input.py:92
#: flattentool/input.py:113
msgid ""
"Non-numeric value \"{}\" found in number array column, returning as "
"string array instead)."
msgstr ""

#: flattentool/input.py:145
#: flattentool/input.py:136
msgid ""
"An invalid WKT string was supplied \"{value}\", the message from the "
"parser was: {parser_msg}"
msgstr ""

#: flattentool/input.py:183
msgid "because it treats {} as an array, but another column does not"
msgstr ""

#: flattentool/input.py:158
#: flattentool/input.py:196
msgid "Overwriting cell {} by mistake"
msgstr ""

#: flattentool/input.py:170
#: flattentool/input.py:208
msgid "because it treats {} as an object, but another column does not"
msgstr ""

#: flattentool/input.py:189
#: flattentool/input.py:227
msgid "because another column treats it as an array or object"
msgstr ""

#: flattentool/input.py:207
#: flattentool/input.py:245
msgid ""
"You may have a duplicate Identifier: We couldn't merge these rows with "
"the {}: field \"{}\" in sheet \"{}\": one cell has the value: \"{}\", the"
" other cell has the value: \"{}\""
msgstr ""

#: flattentool/input.py:341 flattentool/input.py:361
#: flattentool/input.py:381 flattentool/input.py:401
msgid ""
"Duplicate heading \"{}\" found, ignoring the data in columns {} and {} "
"(sheet: \"{}\")."
msgstr ""

#: flattentool/input.py:376
#: flattentool/input.py:416
msgid ""
"Duplicate heading \"{}\" found, ignoring the data in column {} (sheet: "
"\"{}\")."
msgstr ""

#: flattentool/input.py:499
#: flattentool/input.py:546
msgid "Row/cell collision: {}"
msgstr ""

#: flattentool/input.py:511 flattentool/input.py:522 flattentool/input.py:527
#: flattentool/input.py:531
#: flattentool/input.py:558 flattentool/input.py:569 flattentool/input.py:574
#: flattentool/input.py:578
msgid "Already have key {}"
msgstr ""

#: flattentool/input.py:534
#: flattentool/input.py:581
msgid "Two sub-cells have different values: {}, {}"
msgstr ""

#: flattentool/input.py:540 flattentool/input.py:563
#: flattentool/input.py:587 flattentool/input.py:610
msgid "Unexpected result type in the JSON cell tree: {}"
msgstr ""

#: flattentool/input.py:660
#: flattentool/input.py:707
msgid "The supplied file has extension .xlsx but isn't an XLSX file."
msgstr ""

#: flattentool/input.py:931
#: flattentool/input.py:985
msgid "Column \"{}\" has been ignored because it is a number."
msgstr ""

#: flattentool/input.py:959
#: flattentool/input.py:1013
msgid "There is an array at '{}' when the schema says there should be a '{}'"
msgstr ""

#: flattentool/input.py:973
#: flattentool/input.py:1027
msgid ""
"Column {} has been ignored, because it treats {} as an array, but another"
" column does not."
msgstr ""

#: flattentool/input.py:997
#: flattentool/input.py:1051
msgid ""
"Column {} has been ignored, because it treats {} as an object, but "
"another column does not."
msgstr ""

#: flattentool/input.py:1011
#: flattentool/input.py:1065
msgid "There is an object or list at '{}' but it should be an {}"
msgstr ""

#: flattentool/input.py:1025
#: flattentool/input.py:1079
msgid ""
"Column {} has been ignored, because another column treats it as an array "
"or object"
msgstr ""

#: flattentool/json_input.py:154
#: flattentool/json_input.py:197
msgid "Using rollUp values from schema, ignoring direct input."
msgstr ""

#: flattentool/json_input.py:167
#: flattentool/json_input.py:210
msgid ""
"No fields to rollup found (pass json path directly, as a list in a file, "
"or via a schema)"
msgstr ""

#: flattentool/json_input.py:175
#: flattentool/json_input.py:218
msgid ""
"Invalid value passed for rollup (pass json path directly, as a list in a "
"file, or via a schema)"
msgstr ""

#: flattentool/json_input.py:193
msgid "Etiher json_filename or root_json_dict must be supplied"
#: flattentool/json_input.py:238
msgid "Either json_filename or root_json_dict must be supplied"
msgstr ""

#: flattentool/json_input.py:198
#: flattentool/json_input.py:243
msgid "Only one of json_file or root_json_dict should be supplied"
msgstr ""

#: flattentool/json_input.py:236
#: flattentool/json_input.py:276
msgid ""
"You wanted to preserve the following fields which are not present in the "
"supplied schema: {}"
msgstr ""

#: flattentool/json_input.py:274
#: flattentool/json_input.py:341
msgid ""
"You wanted to preserve the following fields which are not present in the "
"input data: {}"
msgstr ""

#: flattentool/json_input.py:380
#: flattentool/json_input.py:382
msgid "Invalid GeoJSON: {parser_msg}"
msgstr ""

#: flattentool/json_input.py:480
msgid ""
"Warning: No schema was provided so column headings are JSON keys, not "
"titles."
msgstr ""

#: flattentool/json_input.py:397
#: flattentool/json_input.py:497
msgid "Rolled up values must be basic types"
msgstr ""

#: flattentool/json_input.py:474 flattentool/json_input.py:485
#: flattentool/json_input.py:580 flattentool/json_input.py:591
msgid ""
"More than one value supplied for \"{}\". Could not provide rollup, so "
"adding a warning to the relevant cell(s) in the spreadsheet."
msgstr ""

#: flattentool/json_input.py:480 flattentool/json_input.py:491
#: flattentool/json_input.py:586 flattentool/json_input.py:597
msgid ""
"WARNING: More than one value supplied, consult the relevant sub-sheet for"
" the data."
msgstr ""

#: flattentool/json_input.py:522
#: flattentool/json_input.py:637
msgid "Unsupported type {}"
msgstr ""

#: flattentool/output.py:71 flattentool/output.py:147
#: flattentool/output.py:75 flattentool/output.py:152
msgid ""
"Character(s) in '{}' are not allowed in a spreadsheet cell. Those "
"character(s) will be removed"
msgstr ""

#: flattentool/schema.py:145
#: flattentool/schema.py:143
msgid "One of schema_filename or root_schema_dict must be supplied"
msgstr ""

#: flattentool/schema.py:149
#: flattentool/schema.py:147
msgid "Only one of schema_filename or root_schema_dict should be supplied"
msgstr ""

#: flattentool/schema.py:192
#: flattentool/schema.py:190
msgid "Field {} does not have a title, skipping."
msgstr ""

#: flattentool/schema.py:344
#: flattentool/schema.py:368
msgid "Field {}{}/0/{} is missing a title, skipping."
msgstr ""

#: flattentool/schema.py:350
#: flattentool/schema.py:374
msgid "Field {}{} does not have a title, skipping it and all its children."
msgstr ""

#: flattentool/schema.py:390
#: flattentool/schema.py:414
msgid ""
"Unknown type_set: {}, did you forget to explicity set the \"type\" key on"
" \"items\"?"
"Unknown type_set: {}, did you forget to explicitly set the \"type\" key "
"on \"items\"?"
msgstr ""

#: flattentool/schema.py:423
#: flattentool/schema.py:447
msgid ""
"Unrecognised types {} for property \"{}\" with context \"{}\",so this "
"property has been ignored."
msgstr ""

#: flattentool/schema.py:431
#: flattentool/schema.py:455
msgid "Skipping field \"{}\", because it has no properties."
msgstr ""

21 changes: 21 additions & 0 deletions flattentool/tests/test_json_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,27 @@ def test_parse_basic_json_dict():
assert parser.sub_sheets == {}


def test_parse_not_json_dict(recwarn):
    """Non-dict items in the root list are skipped, each with its own warning."""
    parser = JSONParser(root_json_dict=[["test"], {"a": "b"}, "test"])

    # Only the one dict item ends up in the main sheet.
    assert list(parser.main_sheet) == ["a"]
    assert list(parser.main_sheet.lines) == [{"a": "b"}]
    assert parser.sub_sheets == {}

    # One warning per skipped item (indexes 0 and 2), popped in recorded order.
    assert len(recwarn) == 2
    expected_reprs = (
        "DataErrorWarning('The value at index 0 is not a JSON object')",
        "DataErrorWarning('The value at index 2 is not a JSON object')",
    )
    for expected in expected_reprs:
        caught = recwarn.pop(UserWarning)
        assert repr(caught.message) == expected


def test_parse_nested_dict_json_dict():
parser = JSONParser(
root_json_dict=[
Expand Down

0 comments on commit 27b2cb4

Please sign in to comment.