Skip to content

Commit

Permalink
check for missing keys (#144)
Browse files Browse the repository at this point in the history
* check for missing keys

* fix code smells
  • Loading branch information
iulusoy authored Aug 15, 2023
1 parent edffd91 commit 911a43b
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 0 deletions.
32 changes: 32 additions & 0 deletions ammico/test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,38 @@ def test_initialize_dict(get_path):
assert mydict == out_dict


def test_check_for_missing_keys():
    """Exercise ut.check_for_missing_keys on complete, incomplete and invalid input."""
    # a dict whose subdicts all carry the same keys must come back unchanged
    complete = {
        "file1": {"faces": "Yes", "text_english": "Something"},
        "file2": {"faces": "No", "text_english": "Otherthing"},
    }
    assert ut.check_for_missing_keys(complete) == complete

    # a single missing key in a later subdict is filled with None
    one_missing = {
        "file1": {"faces": "Yes", "text_english": "Something"},
        "file2": {"faces": "No"},
    }
    patched = ut.check_for_missing_keys(one_missing)
    assert patched["file2"] == {"faces": "No", "text_english": None}

    # several missing keys in a later subdict are all filled with None
    all_missing = {
        "file1": {"faces": "Yes", "text_english": "Something"},
        "file2": {},
    }
    patched = ut.check_for_missing_keys(all_missing)
    assert patched["file2"] == {"faces": None, "text_english": None}

    # invalid inputs: not nested, empty first subdict, and a first subdict
    # that lacks a key which a later subdict has
    invalid_inputs = [
        {"File": "path"},
        {"File": {}},
        {
            "file1": {"faces": "Yes"},
            "file2": {"faces": "No", "text_english": "Something"},
        },
    ]
    for invalid in invalid_inputs:
        with pytest.raises(ValueError):
            ut.check_for_missing_keys(invalid)


def test_append_data_to_dict(get_path):
with open(get_path + "example_append_data_to_dict_in.json", "r") as file:
mydict = json.load(file)
Expand Down
49 changes: 49 additions & 0 deletions ammico/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,55 @@ def initialize_dict(filelist: list) -> dict:
return mydict


def check_for_missing_keys(mydict: dict) -> dict:
    """Check the nested dictionary for any missing keys in the subdicts.

    The keys of the first subdict serve as the reference. Every other subdict
    that lacks one of these keys gets it added with the value ``None``. The
    subdicts are updated in place and the same outer dictionary is returned.

    Args:
        mydict(dict): The nested dictionary with keys to check.

    Returns:
        dict: The dictionary with keys appended.

    Raises:
        ValueError: If ``mydict`` is empty, if any of its values is not a
            dict, if the first subdict has no keys, or if a later subdict
            contains keys that the first subdict does not have.
    """
    # an empty outer dict has no first item to take the reference keys from
    if not mydict:
        raise ValueError(
            "Could not get any keys to compare to - please check if your nested dict is empty!"
        )
    # check that we actually got a nested dict - validate every value, not
    # only the first one, so a stray non-dict fails with a clear message
    for key, subdict in mydict.items():
        if not isinstance(subdict, dict):
            raise ValueError(
                "Please provide a nested dictionary - you provided {}".format(key)
            )
    # the first subdict defines the set of expected keys
    reference = next(iter(mydict.values()))
    if not reference:
        raise ValueError(
            "Could not get any keys to compare to - please check if your nested dict is empty!"
        )
    for key, subdict in mydict.items():
        if reference.keys() == subdict.keys():
            continue
        # collect missing keys in the order of the reference subdict so the
        # resulting key order does not depend on set iteration order
        missing_keys = [ref_key for ref_key in reference if ref_key not in subdict]
        if missing_keys:
            print(
                "Found missing key(s) {} in subdict {} - setting to None.".format(
                    set(missing_keys), key
                )
            )
            for missing_key in missing_keys:
                subdict[missing_key] = None
        # check that there are no other keys in the subdicts -
        # this would only happen if there is a key missing in the first subdict
        # then we would need to start over so best to
        # abort if this happens - this is a very unlikely case
        extra_keys = set(subdict) - set(reference)
        if extra_keys:
            raise ValueError(
                "Could not update missing keys - first item already missing {}".format(
                    extra_keys
                )
            )
    return mydict


def append_data_to_dict(mydict: dict) -> dict:
"""Append entries from nested dictionaries to keys in a global dict."""

Expand Down

0 comments on commit 911a43b

Please sign in to comment.