def test_check_for_missing_keys():
    """Exercise ``ut.check_for_missing_keys`` with complete, incomplete and invalid dicts."""
    # subdicts that already share all keys must come back unchanged
    complete = {
        "file1": {"faces": "Yes", "text_english": "Something"},
        "file2": {"faces": "No", "text_english": "Otherthing"},
    }
    result = ut.check_for_missing_keys(complete)
    assert result == complete

    # a single missing key is filled in with None
    one_missing = {
        "file1": {"faces": "Yes", "text_english": "Something"},
        "file2": {"faces": "No"},
    }
    result = ut.check_for_missing_keys(one_missing)
    assert result["file2"] == {"faces": "No", "text_english": None}

    # several missing keys are all filled in with None
    all_missing = {
        "file1": {"faces": "Yes", "text_english": "Something"},
        "file2": {},
    }
    result = ut.check_for_missing_keys(all_missing)
    assert result["file2"] == {"faces": None, "text_english": None}

    # invalid input: not nested, empty first subdict, and a first subdict
    # that lacks a key present in a later one
    invalid_inputs = (
        {"File": "path"},
        {"File": {}},
        {
            "file1": {"faces": "Yes"},
            "file2": {"faces": "No", "text_english": "Something"},
        },
    )
    for bad_dict in invalid_inputs:
        with pytest.raises(ValueError):
            ut.check_for_missing_keys(bad_dict)
def check_for_missing_keys(mydict: dict) -> dict:
    """Check the nested dictionary for any missing keys in the subdicts.

    The keys of the first subdict serve as the reference. Every subdict that
    lacks one of these keys gets it added with the value ``None``. The input
    is validated completely before anything is modified, so a call that
    raises leaves ``mydict`` untouched.

    Args:
        mydict(dict): The nested dictionary with keys to check. It is updated
            in place.
    Returns:
        dict: The dictionary with keys appended (the same object as ``mydict``).
    Raises:
        ValueError: If ``mydict`` is empty, if any of its values is not a
            dictionary, if the first subdict has no keys, or if a subdict
            contains keys that the first subdict is missing.
    """
    empty_message = (
        "Could not get any keys to compare to - please check if your nested dict is empty!"
    )
    # an empty outer dict has no first item to take the reference keys from
    if not mydict:
        raise ValueError(empty_message)
    # check that we actually got a nested dict - every item, not only the first
    for key, subdict in mydict.items():
        if not isinstance(subdict, dict):
            raise ValueError(
                "Please provide a nested dictionary - you provided {}".format(key)
            )
    # the keys of the first item are the reference for all other items
    first_subdict = next(iter(mydict.values()))
    if not first_subdict:
        raise ValueError(empty_message)
    reference_keys = set(first_subdict)
    # check that there are no other keys in the subdicts -
    # this would only happen if there is a key missing in the first subdict;
    # abort before changing anything so the caller's dict is not left
    # half-updated
    for subdict in mydict.values():
        missing_keys_in_first = set(subdict) - reference_keys
        if missing_keys_in_first:
            raise ValueError(
                "Could not update missing keys - first item already missing {}".format(
                    missing_keys_in_first
                )
            )
    for key, subdict in mydict.items():
        # walk the reference keys in the order of the first subdict so the
        # filled-in keys are inserted deterministically
        missing_keys = [ref for ref in first_subdict if ref not in subdict]
        if not missing_keys:
            continue
        # print a warning if key is not found and set to None
        print(
            "Found missing key(s) {} in subdict {} - setting to None.".format(
                set(missing_keys), key
            )
        )
        for missing_key in missing_keys:
            subdict[missing_key] = None
    return mydict