From 15615cdd615faca15742c213cb0bde6a353aed5f Mon Sep 17 00:00:00 2001 From: vstevensf Date: Wed, 28 Sep 2022 19:15:21 -0400 Subject: [PATCH 01/38] first draft of mwfh base magma code --- magma/metawfl_handler.py | 131 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 magma/metawfl_handler.py diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py new file mode 100644 index 0000000..55e1cee --- /dev/null +++ b/magma/metawfl_handler.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 + +################################################ +# Libraries +################################################ +import copy +from metawfl import MetaWorkflow + +################################################ +# MetaWorkflowStep +################################################ +#TODO: should i put this within metaworkflowhandler class? for ease of imports is the only +# reason I can think of as reasoning for doing so + +class MetaWorkflowStep(object): + """ + Class to represent a MetaWorkflow object, + as a step within a MetaWorkflow Handler object + """ + + def __init__(self, input_dict): + """ + Constructor method, initialize object and attributes. + + :param input_dict: a MetaWorkflow step and accompanying info within handler, defined by json file + :type input_dict: dict + """ + ### Basic (non-calculated) attributes ### + for key in input_dict: + setattr(self, key, input_dict[key]) + + # Validate presence of basic attributes of this MetaWorkflow step + self._validate_basic_attributes() + + #TODO: import and call magma mwf to initialize the mwf within the handler + # THEN check the dependencies + # also need to fill in the names for the mwfs + + ### Calculated attributes ### + # Nodes set, for building graph structure based on dependencies + self._nodes = set() #step_objects for steps that depend on current step + + # Names (strings) of MetaWorkflow steps that this MetaWorkflow is dependent on + if getattr(self, 'dependencies', None): # set None for [default] arg to not throw AttributeError + self.dependencies = set(self.dependencies) # convert to set to not have duplicates + else: + self.dependencies = set() + + #TODO: case where a metaworkflow is repeated downstream? does this ever happen? + + + def _validate_basic_attributes(self): + """ + Validation of the JSON input for the MetaWorkflow step + Checks that necessary MetaWorkflow attributes are present for this MetaWorkflow step + """ + try: + getattr(self, "meta_workflow") #str, must not be unique TODO: check this + getattr(self, "name") #str, must be unique TODO: name filling in ff + getattr(self, "duplication_flag") #bool + except AttributeError as e: + raise ValueError("JSON validation error, {0}\n" + .format(e.args[0])) + + #TODO: is there another way to integrate this other than this weird nested try except + try: + getattr(self, "items_for_creation_property_trace") + except AttributeError: + try: + getattr(self, "items_for_creation_uuid") + except AttributeError as e: + raise ValueError("JSON validation error, {0}\n" + .format(e.args[0])) + + + +################################################ +# MetaWorkflowHandler +################################################ +class MetaWorkflowHandler(object): + """ + Class representing a MetaWorkflow Handler object, + a list of MetaWorkflows with specified dependencies + """ + + def __init__(self, input_dict): + """ + Constructor method, initialize object and attributes. 
+ + :param input_dict: MetaWorkflow Handler object dictionary, defined by json file + :type input_dict: dict + """ + + ### Basic attributes ### + # Required: project, institution + # Identifying: uuid, aliases, accession + # Commonly present: title, name, description, meta_workflows (list) + # see cgap_portal meta_workflow_handler schema for more info + for key in input_dict: + setattr(self, key, input_dict[key]) + + # Validate presence of basic attributes of this MetaWorkflow Handler + self._validate_basic_attributes() + + # check for meta_workflows attribute, set empty if it is not present + if getattr(self, 'meta_workflows', None): # set None for [default] arg to not throw AttributeError + self.meta_workflows = self.meta_workflows # convert to set to not have duplicates + else: + self.meta_workflows = [] + +# # Calculated attributes +# self.steps = {} #{step_obj.name: step_obj, ...} +# self._end_workflows = None + +# # Calculate attributes +# self._validate_basic_attributes() +# self._read_steps() + + + def _validate_basic_attributes(self): + """ + """ + try: + getattr(self, 'uuid') #str, must be unique + getattr(self, 'input') #list + # getattr(self, 'meta_workflows') #list -- TODO: what if it's empty? -- took care of that in init + #TODO: check project and institution? I think the schema takes care of that + except AttributeError as e: + raise ValueError('JSON validation error, {0}\n' + .format(e.args[0])) + From 165a612929426c83999ff5d3311044e6d5270e2f Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 24 Oct 2022 13:10:41 -0400 Subject: [PATCH 02/38] Further editing of baseline MWF handler, with added functions for calculated attributes --- magma/metawfl_handler.py | 105 +++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 55 deletions(-) diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index 55e1cee..14b6560 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -3,14 +3,16 @@ ################################################ # Libraries ################################################ -import copy -from metawfl import MetaWorkflow +# from magma import metawfl #TODO: do this in FF + +################################################ +# TODO: functions for dcic utils -- move later +################################################ +from magma.utils import check_presence_of_attributes, set_list_attributes, generate_ordered_steps_list ################################################ # MetaWorkflowStep ################################################ -#TODO: should i put this within metaworkflowhandler class? for ease of imports is the only -# reason I can think of as reasoning for doing so class MetaWorkflowStep(object): """ @@ -22,7 +24,7 @@ def __init__(self, input_dict): """ Constructor method, initialize object and attributes. - :param input_dict: a MetaWorkflow step and accompanying info within handler, defined by json file + :param input_dict: a MetaWorkflow step (object) and accompanying info within handler, defined by json file :type input_dict: dict """ ### Basic (non-calculated) attributes ### @@ -32,45 +34,38 @@ def __init__(self, input_dict): # Validate presence of basic attributes of this MetaWorkflow step self._validate_basic_attributes() + # Get rid of dependency duplicates -- TODO: already done within mwf? + + # Initialize Metaworkflow (magma (ff?)) using embedded call to metaworkflow LinkTo + #TODO: do in magma ff? + # metaworkflow_linkto = getattr(self, "meta_workflow") #TODO: embedding API -- how to test?? 
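+        # A hypothetical sketch of that call (magma-ff territory, where portal
+        # metadata access is available); the names below are illustrative:
+        #     mwf_metadata = ff_utils.get_metadata(self.meta_workflow, key=auth_key)
+        #     self.meta_workflow_obj = MetaWorkflow(mwf_metadata)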
+
+
        #TODO: import and call magma mwf to initialize the mwf within the handler
        # THEN check the dependencies
        # also need to fill in the names for the mwfs

-        ### Calculated attributes ###
-        # Nodes set, for building graph structure based on dependencies
-        self._nodes = set() #step_objects for steps that depend on current step
-
-        # Names (strings) of MetaWorkflow steps that this MetaWorkflow is dependent on
-        if getattr(self, 'dependencies', None): # set None for [default] arg to not throw AttributeError
-            self.dependencies = set(self.dependencies) # convert to set to not have duplicates
-        else:
-            self.dependencies = set()
+        #TODO: check that names of metaworkflow steps are unique, and check for
+        # circularity -- name filling via setdefault could happen here or in ff

        #TODO: case where a metaworkflow is repeated downstream? does this ever happen?


-    def _validate_basic_attributes(self):
+    def _validate_basic_attributes(self): #TODO: create this as part of the utility function?
        """
        Validation of the JSON input for the MetaWorkflow step
+        Checks that necessary MetaWorkflow attributes are present for this MetaWorkflow step
        """
+        check_presence_of_attributes(self, ["meta_workflow", "name", "duplication_flag"])
+        # name: str, must be unique TODO: name filling in ff
        try:
-            getattr(self, "meta_workflow") #str, must not be unique TODO: check this
-            getattr(self, "name") #str, must be unique TODO: name filling in ff
-            getattr(self, "duplication_flag") #bool
+            # use None as the default arg so this getattr call cannot itself raise;
+            # items_for_creation_uuid is required only when no property trace is given
+            if getattr(self, "items_for_creation_property_trace", None) is None:
+                getattr(self, "items_for_creation_uuid")
        except AttributeError as e:
            raise ValueError("JSON validation error, {0}\n"
-                                .format(e.args[0]))
-
-        #TODO: is there another way to integrate this other than this weird nested try except
-        try:
-            getattr(self, "items_for_creation_property_trace")
-        except AttributeError:
-            try:
-                getattr(self, "items_for_creation_uuid")
-            except AttributeError as e:
-                raise ValueError("JSON validation error, {0}\n"
-                                    .format(e.args[0]))
+                                .format(e.args[0]))



@@ -87,45 +82,45 @@ def __init__(self, input_dict):
        """
        Constructor method, initialize object and attributes.

-        :param input_dict: MetaWorkflow Handler object dictionary, defined by json file
+        :param input_dict: MetaWorkflow Handler object, defined by json file from portal
        :type input_dict: dict
        """

        ### Basic attributes ###
-        # Required: project, institution
-        # Identifying: uuid, aliases, accession
-        # Commonly present: title, name, description, meta_workflows (list)
-        # see cgap_portal meta_workflow_handler schema for more info
+
        for key in input_dict:
            setattr(self, key, input_dict[key])

        # Validate presence of basic attributes of this MetaWorkflow Handler
-        self._validate_basic_attributes()
+        # - Required: project, institution TODO: taken care of in schema
+        # - Identifying: uuid, aliases, accession TODO: are these effectively required too?
+        # - Commonly present: title, name, description, meta_workflows (list)
+        # See cgap_portal meta_workflow_handler schema for more info.
+ check_presence_of_attributes(self, ["uuid"]) - # check for meta_workflows attribute, set empty if it is not present - if getattr(self, 'meta_workflows', None): # set None for [default] arg to not throw AttributeError - self.meta_workflows = self.meta_workflows # convert to set to not have duplicates - else: - self.meta_workflows = [] + ### Calculated attributes ### -# # Calculated attributes -# self.steps = {} #{step_obj.name: step_obj, ...} -# self._end_workflows = None + # to check for non-existent meta_workflows attribute + self._set_meta_workflows_list() -# # Calculate attributes -# self._validate_basic_attributes() -# self._read_steps() + # order the meta_workflows list based on dependencies + ordered_meta_workflows = generate_ordered_steps_list(self.meta_workflows, "name", "dependencies") + self.meta_workflows = ordered_meta_workflows + # create MetaWorkflow object for each metaworkflow step in meta_workflows + #TODO: do in magma-ff? because requires pulling metadata using UUID + #self.create_meta_workflow_steps() - def _validate_basic_attributes(self): + def _set_meta_workflows_list(self): """ + Checks for meta_workflows attribute, gets rid of duplicates, + else sets as empty list if not present + TODO: better to throw error if duplicates are present? """ - try: - getattr(self, 'uuid') #str, must be unique - getattr(self, 'input') #list - # getattr(self, 'meta_workflows') #list -- TODO: what if it's empty? -- took care of that in init - #TODO: check project and institution? I think the schema takes care of that - except AttributeError as e: - raise ValueError('JSON validation error, {0}\n' - .format(e.args[0])) + set_list_attributes(self, ["meta_workflows"]) + + # def create_meta_workflow_steps(self): #TODO: in magma ff? + # meta_workflows_list = getattr(self, "meta_workflows") # list + # for meta_workflow_step in meta_workflows_list: + #TODO: getting global input of first step ## getattr(self, 'input') # list From 469804d7b4e4b39849e7aba96656fc26942323ca Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 24 Oct 2022 13:13:12 -0400 Subject: [PATCH 03/38] Baseline Magma FF MWF Handler -- will be modifying the use of copy --- magma_ff/metawfl_handler.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 magma_ff/metawfl_handler.py diff --git a/magma_ff/metawfl_handler.py b/magma_ff/metawfl_handler.py new file mode 100644 index 0000000..03d333d --- /dev/null +++ b/magma_ff/metawfl_handler.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +################################################ +# Libraries +################################################ +import copy + +# magma +from magma.metawfl_handler import MetaWorkflowHandler as MetaWorkflowHandlerFromMagma +from magma_ff.parser import ParserFF + +################################################ +# MetaWorkflow Handler, Fourfront +################################################ +class MetaWorkflowHandler(MetaWorkflowHandlerFromMagma): + + def __init__(self, input_json): + """ + Constructor method, initialize object and attributes. + + :param input_json: MetaWorkflow Handler object defined by json file, from portal + :type input_json: dict + """ + input_json_ = copy.deepcopy(input_json) + + # To handle compatibility between portal and magma json formats + # TODO: necessary? 
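+        # (assumption: arguments_to_json converts portal-style stringified
+        # "arguments" fields back into json structures before magma parses them)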
+ ParserFF(input_json_).arguments_to_json() + + super().__init__(input_json_) + + #TODO: name filling with property traces + #end def + +#end class \ No newline at end of file From 6cccc8e2097ce4da2ee71e4b2f7ac5f522bbe03a Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 24 Oct 2022 13:14:08 -0400 Subject: [PATCH 04/38] Creation of helper functions that may eventually be added to dcic utils. Some are specific to the structure of CGAP portal schemas, but tried to generalize as much as possible. Also includes partial draft of dependency validation via topological sort. --- magma/utils.py | 185 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 magma/utils.py diff --git a/magma/utils.py b/magma/utils.py new file mode 100644 index 0000000..769b49c --- /dev/null +++ b/magma/utils.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 + +################################################ +# Libraries +################################################ + +################################################ +# Functions +################################################ + +def check_variable_type(variable, intended_type): + """ + Checks that given variable is of the intended type. + Raises TypeError if not of the intended type. + If it matches, returns None. + + :param variable: variable to be checked + :type variable: depends?? (TODO: lol check how to define this) + :param intended_type: the variable type that is intended + :type intended_type: also....depends..(TODO:) + :raises TypeError: if variable is of incorrect/unintended type + """ + if not isinstance(variable, intended_type): + raise TypeError("Input must be of type {0}".format(str(intended_type))) + +def check_list_elements_type(list_to_check, intended_type): + """ + Checks that all elements in list are of a given type. + Raises Exception if not all elements are strings. + + :param list_to_check: list to be checked + :type list_to_check: list + :param intended_type: the variable type that is intended + :type intended_type: also....depends..(TODO:) + :raises TypeError: if list_to_check is of incorrect type (not a list) + :raises Exception: if not all list elements are of the intended type + TODO: should this exception also be a TypeError + """ + # Check that input is of type list + check_variable_type(list_to_check, list) + + # check that all elements in list are strings + if not all(isinstance(element, intended_type) for element in list_to_check): + raise Exception("All elements in list must be of type {0}".format(str(intended_type))) + +def check_presence_of_attributes(input_object, attributes_to_check=None): + """ + Takes in an object and a list of attributes, checks that those attributes are present + in this object + + :param input_object: object to check + :type input_object: object (dict) + :param attributes_to_check: list of attributes to check + :type attributes_to_check: list[str] + :raises ValueError: if object doesn't have a specified attribute + + TODO: should this have a return? 
right now it just raises errors or not + """ + if attributes_to_check is None: + return + + # Check that attributes_to_check is of type list + check_variable_type(attributes_to_check, list) + + # check that all attributes to be checked are strings + check_list_elements_type(attributes_to_check, str) + + for attribute in attributes_to_check: + try: + getattr(input_object, attribute) + except AttributeError as e: + raise ValueError("Object validation error, {0}\n" + .format(e.args[0])) + +def set_list_attributes(input_object, attributes_to_set=None): + """ + Checks for given attribute(s) of type list, sets as empty list if not present, + else sets that list attribute, without duplicates. + + :param input_object: object with attributes to be set + :type input_object: object (dict) + :param attributes_to_set: list of attributes to set + :type attributes_to_set: list[str] + """ + if attributes_to_set is None: + return + # check that all attributes to be checked are strings + check_list_elements_type(attributes_to_set, str) + + # especially if we are handling duplicates in reordering list function + for attribute in attributes_to_set: + if not hasattr(input_object, attribute): + # if not present, set attribute as empty list + setattr(input_object, attribute, []) + +def generate_ordered_steps_list(steps_with_dependencies_array, name_of_step_attribute, name_of_dependency_attribute): + """ + Takes in list of steps and reorders based on dependencies, returning reordered list. + If impossible to create ordered list (circular dependencies, missing steps, etc.), + returns None. TODO: check this -- it'll throw an exception/error, not return None + + :param steps_with_dependencies_array: list of objects, where each object has (at least) step and dependency attribute + #TODO: check the above line -- dependency may not be necessary + :type steps_with_dependencies_array: list of dicts/objects + :param name_of_step_attribute: name of the key corresponding to the step's name – + i.e. attribute referred to by dependency values + :type name_of_step_attribute: str + :param name_of_dependency_attribute: name of the key corresponding to the dependencies list + :type name_of_dependency_attribute: str + :return: the reordered list (if possible) + :rtype: list of dicts/objects + TODO: add errors and exceptions possibly thrown + """ + # check that name_of_step_attribute is a string + check_variable_type(name_of_step_attribute, str) + + # check that steps_with_dependencies_array is a list of objects/dicts + check_list_elements_type(steps_with_dependencies_array, object) + #TODO: does json "sub" object of type object or dict?? + # ^ also what are we actually working with here + # ^ because json.load turns it into a dict.... 
+ # and the MWFH class has those subparts as type dict (from manual testing) + # solved: isinstance takes care of this anyway??...do we want that differentiation tho + + # check that all objects in steps_with_dependencies_array have name_of_step_attribute + if not all(hasattr(element, name_of_step_attribute) for element in steps_with_dependencies_array): + raise Exception("All elements in list must have attribute \"{0}\"".format(name_of_step_attribute)) + # TODO: make this a utility function -- also, should it raise exception or error + + # TODO: random, but we never make the check that the same key is used twice in dict + # by default, python takes the value as the lastmost definition of that key + # i don't think we need to worry about this + + # TODO: feel like this is overkill, but checking for duplicates in steps_with_dependencies_array? + # is there any case where the exact same step will be defined?? i think gets tricky with sharding maybe? idk + + ### List reordering based on dependencies ### + + ## Preprocessing of dependencies lists -- TODO: could make this its own function + # add dependencies attribute if not present, remove duplicates from dependencies, + # and check for self dependencies + for step in steps_with_dependencies_array: + # add empty dependency list if not present + # TODO: setting of dependency to []? so that it at least has a value + # VERSUS not having the dependency attribute at all -- I'm sticking w latter for now + if not getattr(step, name_of_dependency_attribute, None): #TODO: use helper function above instead + setattr(step, name_of_dependency_attribute, []) + + # get rid of duplicates -- TODO: make a helper function? + dependencies = getattr(step, name_of_dependency_attribute) + setattr(step, name_of_dependency_attribute, list(set(dependencies))) + + # check for self dependencies -- if present, throw exception (TODO: check this) + dependencies = getattr(step, name_of_dependency_attribute) # list of dependencies + name = getattr(step, name_of_step_attribute) + if name in dependencies: + raise Exception("Self dependency for step \"{0}\" not allowed".format(name)) + + ## Build directed graph by "reversing" dependencies (TODO: redo this comment and make own function) + + # make list of "name" values, whose indices correspond to indices of the objects in steps_with_dependencies_array + names = [] + for obj in steps_with_dependencies_array: + names.append(getattr(obj, name_of_step_attribute)) #TODO:alternatively, do in the above for loop + + for step in steps_with_dependencies_array: + dependencies = getattr(step, name_of_dependency_attribute) + + # go through each step this current step is dependent on + # and add "step_after" attribute + # (dependencies are "origin", "source", or "progenitor" steps) + for dependency in dependencies: + + + # if this dependency step doesn't have the step_after attribute, create it + if not getattr(dependency, "step_after", None): #TODO: rename this attribute, use helper fxn here + setattr(dependency, "step_after", []) + + + # TODO:edge cases: all steps have dependencies, no steps depending on each other, dependency on self + + # TODO: should I do resetting of list attribute (reordered) from original source object here or outside? + # (like, the pass by reference problem. 
look this up for python) + # check this in use of other helper functions too + # here, I have chosen to return the reordered array and redefine the metaworkflows list in main MWFH class From f62fabddde2d1e87f13c144f82f46ae8304429f5 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 24 Oct 2022 13:15:53 -0400 Subject: [PATCH 05/38] Drafts of pytests for baseline Magma MWF Handler and helper functions. --- test/files/test_METAWFL_HANDLER.json | 30 ++++++++++++ test/files/tester.json | 10 ++++ test/test_metawfl_handler.py | 17 +++++++ test/test_utils_magma.py | 68 ++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+) create mode 100644 test/files/test_METAWFL_HANDLER.json create mode 100644 test/files/tester.json create mode 100644 test/test_metawfl_handler.py create mode 100644 test/test_utils_magma.py diff --git a/test/files/test_METAWFL_HANDLER.json b/test/files/test_METAWFL_HANDLER.json new file mode 100644 index 0000000..a70a75a --- /dev/null +++ b/test/files/test_METAWFL_HANDLER.json @@ -0,0 +1,30 @@ +{ + "title": "Test MetaWorkflow Handler", + "name": "test_metawf_handler", + "version": "v1", + "description": "Test metaworkflow handler", + "project": "test_project", + "institution": "test_institution", + "uuid": "test_mwfh_uuid", + "aliases": ["cgap:test_metawf_handler"], + "meta_workflows": [ + { + "meta_workflow": "test_mwf_uuid_1", + "name": "test_mwf_1", + "items_for_creation_proerty_trace": [ + "sample_processing.samples" + ] + }, + { + "meta_workflow": "test_mwf_uuid_2", + "items_for_creation_uuid": [ + "test_sample_uuid_1", + "test_sample_uuid_2" + ], + "dependencies": [ + "test_mwf_1" + ], + "duplication_flag": true + } + ] +} \ No newline at end of file diff --git a/test/files/tester.json b/test/files/tester.json new file mode 100644 index 0000000..c478875 --- /dev/null +++ b/test/files/tester.json @@ -0,0 +1,10 @@ +{ + "title": "Test MetaWorkflow Handler", + "name": "test_metawf_handler", + "version": "v1", + "description": "Test metaworkflow handler", + "project": "test_project", + "institution": "test_institution", + "uuid": "test_mwfh_uuid", + "aliases": ["cgap:test_metawf_handler"] +} \ No newline at end of file diff --git a/test/test_metawfl_handler.py b/test/test_metawfl_handler.py new file mode 100644 index 0000000..13f23c9 --- /dev/null +++ b/test/test_metawfl_handler.py @@ -0,0 +1,17 @@ +################################################################# +# Libraries +################################################################# +import pytest +import json + +from magma import metawfl_handler as mwfh + +#TODO: how is the json object passed to magma? as list? or dict? 
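+# (note: json.load on a JSON object returns a dict, which is what
+# MetaWorkflowHandler's constructor iterates over)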
+ +with open('test/files/test_METAWFL_HANDLER.json') as json_file: + data = json.load(json_file) + +print(data) +print(type(data)) + +mwfh.MetaWorkflowHandler(data) \ No newline at end of file diff --git a/test/test_utils_magma.py b/test/test_utils_magma.py new file mode 100644 index 0000000..0d7a3db --- /dev/null +++ b/test/test_utils_magma.py @@ -0,0 +1,68 @@ +################################################################# +# Libraries +################################################################# +import pytest + +from magma.utils import check_list_is_all_strings, check_presence_of_attributes, set_list_attributes + +################################################################# +# Vars +################################################################# + +################################################################# +# Tests +################################################################# + +#TODO: replace all check_list_is_all_strings tests accordingly + +@pytest.mark.parametrize( + "list_to_check,expected", + [ + ([], None), + (["id"], None), + (["1", "test", "2"], None) + ], +) +def test_check_list_is_all_strings_no_errors(list_to_check, expected): + """ + Test for function checking if all elements of a list are strings, + no errors raised + """ + result = check_list_is_all_strings(list_to_check) + assert result == expected + +@pytest.mark.parametrize( + "list_to_check", + [ + ([["1", "2", "3", "4", "5"], ["6"]]), + ([["1", "2", "3", "4", "5"], "6"]), + ([None, "test"]), + ([1, "test"]) + ], +) +def test_check_list_is_all_strings_with_exceptions(list_to_check): + """ + Test for function checking if all elements of a list are strings, + Exception raised + """ + with pytest.raises(Exception) as excinfo: + check_list_is_all_strings(list_to_check) + assert str(excinfo.value) == "All elements in list must be strings" + +@pytest.mark.parametrize( + "list_to_check", + [ + (), + (None), + (1), + ("test"), + (set()) + ], +) +def test_check_list_is_all_strings_with_type_error(list_to_check): + """ + Test for function checking if all elements of a list are strings, + TypeError raised (incorrect argument type passed) + """ + with pytest.raises(TypeError): + check_list_is_all_strings(list_to_check) \ No newline at end of file From 4d50191ca394753dccb137490dc9bbfe3dab5d62 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 24 Oct 2022 13:19:09 -0400 Subject: [PATCH 06/38] Remove extraneous files I use for local testing --- test/files/test_METAWFL_HANDLER.json | 30 ---------------------------- test/files/tester.json | 10 ---------- 2 files changed, 40 deletions(-) delete mode 100644 test/files/test_METAWFL_HANDLER.json delete mode 100644 test/files/tester.json diff --git a/test/files/test_METAWFL_HANDLER.json b/test/files/test_METAWFL_HANDLER.json deleted file mode 100644 index a70a75a..0000000 --- a/test/files/test_METAWFL_HANDLER.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "title": "Test MetaWorkflow Handler", - "name": "test_metawf_handler", - "version": "v1", - "description": "Test metaworkflow handler", - "project": "test_project", - "institution": "test_institution", - "uuid": "test_mwfh_uuid", - "aliases": ["cgap:test_metawf_handler"], - "meta_workflows": [ - { - "meta_workflow": "test_mwf_uuid_1", - "name": "test_mwf_1", - "items_for_creation_proerty_trace": [ - "sample_processing.samples" - ] - }, - { - "meta_workflow": "test_mwf_uuid_2", - "items_for_creation_uuid": [ - "test_sample_uuid_1", - "test_sample_uuid_2" - ], - "dependencies": [ - "test_mwf_1" - ], - 
"duplication_flag": true - } - ] -} \ No newline at end of file diff --git a/test/files/tester.json b/test/files/tester.json deleted file mode 100644 index c478875..0000000 --- a/test/files/tester.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "title": "Test MetaWorkflow Handler", - "name": "test_metawf_handler", - "version": "v1", - "description": "Test metaworkflow handler", - "project": "test_project", - "institution": "test_institution", - "uuid": "test_mwfh_uuid", - "aliases": ["cgap:test_metawf_handler"] -} \ No newline at end of file From 81c826d47db3b61ed496ee58d64ddfceb02d6c9e Mon Sep 17 00:00:00 2001 From: vstevensf Date: Wed, 26 Oct 2022 17:56:43 -0400 Subject: [PATCH 07/38] Added pytests for magma/utils.py --- test/test_utils_magma.py | 169 +++++++++++++++++++++++++++------------ 1 file changed, 118 insertions(+), 51 deletions(-) diff --git a/test/test_utils_magma.py b/test/test_utils_magma.py index 0d7a3db..b55899d 100644 --- a/test/test_utils_magma.py +++ b/test/test_utils_magma.py @@ -3,7 +3,7 @@ ################################################################# import pytest -from magma.utils import check_list_is_all_strings, check_presence_of_attributes, set_list_attributes +from magma.utils import check_variable_type, check_list_elements_type ################################################################# # Vars @@ -13,56 +13,123 @@ # Tests ################################################################# -#TODO: replace all check_list_is_all_strings tests accordingly +class TestCheckVariableType: + @pytest.mark.parametrize( + "variable, intended_type", + [ + (2, int), + (-2, int), + (float('inf'), float), + (complex(1, 1.0), complex), + (True, bool), + (False, bool), + (None, type(None)), + (None, object), + ('a', str), + ('a', object), + ("test", str), + ("test", object), + ((1, 2), tuple), + ((1, 2), object), + ([], list), + ([], object), + (set(), set), + (set(), object), + ([1, "test"], list), + ([1, "test"], object), + ({}, dict), + ({}, object), + ({"hi": 1}, dict), + ({"hi": 1}, object) + ] + ) + def test_check_list_elements_type_no_errors(self, variable, intended_type): + """ + Test for function checking if a variable is of a specified type, + no errors raised. + """ + result = check_variable_type(variable, intended_type) + assert result == None -@pytest.mark.parametrize( - "list_to_check,expected", - [ - ([], None), - (["id"], None), - (["1", "test", "2"], None) - ], -) -def test_check_list_is_all_strings_no_errors(list_to_check, expected): - """ - Test for function checking if all elements of a list are strings, - no errors raised - """ - result = check_list_is_all_strings(list_to_check) - assert result == expected + @pytest.mark.parametrize( + "variable, intended_type", + [ + (2, list), + (float('inf'), int), + (complex(1, 1.0), float), + (True, str), + (None, bool), + ('a', int), + ("test", list), + ((1, 2), set), + (set(), tuple), + ([1, "test"], dict), + ({"hi": 1}, list) + ] + ) + def test_check_variable_type_typeerror(self, variable, intended_type): + """ + Test for function checking if a variable is of a specified type, + TypeError raised. 
+ """ + with pytest.raises(TypeError) as type_err_info: + check_variable_type(variable, intended_type) + assert str(type_err_info.value) == "Input must be of type {0}".format(str(intended_type)) -@pytest.mark.parametrize( - "list_to_check", - [ - ([["1", "2", "3", "4", "5"], ["6"]]), - ([["1", "2", "3", "4", "5"], "6"]), - ([None, "test"]), - ([1, "test"]) - ], -) -def test_check_list_is_all_strings_with_exceptions(list_to_check): - """ - Test for function checking if all elements of a list are strings, - Exception raised - """ - with pytest.raises(Exception) as excinfo: - check_list_is_all_strings(list_to_check) - assert str(excinfo.value) == "All elements in list must be strings" +class TestListElementsType: + @pytest.mark.parametrize( + "list_to_check, intended_type", + [ + ([], str), + ([], int), + ([], list), + ([], object), + (["id"], str), + (["1", "test", "2"], str), + ([1, 2, 3, 4], int), + ([[1], [2], ["test", "2"], []], list) + ] + ) + def test_check_list_elements_type_no_errors(self, list_to_check, intended_type): + """ + Test for function checking that all elements of a list are of a specified type, + no errors raised. + """ + result = check_list_elements_type(list_to_check, intended_type) + assert result == None -@pytest.mark.parametrize( - "list_to_check", - [ - (), - (None), - (1), - ("test"), - (set()) - ], -) -def test_check_list_is_all_strings_with_type_error(list_to_check): - """ - Test for function checking if all elements of a list are strings, - TypeError raised (incorrect argument type passed) - """ - with pytest.raises(TypeError): - check_list_is_all_strings(list_to_check) \ No newline at end of file + @pytest.mark.parametrize( + "list_to_check", + [ + ((["1", "2", "3", "4", "5"], ["6"])), + (None), + ("test") + ] + ) + def test_check_list_elements_type_listtocheck_not_list(self, list_to_check): + """ + Test for function checking if all elements of a list are strings, + TypeError raised (list_to_check not a list) + """ + with pytest.raises(TypeError) as type_err_info: + check_list_elements_type(list_to_check, str) + assert str(type_err_info.value) == "Input must be of type {0}".format(str(list)) + + + @pytest.mark.parametrize( + "list_to_check, intended_type", + [ + ([["1", "2", "3", "4", "5"], ["6"]], str), + ([["1", "2", "3", "4", "5"], "6"], list), + ([None, "test"], str), + ([1, "test"], int) + ] + ) + def test_check_list_elements_type_typeerror(self, list_to_check, intended_type): + """ + Test for function checking if all elements of a list are strings, + Exception raised + """ + with pytest.raises(TypeError) as type_err_info: + check_list_elements_type(list_to_check, intended_type) + assert str(type_err_info.value) == "All elements in list must be of type {0}".format(str(intended_type)) \ No newline at end of file From 32a576f24edfacca070cc3b019d66a8c3877335f Mon Sep 17 00:00:00 2001 From: vstevensf Date: Tue, 1 Nov 2022 07:27:37 -0400 Subject: [PATCH 08/38] Further edits to pytests of magma utils --- test/test_utils_magma.py | 387 +++++++++++++++++++++++++++++++-------- 1 file changed, 306 insertions(+), 81 deletions(-) diff --git a/test/test_utils_magma.py b/test/test_utils_magma.py index b55899d..f0f38e1 100644 --- a/test/test_utils_magma.py +++ b/test/test_utils_magma.py @@ -2,134 +2,359 @@ # Libraries ################################################################# import pytest +from copy import deepcopy -from magma.utils import check_variable_type, check_list_elements_type +from magma.utils import * +# from magma.metawfl_handler import 
MetaWorkflowStep, MetaWorkflowHandler ################################################################# # Vars ################################################################# +# dummy class for creating simple objects +class Tester: + """ + Class for creation of simple objects, based on an input dictionary + """ + def __init__(self, input_dict): + """ + Constructor method, initialize object and attributes. + + :param input_dict: dictionary defining the basic attributes of object to be created + :type input_dict: dict + """ + for key in input_dict: + setattr(self, key, input_dict[key]) + +# Stop pytest from collecting class Tester as test (prevent warning) +Tester.__test__ = False + +#TODO: is there a way to functionalize this? +# input dicts to create Tester objects +INPUT_DICT_SINGLE_SIMPLE_ATTR = {"test_0": 0} +INPUT_DICT_SEVERAL_SIMPLE_ATTRS = {"test_0": 0, "test_1": 1, "test_2": 2} +INPUT_DICT_SINGLE_EMPTY_LIST_ATTR = {"list_empty_0": []} +INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS = {"list_empty_0": [], "list_empty_1": [], "list_empty_2": []} +INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR = {"list_simple_0": [1, 2, 3]} +INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS = {"list_simple_0": [1, 2, 3], "list_simple_1": ["a", "b", "c"]} +INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP = {"list_simple_0": [1, 2, 3, 4, 3]} +INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = {"list_simple_0": [1, "a", 3, 3], "list_simple_1": ["a", "b", "c"], "list_simple_2": ["c", 1, "c"]} + +# Tester objects +TESTER_OBJ_SINGLE_SIMPLE_ATTR = Tester(INPUT_DICT_SINGLE_SIMPLE_ATTR) +TESTER_OBJ_SEVERAL_SIMPLE_ATTRS = Tester(INPUT_DICT_SEVERAL_SIMPLE_ATTRS) +TESTER_OBJ_SINGLE_EMPTY_LIST_ATTR = Tester(INPUT_DICT_SINGLE_EMPTY_LIST_ATTR) +TESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS = Tester(INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS) +TESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR = Tester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR) +TESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS = Tester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS) +TESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP = Tester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP) +TESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = Tester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP) + +# TITLE = "Test MetaWorkflow Handler" +# NAME = "test_metawf_handler" +# VERSION = "v1" +# DESCRIPTION = "Test metaworkflow handler" +# PROJECT = "test_project" +# INSTITUTION = "test_institution" +# MWFH_UUID_0 = "test_mwfh_uuid_0" +# ALIASES = ["cgap:test_metawf_handler"] +# MWF_UUID_0 = "test_mwf_uuid_0" +# MWF_UUID_1 = "test_mwf_uuid_1" +# MWF_UUID_2 = "test_mwf_uuid_2" +# MWF_UUID_3 = "test_mwf_uuid_3" +# MWF_UUID_4 = "test_mwf_uuid_4" +# MWF_UUID_5 = "test_mwf_uuid_5" +# MWF_NAME_A = "A" +# MWF_NAME_B = "B" +# MWF_NAME_C = "C" +# MWF_NAME_D = "D" +# MWF_NAME_E = "E" +# MWF_NAME_F = "F" + +# ITEMS_FOR_CREATION_PROPERTY_TRACE_0 = "sample_processing.samples" +# ITEMS_FOR_CREATION_UUID_0 = "test_sample_uuid_0" +# ITEMS_FOR_CREATION_UUID_1 = "test_sample_uuid_1" + + + +# SIMPLE_META_WORKFLOW_DICT_0 = { +# "meta_workflow": MWF_UUID_0, +# "name": MWF_NAME_A +# } +# SIMPLE_META_WORKFLOW_DICT_1 = { +# "meta_workflow": MWF_UUID_1, +# "name": MWF_NAME_B +# } +# SIMPLE_META_WORKFLOW_DICT_2 = { +# "meta_workflow": MWF_UUID_2, +# "name": MWF_NAME_C +# } + +# META_WORKFLOWS_ARRAY_SINGLE_ITEM = [SIMPLE_META_WORKFLOW_DICT_0] +# SINGLE_ITEM_META_WORKFLOWS_DICT = {"meta_workflows": META_WORKFLOWS_ARRAY_SINGLE_ITEM} + +# META_WORKFLOWS_ARRAY_SEVERAL_ITEMS = [SIMPLE_META_WORKFLOW_DICT_0, SIMPLE_META_WORKFLOW_DICT_1, SIMPLE_META_WORKFLOW_DICT_2] +# SEVERAL_ITEMS_META_WORKFLOWS_DICT = {"meta_workflows": 
META_WORKFLOWS_ARRAY_SEVERAL_ITEMS} + +# META_WORKFLOWS_ARRAY_SEVERAL_ITEMS_W_DUPLICATES = [SIMPLE_META_WORKFLOW_DICT_0, SIMPLE_META_WORKFLOW_DICT_2, SIMPLE_META_WORKFLOW_DICT_1, SIMPLE_META_WORKFLOW_DICT_2] +# SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_DICT = {"meta_workflows": META_WORKFLOWS_ARRAY_SEVERAL_ITEMS_W_DUPLICATES} + +# EMPTY_META_WORKFLOWS_DICT = {"meta_workflows": []} + +# SIMPLE_MWFH_DICT = { +# "title": TITLE, +# "name": NAME, +# "version": VERSION, +# "description": DESCRIPTION, +# "project": PROJECT, +# "institution": INSTITUTION, +# "uuid": MWFH_UUID_0, +# "aliases": ALIASES +# } + +# SIMPLE_MWFH_DICT_WITH_EMPTY_META_WORKFLOWS_LIST = deepcopy(SIMPLE_MWFH_DICT) +# SIMPLE_MWFH_DICT_WITH_EMPTY_META_WORKFLOWS_LIST.update(EMPTY_META_WORKFLOWS_DICT) + +# SIMPLE_MWFH_DICT_WITH_SINGLE_ITEM_META_WORKFLOWS_LIST = deepcopy(SIMPLE_MWFH_DICT) +# SIMPLE_MWFH_DICT_WITH_SINGLE_ITEM_META_WORKFLOWS_LIST.update(SINGLE_ITEM_META_WORKFLOWS_DICT) + +# SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_META_WORKFLOWS_LIST = deepcopy(SIMPLE_MWFH_DICT) +# SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_META_WORKFLOWS_LIST.update(SEVERAL_ITEMS_META_WORKFLOWS_DICT) + +# SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_LIST = deepcopy(SIMPLE_MWFH_DICT) +# SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_LIST.update(SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_DICT) + +# # import pdb; pdb.set_trace() +# SIMPLE_MWFH_OBJECT_WITH_EMPTY_META_WORKFLOWS_LIST = MetaWorkflowHandler(SIMPLE_MWFH_DICT_WITH_EMPTY_META_WORKFLOWS_LIST) +# SIMPLE_MWFH_OBJECT_WITH_SINGLE_ITEM_META_WORKFLOWS_LIST = MetaWorkflowHandler(SIMPLE_MWFH_DICT_WITH_SINGLE_ITEM_META_WORKFLOWS_LIST) +# SIMPLE_MWFH_OBJECT_WITH_SEVERAL_ITEMS_META_WORKFLOWS_LIST = MetaWorkflowHandler(SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_META_WORKFLOWS_LIST) +# SIMPLE_MWFH_OBJECT_WITH_SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_LIST = MetaWorkflowHandler(SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_LIST) + ################################################################# # Tests ################################################################# -class TestCheckVariableType: +@pytest.mark.parametrize( + "variable, intended_type, return_value", + [ + (2, int, True), + (-2, int, True), + (float('inf'), float, True), + (complex(1, 1.0), complex, True), + (True, bool, True), + (False, bool, True), + (None, type(None), True), + (None, object, True), + ('a', str, True), + ('a', object, True), + ("test", str, True), + ("test", object, True), + ((1, 2), tuple, True), + ((1, 2), object, True), + ([], list, True), + ([], object, True), + (set(), set, True), + (set(), object, True), + ([1, "test"], list, True), + ([1, "test"], object, True), + ({}, dict, True), + ({}, object, True), + ({"hi": 1}, dict, True), + ({"hi": 1}, object, True), + (2, list, False), + (float('inf'), int, False), + (complex(1, 1.0), float, False), + (True, str, False), + (None, bool, False), + ('a', int, False), + ("test", list, False), + ((1, 2), set, False), + (set(), tuple, False), + ([1, "test"], dict, False), + ({"hi": 1}, list, False) + ] +) +def test_check_list_elements_type(variable, intended_type, return_value): + """ + Test for function checking if a variable is of a specified type. 
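    (note: check_variable_type returns True/False here rather than raising TypeError)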
+ """ + result = check_variable_type(variable, intended_type) + assert result == return_value + + + +class TestCheckListElementsType: @pytest.mark.parametrize( - "variable, intended_type", + "list_to_check, intended_type, return_value", [ - (2, int), - (-2, int), - (float('inf'), float), - (complex(1, 1.0), complex), - (True, bool), - (False, bool), - (None, type(None)), - (None, object), - ('a', str), - ('a', object), - ("test", str), - ("test", object), - ((1, 2), tuple), - ((1, 2), object), - ([], list), - ([], object), - (set(), set), - (set(), object), - ([1, "test"], list), - ([1, "test"], object), - ({}, dict), - ({}, object), - ({"hi": 1}, dict), - ({"hi": 1}, object) + ([], str, True), + ([], int, True), + ([], list, True), + ([], object, True), + (["id"], str, True), + (["1", "test", "2"], str, True), + ([1, 2, 3, 4], int, True), + ([[1], [2], ["test", "2"], []], list, True), + ([["1", "2", "3", "4", "5"], ["6"]], str, False), + ([["1", "2", "3", "4", "5"], "6"], list, False), + ([None, "test"], str, False), + ([1, "test"], int, False) ] ) - def test_check_list_elements_type_no_errors(self, variable, intended_type): + def test_check_list_elements_type_no_errors(self, list_to_check, intended_type, return_value): """ - Test for function checking if a variable is of a specified type, + Test for function checking that all elements of a list are of a specified type, no errors raised. """ - result = check_variable_type(variable, intended_type) - assert result == None + result = check_list_elements_type(list_to_check, intended_type) + assert result == return_value + @pytest.mark.parametrize( - "variable, intended_type", + "list_to_check, intended_type", [ - (2, list), - (float('inf'), int), - (complex(1, 1.0), float), - (True, str), - (None, bool), - ('a', int), + (1, str), ("test", list), - ((1, 2), set), - (set(), tuple), - ([1, "test"], dict), - ({"hi": 1}, list) + (None, str) ] ) - def test_check_variable_type_typeerror(self, variable, intended_type): + def test_check_list_elements_type_typeerror(self, list_to_check, intended_type): """ - Test for function checking if a variable is of a specified type, - TypeError raised. + Test for function checking if all elements of a list are strings, + TypeError raised when list elements are not of the intended type. 
""" with pytest.raises(TypeError) as type_err_info: - check_variable_type(variable, intended_type) - assert str(type_err_info.value) == "Input must be of type {0}".format(str(intended_type)) + check_list_elements_type(list_to_check, intended_type) + assert str(type_err_info.value) == "list_to_check argument must be of type {0}".format(str(list)) -class TestListElementsType: +class TestCheckPresenceOfAttributes: @pytest.mark.parametrize( - "list_to_check, intended_type", + "input_object, attributes_to_check", [ - ([], str), - ([], int), - ([], list), - ([], object), - (["id"], str), - (["1", "test", "2"], str), - ([1, 2, 3, 4], int), - ([[1], [2], ["test", "2"], []], list) + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, None), + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, []), + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]) ] ) - def test_check_list_elements_type_no_errors(self, list_to_check, intended_type): + def test_check_presence_of_attributes_no_errors(self, input_object, attributes_to_check): """ - Test for function checking that all elements of a list are of a specified type, + Test for function checking that specified attributes are part of a given object, no errors raised. """ - result = check_list_elements_type(list_to_check, intended_type) + result = check_presence_of_attributes(input_object, attributes_to_check) assert result == None @pytest.mark.parametrize( - "list_to_check", + "input_object, attributes_to_check", [ - ((["1", "2", "3", "4", "5"], ["6"])), - (None), - ("test") + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, 1), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, "incorrect_input_type"), + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test", 4]) ] ) - def test_check_list_elements_type_listtocheck_not_list(self, list_to_check): + def test_check_presence_of_attributes_type_errors(self, input_object, attributes_to_check): """ - Test for function checking if all elements of a list are strings, - TypeError raised (list_to_check not a list) + Test for function checking that specified attributes are part of a given object, + TypeError raised because of incorrect argument type. """ - with pytest.raises(TypeError) as type_err_info: - check_list_elements_type(list_to_check, str) - assert str(type_err_info.value) == "Input must be of type {0}".format(str(list)) + with pytest.raises(TypeError): + check_presence_of_attributes(input_object, attributes_to_check) + @pytest.mark.parametrize( + "input_object, attributes_to_check", + [ + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present"]), + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present_0", "not_present_1"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "not_present"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "not_present", "test_1", "test_2"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "not_present_0", "test_2", "not_present_1"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["not_present", "test_0", "test_2", "test_1"]) + ] + ) + def test_check_presence_of_attributes_value_errors(self, input_object, attributes_to_check): + """ + Test for function checking that specified attributes are part of a given object, + ValueError raised. 
+ """ + with pytest.raises(ValueError) as value_err_info: + check_presence_of_attributes(input_object, attributes_to_check) + assert "Object validation error" in str(value_err_info.value) +class TestSetListAttributes: @pytest.mark.parametrize( - "list_to_check, intended_type", + "input_object, attributes_to_set", [ - ([["1", "2", "3", "4", "5"], ["6"]], str), - ([["1", "2", "3", "4", "5"], "6"], list), - ([None, "test"], str), - ([1, "test"], int) + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, None), + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, []), + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]) ] ) - def test_check_list_elements_type_typeerror(self, list_to_check, intended_type): + def test_set_list_attributes_of_existing_nonlist_attributes(self, input_object, attributes_to_set): """ - Test for function checking if all elements of a list are strings, - Exception raised + Test for function that gets rid of duplicates within object attributes that are lists, + or sets attributes to empty list if not present within the object. + Cases where the attributes to set are existent and are NOT lists, no action done. """ - with pytest.raises(TypeError) as type_err_info: - check_list_elements_type(list_to_check, intended_type) - assert str(type_err_info.value) == "All elements in list must be of type {0}".format(str(intended_type)) \ No newline at end of file + original_object = deepcopy(input_object) + result = set_list_attributes(input_object, attributes_to_set) + assert result == None + assert vars(input_object) == vars(original_object) # no attributes changed + + @pytest.mark.parametrize( + "input_object, attributes_to_set", + [ + (TESTER_OBJ_SINGLE_SIMPLE_ATTR, 1), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, "incorrect_input_type"), + (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["hi", 2]) + ] + ) + def test_set_list_attributes_type_errors(self, input_object, attributes_to_set): + """ + Test for function setting list attributes, + TypeError raised because of incorrect argument type. + """ + with pytest.raises(TypeError): + set_list_attributes(input_object, attributes_to_set) + + @pytest.mark.parametrize( + "input_object, attributes_to_set, orig_lengths, reset_lengths", + [ + (TESTER_OBJ_SINGLE_EMPTY_LIST_ATTR, ["list_empty_0"], [0], [0]), + (TESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, ["list_empty_0", "list_empty_1", "list_empty_2"], [0, 0, 0], [0, 0, 0]), + (TESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), + (TESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, ["list_simple_0", "list_simple_1"], [3, 3], [3, 3]), + (TESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), + (TESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, ["list_simple_0", "list_simple_2", "list_simple_1"], [4, 3, 3], [3, 2, 3]) + ] + ) + def test_set_list_attributes_of_existing_list_attributes(self, input_object, attributes_to_set, orig_lengths, reset_lengths): + """ + Test for function that gets rid of duplicates within object attributes that are lists, + or sets attributes to empty list if not present within the object. + Cases where the attributes to set are existent and are lists. 
+ """ + # import pdb; pdb.set_trace() + # check original length of attributes_to_set + for ind, attribute in enumerate(attributes_to_set): + assert len(getattr(input_object, attribute)) == orig_lengths[ind] + + result = set_list_attributes(input_object, attributes_to_set) + + # check length of "reset" attributes_to_set + for idx, attribute in enumerate(attributes_to_set): + assert len(getattr(input_object, attribute)) == reset_lengths[idx] + + assert result == None + + +# # # then do non lists +# # # then do nonexistent +# # # then do mixed \ No newline at end of file From fa02b5bd62b588e8c87a7b255372b6302b541bee Mon Sep 17 00:00:00 2001 From: vstevensf Date: Thu, 3 Nov 2022 09:37:35 -0400 Subject: [PATCH 09/38] Finished topological sort, need to add docstrings and refactor helper fxns --- magma/metawfl_handler.py | 6 +- magma/utils.py | 342 ++++++++++++++++++++++++++++----------- 2 files changed, 250 insertions(+), 98 deletions(-) diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index 14b6560..b2d40ff 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -10,6 +10,9 @@ ################################################ from magma.utils import check_presence_of_attributes, set_list_attributes, generate_ordered_steps_list + +#TODO: make parent class maybe + ################################################ # MetaWorkflowStep ################################################ @@ -105,7 +108,8 @@ def __init__(self, input_dict): # order the meta_workflows list based on dependencies ordered_meta_workflows = generate_ordered_steps_list(self.meta_workflows, "name", "dependencies") - self.meta_workflows = ordered_meta_workflows + self.ordered_meta_workflows = ordered_meta_workflows + #TODO: should i make this a new calculated attribute, rather than redefining? YES # create MetaWorkflow object for each metaworkflow step in meta_workflows #TODO: do in magma-ff? because requires pulling metadata using UUID diff --git a/magma/utils.py b/magma/utils.py index 769b49c..e89db67 100644 --- a/magma/utils.py +++ b/magma/utils.py @@ -3,6 +3,7 @@ ################################################ # Libraries ################################################ +from copy import deepcopy ################################################ # Functions @@ -11,17 +12,18 @@ def check_variable_type(variable, intended_type): """ Checks that given variable is of the intended type. - Raises TypeError if not of the intended type. - If it matches, returns None. :param variable: variable to be checked - :type variable: depends?? 
(TODO: lol check how to define this) + :type variable: data type :param intended_type: the variable type that is intended - :type intended_type: also....depends..(TODO:) - :raises TypeError: if variable is of incorrect/unintended type + :type intended_type: data type + :return: True if the variable if of the intended_type, else False + :rtype: bool """ if not isinstance(variable, intended_type): - raise TypeError("Input must be of type {0}".format(str(intended_type))) + return False + else: + return True def check_list_elements_type(list_to_check, intended_type): """ @@ -31,17 +33,15 @@ def check_list_elements_type(list_to_check, intended_type): :param list_to_check: list to be checked :type list_to_check: list :param intended_type: the variable type that is intended - :type intended_type: also....depends..(TODO:) - :raises TypeError: if list_to_check is of incorrect type (not a list) - :raises Exception: if not all list elements are of the intended type - TODO: should this exception also be a TypeError + :type intended_type: data type + :return: True if all elements of list_to_check are of the intended_type, else False + :rtype: bool """ - # Check that input is of type list - check_variable_type(list_to_check, list) - # check that all elements in list are strings if not all(isinstance(element, intended_type) for element in list_to_check): - raise Exception("All elements in list must be of type {0}".format(str(intended_type))) + return False + else: + return True def check_presence_of_attributes(input_object, attributes_to_check=None): """ @@ -49,22 +49,18 @@ def check_presence_of_attributes(input_object, attributes_to_check=None): in this object :param input_object: object to check - :type input_object: object (dict) + :type input_object: object (instance of some class) :param attributes_to_check: list of attributes to check :type attributes_to_check: list[str] - :raises ValueError: if object doesn't have a specified attribute - - TODO: should this have a return? right now it just raises errors or not + :return: None, if there are no attributes to check + :return: None, if all specified attributes are present + :raises ValueError: if input_object doesn't have a specified attribute """ + #TODO: make the next three commands its own helper function? I repeat variations + # several times if attributes_to_check is None: return - # Check that attributes_to_check is of type list - check_variable_type(attributes_to_check, list) - - # check that all attributes to be checked are strings - check_list_elements_type(attributes_to_check, str) - for attribute in attributes_to_check: try: getattr(input_object, attribute) @@ -72,6 +68,29 @@ def check_presence_of_attributes(input_object, attributes_to_check=None): raise ValueError("Object validation error, {0}\n" .format(e.args[0])) +def check_presence_of_key(list_of_dicts, key_to_check=None): + """ + Takes in a list of dictionaries and a list of keys, checks that those keys + are present within every dict in this list/array. + + :param list_of_dicts: dictionaries to check + :type input_dict: list[dict] + :param key_to_check: key to check + :type key_to_check: str + :return: True, if the specified key is present in each dict, or there is no + key to check, else False + :rtype: bool + """ + #TODO: make the next three commands its own helper function? 
+def check_presence_of_key(list_of_dicts, key_to_check=None):
+    """
+    Takes in a list of dictionaries and a list of keys, checks that those keys
+    are present within every dict in this list/array.
+
+    :param list_of_dicts: dictionaries to check
+    :type input_dict: list[dict]
+    :param key_to_check: key to check
+    :type key_to_check: str
+    :return: True, if the specified key is present in each dict, or there is no
+             key to check, else False
+    :rtype: bool
+    """
+    #TODO: make the next three commands its own helper function? I repeat variations
+    # several times -- on this note, look up how to have flexible argument
+    if key_to_check is None:
+        return True
+
+    if not all((key_to_check in dictionary) for dictionary in list_of_dicts):
+        return False
+
+    return True
+
 def set_list_attributes(input_object, attributes_to_set=None):
     """
     Checks for given attribute(s) of type list, sets as empty list if not present,
@@ -81,105 +100,234 @@ def set_list_attributes(input_object, attributes_to_set=None):
     :type input_object: object (dict)
     :param attributes_to_set: list of attributes to set
     :type attributes_to_set: list[str]
+    :return: None, either when there are no attributes to set or once all
+             specified attributes have been set without errors
     """
     if attributes_to_set is None:
         return
 
-    # check that all attributes to be checked are strings
-    check_list_elements_type(attributes_to_set, str)
     # especially if we are handling duplicates in reordering list function
     for attribute in attributes_to_set:
         if not hasattr(input_object, attribute):
             # if not present, set attribute as empty list
             setattr(input_object, attribute, [])
+        else:
+            attrib = getattr(input_object, attribute)
+
+            # check if this attribute is a list
+            if check_variable_type(attrib, list):
+                # then get rid of duplicates, if present
+                non_dup_attrib = []
+                for item in attrib:
+                    if item not in non_dup_attrib:
+                        non_dup_attrib.append(item)
+                setattr(input_object, attribute, non_dup_attrib)
+            else:
+                continue
+
+#TODO: could make this more general...
+def set_dependency_list_values(list_of_dicts, name_of_step_key, name_of_dependencies_key):
+    """
+    Checks for dependency key within each dictionary in list_of_dicts.
+    If not present, add that key and set value as empty list.
+    Else, remove duplicates and self-dependencies.
+
+    :param list_of_dicts: list of dictionaries that should hold a dependency list. Each
+                          dictionary corresponds to a step, and the list represents
+                          a "list of steps" with dependencies between them.
+    :type list_of_dicts: list[dict]
+    :param name_of_step_key: name of the key corresponding to the step's name –
+                             i.e. attribute referred to by dependency values
+    :type name_of_step_key: str
+    :param name_of_dependencies_key: name of the key corresponding to the dependencies list
+    :type name_of_dependencies_key: str
+    :return: a copy of list_of_dicts with appropriate dependency lists set
+    :rtype: list[dict]
+    :raises TypeError: if name_of_dependencies_key is not a string
+    """
 
-def generate_ordered_steps_list(steps_with_dependencies_array, name_of_step_attribute, name_of_dependency_attribute):
+    list_of_dicts_copy = deepcopy(list_of_dicts)
+    # iterate through list of dicts and set dependencies key-value pair
+    for dictionary in list_of_dicts_copy:
+        # add empty dependency list if not present
+        if not name_of_dependencies_key in dictionary:
+            dictionary[name_of_dependencies_key] = []
+
+        # get rid of duplicates
+        # I choose this method for generalization, in the case that dependencies is
+        # a list of dictionaries, which are an unhashable type
+        dependencies = dictionary[name_of_dependencies_key]
+        non_duplicated_dependencies = []
+        for dependency in dependencies:
+            if dependency not in non_duplicated_dependencies:
+                non_duplicated_dependencies.append(dependency)
+        dictionary[name_of_dependencies_key] = non_duplicated_dependencies
+        #TODO: note -- im working under the assumption that because of the limitations
+        # of the schema, the dependencies will be of the correct type. 
Must I include + # a check that each dependency is in fact a name of another metaworkflow? + #(...probably. :/ ) + + # check for self-dependencies + new_dependencies = dictionary[name_of_dependencies_key] #repetitive, but just for readability + dictionary_name = dictionary[name_of_step_key] + # remove from this list + #TODO: should I throw exception instead? I think it's fine to just remove bc it's easy + new_dependencies = list(filter(lambda element: element != dictionary_name, new_dependencies)) + + return list_of_dicts_copy + +def generate_ordered_step_name_list(list_of_dicts, name_of_step_key): """ - Takes in list of steps and reorders based on dependencies, returning reordered list. - If impossible to create ordered list (circular dependencies, missing steps, etc.), - returns None. TODO: check this -- it'll throw an exception/error, not return None + Based on a list of dictionaries (representing a list of steps) with a "name" key + for each dictionary, return a list of the names of each dictionary with + indices corresponding to the indices of the dictionaries themselves (same order). + """ + names = [] + for dictionary in list_of_dicts: + names.append(dictionary[name_of_step_key]) + return names + #TODO: in test, check that it is always in the same order - :param steps_with_dependencies_array: list of objects, where each object has (at least) step and dependency attribute - #TODO: check the above line -- dependency may not be necessary - :type steps_with_dependencies_array: list of dicts/objects - :param name_of_step_attribute: name of the key corresponding to the step's name – - i.e. attribute referred to by dependency values - :type name_of_step_attribute: str - :param name_of_dependency_attribute: name of the key corresponding to the dependencies list - :type name_of_dependency_attribute: str - :return: the reordered list (if possible) - :rtype: list of dicts/objects - TODO: add errors and exceptions possibly thrown +def define_forward_dependencies(list_of_dicts, name_of_step_key, name_of_dependencies_key): + """ + Build directed graph by "reversing" dependencies TODO: redo comment """ - # check that name_of_step_attribute is a string - check_variable_type(name_of_step_attribute, str) + names = generate_ordered_step_name_list(list_of_dicts, name_of_step_key) + + for dictionary in list_of_dicts: + current_dependencies = dictionary[name_of_dependencies_key] + current_dict_name = dictionary[name_of_step_key] - # check that steps_with_dependencies_array is a list of objects/dicts - check_list_elements_type(steps_with_dependencies_array, object) - #TODO: does json "sub" object of type object or dict?? - # ^ also what are we actually working with here - # ^ because json.load turns it into a dict.... - # and the MWFH class has those subparts as type dict (from manual testing) - # solved: isinstance takes care of this anyway??...do we want that differentiation tho + # go through each step this current step is dependent on + # and add "step_after" attribute + # (dependencies are "origin", "source", or "progenitor" steps) + for dependency in current_dependencies: + # isolate the index of the dependency using names list + #TODO: this matches to the first occurence of dependency within the array + idx = names.index(dependency) + dependency_step_dict = list_of_dicts[idx] + + #TODO: consider helper fxn? 
but not necessary + #TODO: rename this attribute + if not ("steps_after" in dependency_step_dict): + dependency_step_dict["steps_after"] = [] + + dependency_step_dict["steps_after"].append(current_dict_name) + +def find_index_with_given_step_name(steps_with_dependencies_list, name_of_step_key, name): + for index, step in enumerate(steps_with_dependencies_list): + if step[name_of_step_key] == name: + return index, step + +def topological_sort_DFS_helper(graph, curr_node, curr_idx, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue): + if visited_permanent[curr_idx]: + return queue + if visited_temporary[curr_idx]: + raise Exception("cycle in graph!: node " + curr_node[name_of_node_key]) - # check that all objects in steps_with_dependencies_array have name_of_step_attribute - if not all(hasattr(element, name_of_step_attribute) for element in steps_with_dependencies_array): - raise Exception("All elements in list must have attribute \"{0}\"".format(name_of_step_attribute)) - # TODO: make this a utility function -- also, should it raise exception or error + visited_temporary[curr_idx] = True - # TODO: random, but we never make the check that the same key is used twice in dict - # by default, python takes the value as the lastmost definition of that key - # i don't think we need to worry about this + for following_step in curr_node[name_of_dependencies_key]: + #TODO: can't have duplicates in names with this method! + idx_following_node, following_node = find_index_with_given_step_name(graph, name_of_node_key, following_step) + topological_sort_DFS_helper(graph, following_node, idx_following_node, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue) - # TODO: feel like this is overkill, but checking for duplicates in steps_with_dependencies_array? - # is there any case where the exact same step will be defined?? i think gets tricky with sharding maybe? idk + visited_temporary[curr_idx] = False + visited_permanent[curr_idx] = True + queue.append(curr_node) + return queue + # TODO: for test, can check that there are no duplicates in returned queue - ### List reordering based on dependencies ### - ## Preprocessing of dependencies lists -- TODO: could make this its own function - # add dependencies attribute if not present, remove duplicates from dependencies, - # and check for self dependencies - for step in steps_with_dependencies_array: - # add empty dependency list if not present - # TODO: setting of dependency to []? so that it at least has a value - # VERSUS not having the dependency attribute at all -- I'm sticking w latter for now - if not getattr(step, name_of_dependency_attribute, None): #TODO: use helper function above instead - setattr(step, name_of_dependency_attribute, []) +def topological_sort(graph, name_of_node_key, name_of_dependencies_key): + """ + DFS algorithm from wikipedia https://en.wikipedia.org/wiki/Topological_sorting + Logic based on topological sort of directed graph from https://www.geeksforgeeks.org/topological-sorting/ + TODO: finish this docstring l8r + Time complexity = O(V+E), where V = # vertices/nodes (steps), E = # edges (in directed graph, dependencies) + https://www.geeksforgeeks.org/detect-cycle-in-directed-graph-using-topological-sort/?id=discuss = cycle detection : + So, in detail, just do a topological sort and get the queue of the results. 
Then as you pop from the final queue and + push to your result vector/array, check all the adjacent nodes of the last popped item and if the adjacent node + exists in the vector then it's a cycle (if A goes to B then B should not precede A in the topological ordering). + ASSUMPTOPN: no self loops (i deletd them) + + pseudocode from wikipedia: + L ← Empty list that will contain the sorted nodes + while exists nodes without a permanent mark do + select an unmarked node n + visit(n) + + function visit(node n) + if n has a permanent mark then + return + if n has a temporary mark then + stop (graph has at least one cycle) + + mark n with a temporary mark + + for each node m with an edge from n to m do + visit(m) + + remove temporary mark from n + mark n with a permanent mark + add n to head of L + """ + num_steps = len(graph) + visited_temporary = [False]*num_steps + visited_permanent = [False]*num_steps + queue = [] #First In First Out - # get rid of duplicates -- TODO: make a helper function? - dependencies = getattr(step, name_of_dependency_attribute) - setattr(step, name_of_dependency_attribute, list(set(dependencies))) + while not all((element == True) for element in visited_permanent): + curr_idx = visited_permanent.index(False) + curr_node = graph[curr_idx] + #calling recursive helper function + queue = topological_sort_DFS_helper(graph, curr_node, curr_idx, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue) - # check for self dependencies -- if present, throw exception (TODO: check this) - dependencies = getattr(step, name_of_dependency_attribute) # list of dependencies - name = getattr(step, name_of_step_attribute) - if name in dependencies: - raise Exception("Self dependency for step \"{0}\" not allowed".format(name)) + # for element in queue: + # print(element["name"]) - ## Build directed graph by "reversing" dependencies (TODO: redo this comment and make own function) + return queue - # make list of "name" values, whose indices correspond to indices of the objects in steps_with_dependencies_array - names = [] - for obj in steps_with_dependencies_array: - names.append(getattr(obj, name_of_step_attribute)) #TODO:alternatively, do in the above for loop +def generate_ordered_steps_list(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key): + """ + Takes in list of steps and reorders based on dependencies, returning a separate copy of + a reordered list. + If impossible to create ordered list (circular dependencies, missing steps, etc.), + throws error or exception. - for step in steps_with_dependencies_array: - dependencies = getattr(step, name_of_dependency_attribute) - - # go through each step this current step is dependent on - # and add "step_after" attribute - # (dependencies are "origin", "source", or "progenitor" steps) - for dependency in dependencies: + :param steps_with_dependencies_list: list of dictionaries, where each dictionary has + at least a step name + :type steps_with_dependencies_list: list[dict] + :param name_of_step_key: name of the key corresponding to the step's name – + i.e. 
attribute referred to by dependency values + :type name_of_step_key: str + :param name_of_dependencies_key: name of the key corresponding to the dependencies list + :type name_of_dependencies_key: str + :return: a copy of the reordered list (if possible) + :rtype: list[dict] + TODO: add errors and exceptions possibly thrown + """ + + # check that all objects in steps_with_dependencies_list have name_of_step_key + if not check_presence_of_key(steps_with_dependencies_list, name_of_step_key): + raise Exception("All dictionary elements in steps_with_dependencies_list must have attribute \"{0}\"".format(name_of_step_key)) + # TODO: feel like this is overkill, but checking for duplicates in steps_with_dependencies_list? + # is there any case where the exact same step will be defined?? i think gets tricky with sharding maybe? idk + + ### List reordering based on dependencies ### - # if this dependency step doesn't have the step_after attribute, create it - if not getattr(dependency, "step_after", None): #TODO: rename this attribute, use helper fxn here - setattr(dependency, "step_after", []) + ## Preprocessing of dependencies lists + # add dependencies attribute if not present, remove duplicates from dependencies, + # and check for self dependencies + preprocessed_steps_with_dependencies_list = set_dependency_list_values(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) + + ## Build directed graph by "reversing" dependencies (TODO: redo this comment and make own function) + define_forward_dependencies(preprocessed_steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) + ordered_steps_list = topological_sort(preprocessed_steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) + + return ordered_steps_list - # TODO:edge cases: all steps have dependencies, no steps depending on each other, dependency on self - # TODO: should I do resetting of list attribute (reordered) from original source object here or outside? - # (like, the pass by reference problem. look this up for python) - # check this in use of other helper functions too - # here, I have chosen to return the reordered array and redefine the metaworkflows list in main MWFH class + # TODO:edge cases: all steps have dependencies, no steps depending on each other, dependency on self, identical steps From 3e9348df048ce459834c46be2e2934d61ee45a13 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Thu, 3 Nov 2022 09:38:24 -0400 Subject: [PATCH 10/38] Modified some tests, removed a few extraneous. Still need to finish tests for topological sort. --- test/test_utils_magma.py | 480 +++++++++++++++++++++++---------------- 1 file changed, 284 insertions(+), 196 deletions(-) diff --git a/test/test_utils_magma.py b/test/test_utils_magma.py index f0f38e1..fa2a426 100644 --- a/test/test_utils_magma.py +++ b/test/test_utils_magma.py @@ -5,6 +5,7 @@ from copy import deepcopy from magma.utils import * + # from magma.metawfl_handler import MetaWorkflowStep, MetaWorkflowHandler ################################################################# @@ -12,10 +13,11 @@ ################################################################# # dummy class for creating simple objects -class Tester: +class ClassTester: """ Class for creation of simple objects, based on an input dictionary """ + def __init__(self, input_dict): """ Constructor method, initialize object and attributes. 
@@ -26,127 +28,122 @@ def __init__(self, input_dict): for key in input_dict: setattr(self, key, input_dict[key]) -# Stop pytest from collecting class Tester as test (prevent warning) -Tester.__test__ = False - -#TODO: is there a way to functionalize this? -# input dicts to create Tester objects +# TODO: is there a way to functionalize this? +# input dicts to create ClassTester objects INPUT_DICT_SINGLE_SIMPLE_ATTR = {"test_0": 0} +INPUT_DICT_SINGLE_SIMPLE_ATTR_1 = {"test_1": 0} +INPUT_DICT_SINGLE_SIMPLE_ATTR_2 = {"test_2": 0} INPUT_DICT_SEVERAL_SIMPLE_ATTRS = {"test_0": 0, "test_1": 1, "test_2": 2} INPUT_DICT_SINGLE_EMPTY_LIST_ATTR = {"list_empty_0": []} -INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS = {"list_empty_0": [], "list_empty_1": [], "list_empty_2": []} +INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS = { + "list_empty_0": [], + "list_empty_1": [], + "list_empty_2": [], +} INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR = {"list_simple_0": [1, 2, 3]} -INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS = {"list_simple_0": [1, 2, 3], "list_simple_1": ["a", "b", "c"]} +INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS = { + "list_simple_0": [1, 2, 3], + "list_simple_1": ["a", "b", "c"], +} INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP = {"list_simple_0": [1, 2, 3, 4, 3]} -INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = {"list_simple_0": [1, "a", 3, 3], "list_simple_1": ["a", "b", "c"], "list_simple_2": ["c", 1, "c"]} - -# Tester objects -TESTER_OBJ_SINGLE_SIMPLE_ATTR = Tester(INPUT_DICT_SINGLE_SIMPLE_ATTR) -TESTER_OBJ_SEVERAL_SIMPLE_ATTRS = Tester(INPUT_DICT_SEVERAL_SIMPLE_ATTRS) -TESTER_OBJ_SINGLE_EMPTY_LIST_ATTR = Tester(INPUT_DICT_SINGLE_EMPTY_LIST_ATTR) -TESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS = Tester(INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS) -TESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR = Tester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR) -TESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS = Tester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS) -TESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP = Tester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP) -TESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = Tester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP) - -# TITLE = "Test MetaWorkflow Handler" -# NAME = "test_metawf_handler" -# VERSION = "v1" -# DESCRIPTION = "Test metaworkflow handler" -# PROJECT = "test_project" -# INSTITUTION = "test_institution" -# MWFH_UUID_0 = "test_mwfh_uuid_0" -# ALIASES = ["cgap:test_metawf_handler"] -# MWF_UUID_0 = "test_mwf_uuid_0" -# MWF_UUID_1 = "test_mwf_uuid_1" -# MWF_UUID_2 = "test_mwf_uuid_2" -# MWF_UUID_3 = "test_mwf_uuid_3" -# MWF_UUID_4 = "test_mwf_uuid_4" -# MWF_UUID_5 = "test_mwf_uuid_5" -# MWF_NAME_A = "A" -# MWF_NAME_B = "B" -# MWF_NAME_C = "C" -# MWF_NAME_D = "D" -# MWF_NAME_E = "E" -# MWF_NAME_F = "F" - -# ITEMS_FOR_CREATION_PROPERTY_TRACE_0 = "sample_processing.samples" -# ITEMS_FOR_CREATION_UUID_0 = "test_sample_uuid_0" -# ITEMS_FOR_CREATION_UUID_1 = "test_sample_uuid_1" - - - -# SIMPLE_META_WORKFLOW_DICT_0 = { -# "meta_workflow": MWF_UUID_0, -# "name": MWF_NAME_A -# } -# SIMPLE_META_WORKFLOW_DICT_1 = { -# "meta_workflow": MWF_UUID_1, -# "name": MWF_NAME_B -# } -# SIMPLE_META_WORKFLOW_DICT_2 = { -# "meta_workflow": MWF_UUID_2, -# "name": MWF_NAME_C -# } - -# META_WORKFLOWS_ARRAY_SINGLE_ITEM = [SIMPLE_META_WORKFLOW_DICT_0] -# SINGLE_ITEM_META_WORKFLOWS_DICT = {"meta_workflows": META_WORKFLOWS_ARRAY_SINGLE_ITEM} - -# META_WORKFLOWS_ARRAY_SEVERAL_ITEMS = [SIMPLE_META_WORKFLOW_DICT_0, SIMPLE_META_WORKFLOW_DICT_1, SIMPLE_META_WORKFLOW_DICT_2] -# SEVERAL_ITEMS_META_WORKFLOWS_DICT = {"meta_workflows": META_WORKFLOWS_ARRAY_SEVERAL_ITEMS} - -# 
META_WORKFLOWS_ARRAY_SEVERAL_ITEMS_W_DUPLICATES = [SIMPLE_META_WORKFLOW_DICT_0, SIMPLE_META_WORKFLOW_DICT_2, SIMPLE_META_WORKFLOW_DICT_1, SIMPLE_META_WORKFLOW_DICT_2] -# SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_DICT = {"meta_workflows": META_WORKFLOWS_ARRAY_SEVERAL_ITEMS_W_DUPLICATES} - -# EMPTY_META_WORKFLOWS_DICT = {"meta_workflows": []} - -# SIMPLE_MWFH_DICT = { -# "title": TITLE, -# "name": NAME, -# "version": VERSION, -# "description": DESCRIPTION, -# "project": PROJECT, -# "institution": INSTITUTION, -# "uuid": MWFH_UUID_0, -# "aliases": ALIASES -# } - -# SIMPLE_MWFH_DICT_WITH_EMPTY_META_WORKFLOWS_LIST = deepcopy(SIMPLE_MWFH_DICT) -# SIMPLE_MWFH_DICT_WITH_EMPTY_META_WORKFLOWS_LIST.update(EMPTY_META_WORKFLOWS_DICT) - -# SIMPLE_MWFH_DICT_WITH_SINGLE_ITEM_META_WORKFLOWS_LIST = deepcopy(SIMPLE_MWFH_DICT) -# SIMPLE_MWFH_DICT_WITH_SINGLE_ITEM_META_WORKFLOWS_LIST.update(SINGLE_ITEM_META_WORKFLOWS_DICT) - -# SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_META_WORKFLOWS_LIST = deepcopy(SIMPLE_MWFH_DICT) -# SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_META_WORKFLOWS_LIST.update(SEVERAL_ITEMS_META_WORKFLOWS_DICT) - -# SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_LIST = deepcopy(SIMPLE_MWFH_DICT) -# SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_LIST.update(SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_DICT) - -# # import pdb; pdb.set_trace() -# SIMPLE_MWFH_OBJECT_WITH_EMPTY_META_WORKFLOWS_LIST = MetaWorkflowHandler(SIMPLE_MWFH_DICT_WITH_EMPTY_META_WORKFLOWS_LIST) -# SIMPLE_MWFH_OBJECT_WITH_SINGLE_ITEM_META_WORKFLOWS_LIST = MetaWorkflowHandler(SIMPLE_MWFH_DICT_WITH_SINGLE_ITEM_META_WORKFLOWS_LIST) -# SIMPLE_MWFH_OBJECT_WITH_SEVERAL_ITEMS_META_WORKFLOWS_LIST = MetaWorkflowHandler(SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_META_WORKFLOWS_LIST) -# SIMPLE_MWFH_OBJECT_WITH_SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_LIST = MetaWorkflowHandler(SIMPLE_MWFH_DICT_WITH_SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_LIST) +INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = { + "list_simple_0": [1, "a", 3, 3], + "list_simple_1": ["a", "b", "c"], + "list_simple_2": ["c", 1, "c"], +} +INPUT_DICT_SINGLE_LIST_OF_DICTS = { + "list_of_dicts": [ + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR_1, + INPUT_DICT_SINGLE_SIMPLE_ATTR_2, + ] +} +INPUT_DICT_SEVERAL_LISTS_OF_DICTS = { + "list_of_dicts_0": [ + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR_1, + INPUT_DICT_SINGLE_SIMPLE_ATTR_2, + ], + "list_of_dicts_1": [ + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR_2, + INPUT_DICT_SINGLE_SIMPLE_ATTR_1, + ], +} +INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP = { + "list_of_dicts": [ + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR_1, + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR_2, + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR, + ] +} +INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP = { + "list_of_dicts_0": [ + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR_1, + INPUT_DICT_SINGLE_SIMPLE_ATTR_2, + ], + "list_of_dicts_1": [ + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR_2, + INPUT_DICT_SINGLE_SIMPLE_ATTR_1, + ], + "list_of_dicts_2": [ + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR, + INPUT_DICT_SINGLE_SIMPLE_ATTR_1, + INPUT_DICT_SINGLE_SIMPLE_ATTR, + ], + "list_of_dicts_3": [ + INPUT_DICT_SINGLE_SIMPLE_ATTR_2, + INPUT_DICT_SINGLE_SIMPLE_ATTR_2, + INPUT_DICT_SINGLE_SIMPLE_ATTR_2, + ], +} + + +# ClassTester objects +ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR = 
ClassTester(INPUT_DICT_SINGLE_SIMPLE_ATTR) +ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_ATTRS) +ClassTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_EMPTY_LIST_ATTR) +ClassTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS) +ClassTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR) +ClassTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS) +ClassTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP = ClassTester( + INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP +) +ClassTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = ClassTester( + INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP +) +ClassTESTER_OBJ_SINGLE_LIST_OF_DICTS = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS) +ClassTESTER_OBJ_SEVERAL_LISTS_OF_DICTS = ClassTester(INPUT_DICT_SEVERAL_LISTS_OF_DICTS) +ClassTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP) +ClassTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP = ClassTester( + INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP +) ################################################################# # Tests ################################################################# + @pytest.mark.parametrize( "variable, intended_type, return_value", [ (2, int, True), (-2, int, True), - (float('inf'), float, True), + (float("inf"), float, True), (complex(1, 1.0), complex, True), (True, bool, True), (False, bool, True), (None, type(None), True), (None, object, True), - ('a', str, True), - ('a', object, True), + ("a", str, True), + ("a", object, True), ("test", str, True), ("test", object, True), ((1, 2), tuple, True), @@ -162,17 +159,17 @@ def __init__(self, input_dict): ({"hi": 1}, dict, True), ({"hi": 1}, object, True), (2, list, False), - (float('inf'), int, False), + (float("inf"), int, False), (complex(1, 1.0), float, False), (True, str, False), (None, bool, False), - ('a', int, False), + ("a", int, False), ("test", list, False), ((1, 2), set, False), (set(), tuple, False), ([1, "test"], dict, False), - ({"hi": 1}, list, False) - ] + ({"hi": 1}, list, False), + ], ) def test_check_list_elements_type(variable, intended_type, return_value): """ @@ -181,7 +178,6 @@ def test_check_list_elements_type(variable, intended_type, return_value): result = check_variable_type(variable, intended_type) assert result == return_value - class TestCheckListElementsType: @pytest.mark.parametrize( @@ -198,10 +194,12 @@ class TestCheckListElementsType: ([["1", "2", "3", "4", "5"], ["6"]], str, False), ([["1", "2", "3", "4", "5"], "6"], list, False), ([None, "test"], str, False), - ([1, "test"], int, False) - ] + ([1, "test"], int, False), + ], ) - def test_check_list_elements_type_no_errors(self, list_to_check, intended_type, return_value): + def test_check_list_elements_type_no_errors( + self, list_to_check, intended_type, return_value + ): """ Test for function checking that all elements of a list are of a specified type, no errors raised. @@ -210,37 +208,22 @@ def test_check_list_elements_type_no_errors(self, list_to_check, intended_type, assert result == return_value - @pytest.mark.parametrize( - "list_to_check, intended_type", - [ - (1, str), - ("test", list), - (None, str) - ] - ) - def test_check_list_elements_type_typeerror(self, list_to_check, intended_type): - """ - Test for function checking if all elements of a list are strings, - TypeError raised when list elements are not of the intended type. 
- """ - with pytest.raises(TypeError) as type_err_info: - check_list_elements_type(list_to_check, intended_type) - assert str(type_err_info.value) == "list_to_check argument must be of type {0}".format(str(list)) - class TestCheckPresenceOfAttributes: @pytest.mark.parametrize( "input_object, attributes_to_check", [ - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, None), - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, []), - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]) - ] + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), + ], ) - def test_check_presence_of_attributes_no_errors(self, input_object, attributes_to_check): + def test_check_presence_of_attributes_no_errors( + self, input_object, attributes_to_check + ): """ Test for function checking that specified attributes are part of a given object, no errors raised. @@ -251,53 +234,103 @@ def test_check_presence_of_attributes_no_errors(self, input_object, attributes_t @pytest.mark.parametrize( "input_object, attributes_to_check", [ - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, 1), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, "incorrect_input_type"), - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test", 4]) - ] + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present"]), + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present_0", "not_present_1"]), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "not_present"]), + ( + ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, + ["test_0", "not_present", "test_1", "test_2"], + ), + ( + ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, + ["test_0", "not_present_0", "test_2", "not_present_1"], + ), + ( + ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, + ["not_present", "test_0", "test_2", "test_1"], + ), + ], ) - def test_check_presence_of_attributes_type_errors(self, input_object, attributes_to_check): + def test_check_presence_of_attributes_value_errors( + self, input_object, attributes_to_check + ): """ Test for function checking that specified attributes are part of a given object, - TypeError raised because of incorrect argument type. + ValueError raised. 
""" - with pytest.raises(TypeError): + with pytest.raises(ValueError) as value_err_info: check_presence_of_attributes(input_object, attributes_to_check) + assert "Object validation error" in str(value_err_info.value) + +class TestCheckPresenceOfKey: @pytest.mark.parametrize( - "input_object, attributes_to_check", + "list_of_dicts, key_to_check, return_value", [ - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present"]), - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present_0", "not_present_1"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "not_present"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "not_present", "test_1", "test_2"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "not_present_0", "test_2", "not_present_1"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["not_present", "test_0", "test_2", "test_1"]) - ] + ([], None, True), + ([], "key", True), # kind of weird edge case, but not a biggie (TODO:) + ( + [ + INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, + INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS, + ], + "list_empty_0", + True, + ), + ( + [ + INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR, + INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS, + INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS, + INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP, + INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, + ], + "list_simple_0", + True, + ), + ( + [INPUT_DICT_SINGLE_LIST_OF_DICTS], + "list_of_dicts_0", + False, + ), + ( + [ + INPUT_DICT_SINGLE_LIST_OF_DICTS, + INPUT_DICT_SEVERAL_LISTS_OF_DICTS, + INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP, + ], + "list_of_dicts", + False, + ), + ], ) - def test_check_presence_of_attributes_value_errors(self, input_object, attributes_to_check): + def test_check_presence_of_key_no_errors( + self, list_of_dicts, key_to_check, return_value + ): """ - Test for function checking that specified attributes are part of a given object, - ValueError raised. + Test for function checking that all dictionaries in a given list have the + specified key, no errors raised. """ - with pytest.raises(ValueError) as value_err_info: - check_presence_of_attributes(input_object, attributes_to_check) - assert "Object validation error" in str(value_err_info.value) + result = check_presence_of_key(list_of_dicts, key_to_check) + assert result == return_value + class TestSetListAttributes: @pytest.mark.parametrize( "input_object, attributes_to_set", [ - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, None), - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, []), - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]) - ] + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), + ], ) - def test_set_list_attributes_of_existing_nonlist_attributes(self, input_object, attributes_to_set): + def test_set_list_attributes_of_existing_nonlist_attributes( + self, input_object, attributes_to_set + ): """ Test for function that gets rid of duplicates within object attributes that are lists, or sets attributes to empty list if not present within the object. 
@@ -306,36 +339,56 @@ def test_set_list_attributes_of_existing_nonlist_attributes(self, input_object, original_object = deepcopy(input_object) result = set_list_attributes(input_object, attributes_to_set) assert result == None - assert vars(input_object) == vars(original_object) # no attributes changed - - @pytest.mark.parametrize( - "input_object, attributes_to_set", - [ - (TESTER_OBJ_SINGLE_SIMPLE_ATTR, 1), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, "incorrect_input_type"), - (TESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["hi", 2]) - ] - ) - def test_set_list_attributes_type_errors(self, input_object, attributes_to_set): - """ - Test for function setting list attributes, - TypeError raised because of incorrect argument type. - """ - with pytest.raises(TypeError): - set_list_attributes(input_object, attributes_to_set) + assert vars(input_object) == vars(original_object) # no attributes changed @pytest.mark.parametrize( "input_object, attributes_to_set, orig_lengths, reset_lengths", [ - (TESTER_OBJ_SINGLE_EMPTY_LIST_ATTR, ["list_empty_0"], [0], [0]), - (TESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, ["list_empty_0", "list_empty_1", "list_empty_2"], [0, 0, 0], [0, 0, 0]), - (TESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), - (TESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, ["list_simple_0", "list_simple_1"], [3, 3], [3, 3]), - (TESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), - (TESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, ["list_simple_0", "list_simple_2", "list_simple_1"], [4, 3, 3], [3, 2, 3]) - ] + (ClassTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR, ["list_empty_0"], [0], [0]), + ( + ClassTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, + ["list_empty_0", "list_empty_1", "list_empty_2"], + [0, 0, 0], + [0, 0, 0], + ), + (ClassTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), + ( + ClassTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, + ["list_simple_0", "list_simple_1"], + [3, 3], + [3, 3], + ), + (ClassTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), + ( + ClassTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, + ["list_simple_0", "list_simple_2", "list_simple_1"], + [4, 3, 3], + [3, 2, 3], + ), + (ClassTESTER_OBJ_SINGLE_LIST_OF_DICTS, ["list_of_dicts"], [3], [3]), + ( + ClassTESTER_OBJ_SEVERAL_LISTS_OF_DICTS, + ["list_of_dicts_1", "list_of_dicts_0"], + [3, 3], + [3, 3], + ), + (ClassTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP, ["list_of_dicts"], [6], [3]), + ( + ClassTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP, + [ + "list_of_dicts_1", + "list_of_dicts_0", + "list_of_dicts_2", + "list_of_dicts_3", + ], + [3, 3, 5, 3], + [3, 3, 2, 1], + ), + ], ) - def test_set_list_attributes_of_existing_list_attributes(self, input_object, attributes_to_set, orig_lengths, reset_lengths): + def test_set_list_attributes_of_existing_list_attributes( + self, input_object, attributes_to_set, orig_lengths, reset_lengths + ): """ Test for function that gets rid of duplicates within object attributes that are lists, or sets attributes to empty list if not present within the object. 
@@ -343,8 +396,8 @@ def test_set_list_attributes_of_existing_list_attributes(self, input_object, att """ # import pdb; pdb.set_trace() # check original length of attributes_to_set - for ind, attribute in enumerate(attributes_to_set): - assert len(getattr(input_object, attribute)) == orig_lengths[ind] + for idx, attribute in enumerate(attributes_to_set): + assert len(getattr(input_object, attribute)) == orig_lengths[idx] result = set_list_attributes(input_object, attributes_to_set) @@ -354,7 +407,42 @@ def test_set_list_attributes_of_existing_list_attributes(self, input_object, att assert result == None + @pytest.mark.parametrize( + "input_object, attributes_to_set, num_added_attributes", + [ + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"], 0), + (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_1"], 1), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_2", "test_3"], 1), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_5", "test_0", "test_4"], 2), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"], 0), + (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"], 0), + ], + ) + def test_set_list_attributes_of_nonexistent_attributes( + self, input_object, attributes_to_set, num_added_attributes + ): + """ + Test for function that gets rid of duplicates within object attributes that are lists, + or sets attributes to empty list if not present within the object. + Cases where the attributes to set are nonexistent, so they are added with the value []. + """ + # TODO: this changes the objects permanently since I'm setting attrs + # but I don't think this will affect further testing (specifically, fourth example) + + original_attributes_set = set(dir(input_object)) + num_original_attributes = len(original_attributes_set) + + result = set_list_attributes(input_object, attributes_to_set) + assert result == None + + reset_attributes_set = set(dir(input_object)) + num_reset_attributes = len(reset_attributes_set) + + assert num_added_attributes == (num_reset_attributes - num_original_attributes) + + added_attributes = reset_attributes_set.difference(original_attributes_set) + for attribute in added_attributes: + assert attribute in attributes_to_set + assert getattr(input_object, attribute) == [] -# # # then do non lists -# # # then do nonexistent -# # # then do mixed \ No newline at end of file + # TODO: add a test for mixed cases? (nonexistent + lists + empties, etc.) From dcb737e5823d551485c667b6069a9b6b7d86962c Mon Sep 17 00:00:00 2001 From: vstevensf Date: Fri, 4 Nov 2022 14:28:45 -0400 Subject: [PATCH 11/38] Refactored the utils functions for topological sort into its own file. Resulted in edits within test files, including creation of a new test file for topological sort, and changes in imports for magma/metawfl_handler.py. 
---
 magma/metawfl_handler.py      |   3 +-
 magma/topological_sort.py     | 241 ++++++++++++++++++++++++++++++
 magma/utils.py                | 236 +-----------------------------
 test/test_topological_sort.py | 217 +++++++++++++++++++++++++++
 test/test_utils_magma.py      | 267 +++++++++++++---------------------
 5 files changed, 569 insertions(+), 395 deletions(-)
 create mode 100644 magma/topological_sort.py
 create mode 100644 test/test_topological_sort.py

diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py
index b2d40ff..3f35f3d 100644
--- a/magma/metawfl_handler.py
+++ b/magma/metawfl_handler.py
@@ -8,7 +8,8 @@
 ################################################
 # TODO: functions for dcic utils -- move later
 ################################################
-from magma.utils import check_presence_of_attributes, set_list_attributes, generate_ordered_steps_list
+from magma.utils import check_presence_of_attributes, set_list_attributes
+from magma.topological_sort import generate_ordered_steps_list
 
 
 #TODO: make parent class maybe
 
diff --git a/magma/topological_sort.py b/magma/topological_sort.py
new file mode 100644
index 0000000..6c2b9c6
--- /dev/null
+++ b/magma/topological_sort.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+
+################################################
+# Libraries
+################################################
+from copy import deepcopy
+
+################################################
+# Functions
+################################################
+
+def check_presence_of_key(list_of_dicts, key_to_check=None):
+    """
+    Takes in a list of dictionaries and a key, checks that the key
+    is present within every dict in this list/array.
+
+    :param list_of_dicts: dictionaries to check
+    :type list_of_dicts: list[dict]
+    :param key_to_check: key to check
+    :type key_to_check: str
+    :return: True, if the specified key is present in each dict, or there is no
+             key to check, else False
+    :rtype: bool
+    """
+    #TODO: make the next few commands their own helper function? I repeat variations
+    # several times -- on this note, look up how to have flexible argument
+    if key_to_check is None:
+        return True
+
+    if not all((key_to_check in dictionary) for dictionary in list_of_dicts):
+        return False
+
+    return True
+
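+# A minimal, illustrative sketch of the expected behavior:
+# >>> check_presence_of_key([{"name": "A"}, {"name": "B"}], "name")
+# True
+# >>> check_presence_of_key([{"name": "A"}, {"uuid": "x"}], "name")
+# False
+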
+#TODO: could make this more general...
+def set_dependency_list_values(list_of_dicts, name_of_step_key, name_of_dependencies_key):
+    """
+    Checks for dependency key within each dictionary in list_of_dicts.
+    If not present, add that key and set value as empty list.
+    Else, remove duplicates and self-dependencies.
+
+    :param list_of_dicts: list of dictionaries that should hold a dependency list. Each
+                          dictionary corresponds to a step, and the list represents
+                          a "list of steps" with dependencies between them.
+    :type list_of_dicts: list[dict]
+    :param name_of_step_key: name of the key corresponding to the step's name –
+                             i.e. attribute referred to by dependency values
+    :type name_of_step_key: str
+    :param name_of_dependencies_key: name of the key corresponding to the dependencies list
+    :type name_of_dependencies_key: str
+    :return: a copy of list_of_dicts with appropriate dependency lists set
+    :rtype: list[dict]
+    """
+
+    list_of_dicts_copy = deepcopy(list_of_dicts)
+    # iterate through list of dicts and set dependencies key-value pair
+    for dictionary in list_of_dicts_copy:
+        # add empty dependency list if not present
+        if not name_of_dependencies_key in dictionary:
+            dictionary[name_of_dependencies_key] = []
+            #TODO: do some renaming of this function to follow pattern of obj vs dict key setting?
+
+        # get rid of duplicates
+        # I choose this method for generalization, in the case that dependencies is
+        # a list of dictionaries, which are an unhashable type
+        dependencies = dictionary[name_of_dependencies_key]
+        non_duplicated_dependencies = []
+        for dependency in dependencies:
+            if dependency not in non_duplicated_dependencies:
+                non_duplicated_dependencies.append(dependency)
+        dictionary[name_of_dependencies_key] = non_duplicated_dependencies
+        #TODO: note -- I'm working under the assumption that, because of the limitations
+        # of the schema, the dependencies will be of the correct type. Must I include
+        # a check that each dependency is in fact a name of another metaworkflow?
+        # (...probably. :/ )
+
+        # check for self-dependencies and remove them from the list
+        #TODO: should I throw an exception instead? I think it's fine to just remove them
+        dictionary_name = dictionary[name_of_step_key]
+        new_dependencies = list(filter(lambda element: element != dictionary_name, non_duplicated_dependencies))
+        dictionary[name_of_dependencies_key] = new_dependencies
+
+    return list_of_dicts_copy
+
+def generate_ordered_step_name_list(list_of_dicts, name_of_step_key):
+    """
+    Based on a list of dictionaries (representing a list of steps) with a "name" key
+    for each dictionary, return a list of the names of each dictionary with
+    indices corresponding to the indices of the dictionaries themselves (same order).
+    """
+    names = []
+    for dictionary in list_of_dicts:
+        names.append(dictionary[name_of_step_key])
+    return names
+    #TODO: in test, check that it is always in the same order
+
+def define_forward_dependencies(list_of_dicts, name_of_step_key, name_of_dependencies_key):
+    """
+    Build the directed graph by "reversing" the dependencies: each step that appears
+    in another step's dependency list is annotated, in place, with a "steps_after"
+    list naming the steps that follow it.
+    """
+    names = generate_ordered_step_name_list(list_of_dicts, name_of_step_key)
+
+    for dictionary in list_of_dicts:
+        current_dependencies = dictionary[name_of_dependencies_key]
+        current_dict_name = dictionary[name_of_step_key]
+
+        # go through each step this current step is dependent on
+        # and add a "steps_after" entry pointing back to the current step
+        # (dependencies are "origin", "source", or "progenitor" steps)
+        for dependency in current_dependencies:
+            # isolate the index of the dependency using names list
+            #TODO: this matches the first occurrence of dependency within the array
+            idx = names.index(dependency)
+            dependency_step_dict = list_of_dicts[idx]
+
+            #TODO: consider helper fxn? but not necessary
+            #TODO: rename this attribute
+            if not ("steps_after" in dependency_step_dict):
+                dependency_step_dict["steps_after"] = []
+
+            dependency_step_dict["steps_after"].append(current_dict_name)
+
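+# A minimal, illustrative sketch (hypothetical steps) chaining the two helpers above:
+# >>> steps = [{"name": "A"},
+# ...          {"name": "B", "dependencies": ["A", "A", "B"]}]
+# >>> steps = set_dependency_list_values(steps, "name", "dependencies")
+# >>> steps[1]["dependencies"]   # duplicates and the self-dependency are removed
+# ['A']
+# >>> define_forward_dependencies(steps, "name", "dependencies")
+# >>> steps[0]["steps_after"]    # step A is annotated with its follower
+# ['B']
+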
+def find_index_with_given_step_name(steps_with_dependencies_list, name_of_step_key, name):
+    """
+    Return (index, step) for the first step in the list whose name matches the given name.
+    """
+    for index, step in enumerate(steps_with_dependencies_list):
+        if step[name_of_step_key] == name:
+            return index, step
+
+def topological_sort_DFS_helper(graph, curr_node, curr_idx, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue):
+    """
+    Recursive depth-first visit of curr_node: visit its dependencies first, then
+    append curr_node to the queue. Temporary marks are used to detect cycles.
+    """
+    if visited_permanent[curr_idx]:
+        return queue
+    if visited_temporary[curr_idx]:
+        raise Exception("cycle in graph!: node " + curr_node[name_of_node_key])
+
+    visited_temporary[curr_idx] = True
+
+    for following_step in curr_node[name_of_dependencies_key]:
+        #TODO: can't have duplicates in names with this method!
+        idx_following_node, following_node = find_index_with_given_step_name(graph, name_of_node_key, following_step)
+        topological_sort_DFS_helper(graph, following_node, idx_following_node, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue)
+
+    visited_temporary[curr_idx] = False
+    visited_permanent[curr_idx] = True
+    queue.append(curr_node)
+    return queue
+    # TODO: for test, can check that there are no duplicates in returned queue
+
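+# A minimal, illustrative sketch of the sort defined below; B depends on A,
+# so A is ordered first:
+# >>> steps = [{"name": "B", "dependencies": ["A"]},
+# ...          {"name": "A", "dependencies": []}]
+# >>> [step["name"] for step in topological_sort(steps, "name", "dependencies")]
+# ['A', 'B']
+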
+def topological_sort(graph, name_of_node_key, name_of_dependencies_key):
+    """
+    DFS algorithm from wikipedia https://en.wikipedia.org/wiki/Topological_sorting
+    Logic based on topological sort of directed graph from https://www.geeksforgeeks.org/topological-sorting/
+    TODO: finish this docstring later
+    Time complexity = O(V+E), where V = # vertices/nodes (steps), E = # edges (in directed graph, dependencies)
+    Cycle detection, based on https://www.geeksforgeeks.org/detect-cycle-in-directed-graph-using-topological-sort/?id=discuss :
+    do a topological sort and get the queue of the results. Then, as you pop from the final queue and
+    push to your result vector/array, check all the adjacent nodes of the last popped item; if an adjacent node
+    already exists in the vector, then there is a cycle (if A goes to B, then B should not precede A in the topological ordering).
+    ASSUMPTION: no self-loops (these are removed during preprocessing)
+
+    pseudocode from wikipedia:
+    L ← Empty list that will contain the sorted nodes
+    while exists nodes without a permanent mark do
+        select an unmarked node n
+        visit(n)
+
+    function visit(node n)
+        if n has a permanent mark then
+            return
+        if n has a temporary mark then
+            stop (graph has at least one cycle)
+
+        mark n with a temporary mark
+
+        for each node m with an edge from n to m do
+            visit(m)
+
+        remove temporary mark from n
+        mark n with a permanent mark
+        add n to head of L
+    """
+    num_steps = len(graph)
+    visited_temporary = [False]*num_steps
+    visited_permanent = [False]*num_steps
+    queue = [] # First In First Out
+
+    while not all((element == True) for element in visited_permanent):
+        curr_idx = visited_permanent.index(False)
+        curr_node = graph[curr_idx]
+        # calling recursive helper function
+        queue = topological_sort_DFS_helper(graph, curr_node, curr_idx, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue)
+
+    #TODO: remove the following, just for testing
+    for element in queue:
+        print(element["name"])
+
+    return queue
+
+def generate_ordered_steps_list(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key):
+    """
+    Takes in a list of steps and reorders it based on dependencies, returning a separate copy of
+    the reordered list.
+    If it is impossible to create an ordered list (circular dependencies, missing steps, etc.),
+    an exception is raised.
+
+    :param steps_with_dependencies_list: list of dictionaries, where each dictionary has
+                                         at least a step name
+    :type steps_with_dependencies_list: list[dict]
+    :param name_of_step_key: name of the key corresponding to the step's name –
+                             i.e. attribute referred to by dependency values
+    :type name_of_step_key: str
+    :param name_of_dependencies_key: name of the key corresponding to the dependencies list
+    :type name_of_dependencies_key: str
+    :return: a copy of the reordered list (if possible)
+    :rtype: list[dict]
+    TODO: add errors and exceptions possibly thrown
+    """
+
+    # check that all objects in steps_with_dependencies_list have name_of_step_key
+    if not check_presence_of_key(steps_with_dependencies_list, name_of_step_key):
+        raise Exception("All dictionary elements in steps_with_dependencies_list must have attribute \"{0}\"".format(name_of_step_key))
+
+    # TODO: feel like this is overkill, but checking for duplicates in steps_with_dependencies_list?
+    # is there any case where the exact same step will be defined?? i think gets tricky with sharding maybe? 
idk + + ### List reordering based on dependencies ### + + ## Preprocessing of dependencies lists + # add dependencies attribute if not present, remove duplicates from dependencies, + # and check for self dependencies + preprocessed_steps_with_dependencies_list = set_dependency_list_values(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) + + ## Build directed graph by "reversing" dependencies (TODO: redo this comment and make own function) + define_forward_dependencies(preprocessed_steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) + + ordered_steps_list = topological_sort(preprocessed_steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) + + return ordered_steps_list + + + # TODO:edge cases: all steps have dependencies, no steps depending on each other, dependency on self, identical steps \ No newline at end of file diff --git a/magma/utils.py b/magma/utils.py index e89db67..4c2be17 100644 --- a/magma/utils.py +++ b/magma/utils.py @@ -3,12 +3,14 @@ ################################################ # Libraries ################################################ -from copy import deepcopy ################################################ # Functions ################################################ +#TODO: description -- small utility fxns and +# object attribute checking +#TODO: following 2 fxn not used elsewhere but could be in future def check_variable_type(variable, intended_type): """ Checks that given variable is of the intended type. @@ -68,29 +70,6 @@ def check_presence_of_attributes(input_object, attributes_to_check=None): raise ValueError("Object validation error, {0}\n" .format(e.args[0])) -def check_presence_of_key(list_of_dicts, key_to_check=None): - """ - Takes in a list of dictionaries and a list of keys, checks that those keys - are present within every dict in this list/array. - - :param list_of_dicts: dictionaries to check - :type input_dict: list[dict] - :param key_to_check: key to check - :type key_to_check: str - :return: True, if the specified key is present in each dict, or there is no - key to check, else False - :rtype: bool - """ - #TODO: make the next three commands its own helper function? I repeat variations - # several times -- on this note, look up how to have flexible argument - if key_to_check is None: - return True - - if not all((key_to_check in dictionary) for dictionary in list_of_dicts): - return False - - return True - def set_list_attributes(input_object, attributes_to_set=None): """ Checks for given attribute(s) of type list, sets as empty list if not present, @@ -123,211 +102,4 @@ def set_list_attributes(input_object, attributes_to_set=None): non_dup_attrib.append(item) setattr(input_object, attribute, non_dup_attrib) else: - continue - -#TODO: could make this more general... -def set_dependency_list_values(list_of_dicts, name_of_step_key, name_of_dependencies_key): - """ - Checks for dependency key within each dictionary in list_of_dicts. - If not present, add that key and set value as empty list. - Else, remove duplicates and self-dependencies. - - :param list_of_dicts: list of dictionaries that should hold a dependency list. Each - dictionary corresponds to a step, and the list represents - a "list of steps" with dependencies between them. - :type list_of_dicts: list[dict] - :param name_of_step_key: name of the key corresponding to the step's name – - i.e. 
attribute referred to by dependency values - :type name_of_step_key: str - :param name_of_dependencies_key: name of the key corresponding to the dependencies list - :type name_of_dependencies_key: str - :return: a copy of list_of_dicts with appropriate dependency lists set - :rtype: list[dict] - :raises TypeError: if name_of_dependencies_key is not a string - """ - - list_of_dicts_copy = deepcopy(list_of_dicts) - # iterate through list of dicts and set dependencies key-value pair - for dictionary in list_of_dicts_copy: - # add empty dependency list if not present - if not name_of_dependencies_key in dictionary: - dictionary[name_of_dependencies_key] = [] - - # get rid of duplicates - # I choose this method for generalization, in the case that dependencies is - # a list of dictionaries, which are an unhashable type - dependencies = dictionary[name_of_dependencies_key] - non_duplicated_dependencies = [] - for dependency in dependencies: - if dependency not in non_duplicated_dependencies: - non_duplicated_dependencies.append(dependency) - dictionary[name_of_dependencies_key] = non_duplicated_dependencies - #TODO: note -- im working under the assumption that because of the limitations - # of the schema, the dependencies will be of the correct type. Must I include - # a check that each dependency is in fact a name of another metaworkflow? - #(...probably. :/ ) - - # check for self-dependencies - new_dependencies = dictionary[name_of_dependencies_key] #repetitive, but just for readability - dictionary_name = dictionary[name_of_step_key] - # remove from this list - #TODO: should I throw exception instead? I think it's fine to just remove bc it's easy - new_dependencies = list(filter(lambda element: element != dictionary_name, new_dependencies)) - - return list_of_dicts_copy - -def generate_ordered_step_name_list(list_of_dicts, name_of_step_key): - """ - Based on a list of dictionaries (representing a list of steps) with a "name" key - for each dictionary, return a list of the names of each dictionary with - indices corresponding to the indices of the dictionaries themselves (same order). - """ - names = [] - for dictionary in list_of_dicts: - names.append(dictionary[name_of_step_key]) - return names - #TODO: in test, check that it is always in the same order - -def define_forward_dependencies(list_of_dicts, name_of_step_key, name_of_dependencies_key): - """ - Build directed graph by "reversing" dependencies TODO: redo comment - """ - names = generate_ordered_step_name_list(list_of_dicts, name_of_step_key) - - for dictionary in list_of_dicts: - current_dependencies = dictionary[name_of_dependencies_key] - current_dict_name = dictionary[name_of_step_key] - - # go through each step this current step is dependent on - # and add "step_after" attribute - # (dependencies are "origin", "source", or "progenitor" steps) - for dependency in current_dependencies: - # isolate the index of the dependency using names list - #TODO: this matches to the first occurence of dependency within the array - idx = names.index(dependency) - dependency_step_dict = list_of_dicts[idx] - - #TODO: consider helper fxn? 
but not necessary - #TODO: rename this attribute - if not ("steps_after" in dependency_step_dict): - dependency_step_dict["steps_after"] = [] - - dependency_step_dict["steps_after"].append(current_dict_name) - -def find_index_with_given_step_name(steps_with_dependencies_list, name_of_step_key, name): - for index, step in enumerate(steps_with_dependencies_list): - if step[name_of_step_key] == name: - return index, step - -def topological_sort_DFS_helper(graph, curr_node, curr_idx, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue): - if visited_permanent[curr_idx]: - return queue - if visited_temporary[curr_idx]: - raise Exception("cycle in graph!: node " + curr_node[name_of_node_key]) - - visited_temporary[curr_idx] = True - - for following_step in curr_node[name_of_dependencies_key]: - #TODO: can't have duplicates in names with this method! - idx_following_node, following_node = find_index_with_given_step_name(graph, name_of_node_key, following_step) - topological_sort_DFS_helper(graph, following_node, idx_following_node, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue) - - visited_temporary[curr_idx] = False - visited_permanent[curr_idx] = True - queue.append(curr_node) - return queue - # TODO: for test, can check that there are no duplicates in returned queue - - -def topological_sort(graph, name_of_node_key, name_of_dependencies_key): - """ - DFS algorithm from wikipedia https://en.wikipedia.org/wiki/Topological_sorting - Logic based on topological sort of directed graph from https://www.geeksforgeeks.org/topological-sorting/ - TODO: finish this docstring l8r - Time complexity = O(V+E), where V = # vertices/nodes (steps), E = # edges (in directed graph, dependencies) - https://www.geeksforgeeks.org/detect-cycle-in-directed-graph-using-topological-sort/?id=discuss = cycle detection : - So, in detail, just do a topological sort and get the queue of the results. Then as you pop from the final queue and - push to your result vector/array, check all the adjacent nodes of the last popped item and if the adjacent node - exists in the vector then it's a cycle (if A goes to B then B should not precede A in the topological ordering). - ASSUMPTOPN: no self loops (i deletd them) - - pseudocode from wikipedia: - L ← Empty list that will contain the sorted nodes - while exists nodes without a permanent mark do - select an unmarked node n - visit(n) - - function visit(node n) - if n has a permanent mark then - return - if n has a temporary mark then - stop (graph has at least one cycle) - - mark n with a temporary mark - - for each node m with an edge from n to m do - visit(m) - - remove temporary mark from n - mark n with a permanent mark - add n to head of L - """ - num_steps = len(graph) - visited_temporary = [False]*num_steps - visited_permanent = [False]*num_steps - queue = [] #First In First Out - - while not all((element == True) for element in visited_permanent): - curr_idx = visited_permanent.index(False) - curr_node = graph[curr_idx] - #calling recursive helper function - queue = topological_sort_DFS_helper(graph, curr_node, curr_idx, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue) - - # for element in queue: - # print(element["name"]) - - return queue - -def generate_ordered_steps_list(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key): - """ - Takes in list of steps and reorders based on dependencies, returning a separate copy of - a reordered list. 
- If impossible to create ordered list (circular dependencies, missing steps, etc.), - throws error or exception. - - :param steps_with_dependencies_list: list of dictionaries, where each dictionary has - at least a step name - :type steps_with_dependencies_list: list[dict] - :param name_of_step_key: name of the key corresponding to the step's name – - i.e. attribute referred to by dependency values - :type name_of_step_key: str - :param name_of_dependencies_key: name of the key corresponding to the dependencies list - :type name_of_dependencies_key: str - :return: a copy of the reordered list (if possible) - :rtype: list[dict] - TODO: add errors and exceptions possibly thrown - """ - - # check that all objects in steps_with_dependencies_list have name_of_step_key - if not check_presence_of_key(steps_with_dependencies_list, name_of_step_key): - raise Exception("All dictionary elements in steps_with_dependencies_list must have attribute \"{0}\"".format(name_of_step_key)) - - # TODO: feel like this is overkill, but checking for duplicates in steps_with_dependencies_list? - # is there any case where the exact same step will be defined?? i think gets tricky with sharding maybe? idk - - ### List reordering based on dependencies ### - - ## Preprocessing of dependencies lists - # add dependencies attribute if not present, remove duplicates from dependencies, - # and check for self dependencies - preprocessed_steps_with_dependencies_list = set_dependency_list_values(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) - - ## Build directed graph by "reversing" dependencies (TODO: redo this comment and make own function) - define_forward_dependencies(preprocessed_steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) - - ordered_steps_list = topological_sort(preprocessed_steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) - - return ordered_steps_list - - - # TODO:edge cases: all steps have dependencies, no steps depending on each other, dependency on self, identical steps + continue \ No newline at end of file diff --git a/test/test_topological_sort.py b/test/test_topological_sort.py new file mode 100644 index 0000000..e68b715 --- /dev/null +++ b/test/test_topological_sort.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 + +################################################################# +# Libraries +################################################################# +import pytest +# from copy import deepcopy + +from magma.topological_sort import * + +################################################################# +# Vars +################################################################# +INPUT_DICT_SINGLE_EMPTY_LIST_ATTR = {"list_empty_0": []} +INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS = { + "list_empty_0": [], + "list_empty_1": [], + "list_empty_2": [], +} + +LIST_OF_EMPTY_DICTS = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] + + +MWF_UUID_0 = "test_mwf_uuid_0" +MWF_UUID_1 = "test_mwf_uuid_1" +MWF_UUID_2 = "test_mwf_uuid_2" +MWF_UUID_3 = "test_mwf_uuid_3" +MWF_UUID_4 = "test_mwf_uuid_4" +MWF_UUID_5 = "test_mwf_uuid_5" +MWF_UUID_6 = "test_mwf_uuid_6" +MWF_UUID_7 = "test_mwf_uuid_7" +MWF_UUID_8 = "test_mwf_uuid_8" +MWF_UUID_9 = "test_mwf_uuid_9" +MWF_NAME_A = "A" +MWF_NAME_B = "B" +MWF_NAME_C = "C" +MWF_NAME_D = "D" +MWF_NAME_E = "E" +MWF_NAME_F = "F" +MWF_NAME_G = "G" +MWF_NAME_H = "H" +MWF_NAME_I = "I" +MWF_NAME_J = "J" + +SIMPLE_META_WORKFLOW_DICT_0 = { + "meta_workflow": MWF_UUID_0, + "name": MWF_NAME_A +} 
+SIMPLE_META_WORKFLOW_DICT_1 = { + "meta_workflow": MWF_UUID_1, + "name": MWF_NAME_B +} +SIMPLE_META_WORKFLOW_DICT_2 = { + "meta_workflow": MWF_UUID_2, + "name": MWF_NAME_C +} +SIMPLE_META_WORKFLOW_DICT_3 = { + "meta_workflow": MWF_UUID_3, + "name": MWF_NAME_D +} +SIMPLE_META_WORKFLOW_DICT_4 = { + "meta_workflow": MWF_UUID_4, + "name": MWF_NAME_E +} +SIMPLE_META_WORKFLOW_DICT_5 = { + "meta_workflow": MWF_UUID_5, + "name": MWF_NAME_F +} +SIMPLE_META_WORKFLOW_DICT_6 = { + "meta_workflow": MWF_UUID_6, + "name": MWF_NAME_G +} +SIMPLE_META_WORKFLOW_DICT_7 = { + "meta_workflow": MWF_UUID_7, + "name": MWF_NAME_H +} +SIMPLE_META_WORKFLOW_DICT_8 = { + "meta_workflow": MWF_UUID_8, + "name": MWF_NAME_I +} +SIMPLE_META_WORKFLOW_DICT_9 = { + "meta_workflow": MWF_UUID_9, + "name": MWF_NAME_J +} + +# META_WORKFLOWS_ARRAY_SINGLE_ITEM = [SIMPLE_META_WORKFLOW_DICT_0] +# SINGLE_ITEM_META_WORKFLOWS_DICT = {"meta_workflows": META_WORKFLOWS_ARRAY_SINGLE_ITEM} + +# META_WORKFLOWS_ARRAY_SEVERAL_ITEMS = [SIMPLE_META_WORKFLOW_DICT_0, SIMPLE_META_WORKFLOW_DICT_1, SIMPLE_META_WORKFLOW_DICT_2] +# SEVERAL_ITEMS_META_WORKFLOWS_DICT = {"meta_workflows": META_WORKFLOWS_ARRAY_SEVERAL_ITEMS} + +# META_WORKFLOWS_ARRAY_SEVERAL_ITEMS_W_DUPLICATES = [SIMPLE_META_WORKFLOW_DICT_0, SIMPLE_META_WORKFLOW_DICT_2, SIMPLE_META_WORKFLOW_DICT_1, SIMPLE_META_WORKFLOW_DICT_2] +# SEVERAL_ITEMS_W_DUPLICATES_META_WORKFLOWS_DICT = {"meta_workflows": META_WORKFLOWS_ARRAY_SEVERAL_ITEMS_W_DUPLICATES} + +# EMPTY_META_WORKFLOWS_DICT = {"meta_workflows": []} + + +################################################################# +# Tests +################################################################# + +@pytest.mark.parametrize( + "list_of_dicts, key_to_check, return_value", + [ + ([], None, True), + ([], "key", True), # kind of weird edge case, but not a biggie (TODO:) + ( + LIST_OF_EMPTY_DICTS, + "list_empty_0", + True, + ), + #TODO: come back to this and finish + # ( + # [ + # INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR, + # INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS, + # INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS, + # INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP, + # INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, + # ], + # "list_simple_0", + # True, + # ), + # ( + # [INPUT_DICT_SINGLE_LIST_OF_DICTS], + # "list_of_dicts_0", + # False, + # ), + # ( + # [ + # INPUT_DICT_SINGLE_LIST_OF_DICTS, + # INPUT_DICT_SEVERAL_LISTS_OF_DICTS, + # INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP, + # ], + # "list_of_dicts", + # False, + # ), + ], +) +def test_check_presence_of_key( + list_of_dicts, key_to_check, return_value +): + """ + Test for function checking that all dictionaries in a given list have the + specified key, no errors raised. 
+ """ + result = check_presence_of_key(list_of_dicts, key_to_check) + assert result == return_value + + +# #TODO: will be generalizing this function later +# class TestSetDependencyListValues: +# @pytest.mark.parametrize( +# "list_of_dicts, name_of_step_key, name_of_dependencies_key, orig_lengths, reset_lengths", +# [ +# ([INPUT_DICT_SINGLE_SIMPLE_ATTR], ["list_empty_0"], [0], [0]), +# ( +# CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, +# ["list_empty_0", "list_empty_1", "list_empty_2"], +# [0, 0, 0], +# [0, 0, 0], +# ), +# (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), +# ( +# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, +# ["list_simple_0", "list_simple_1"], +# [3, 3], +# [3, 3], +# ), +# (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), +# ( +# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, +# ["list_simple_0", "list_simple_2", "list_simple_1"], +# [4, 3, 3], +# [3, 2, 3], +# ), +# (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS, ["list_of_dicts"], [3], [3]), +# ( +# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS, +# ["list_of_dicts_1", "list_of_dicts_0"], +# [3, 3], +# [3, 3], +# ), +# (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP, ["list_of_dicts"], [6], [3]), +# ( +# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP, +# [ +# "list_of_dicts_1", +# "list_of_dicts_0", +# "list_of_dicts_2", +# "list_of_dicts_3", +# ], +# [3, 3, 5, 3], +# [3, 3, 2, 1], +# ), +# ], +# ) +# def test_set_list_attributes_of_existing_list_attributes( +# self, input_object, attributes_to_set, orig_lengths, reset_lengths +# ): +# """ +# Test for function that gets rid of duplicates within object attributes that are lists, +# or sets attributes to empty list if not present within the object. +# Cases where the attributes to set are existent and are lists. 
+# """ +# # import pdb; pdb.set_trace() +# # check original length of attributes_to_set +# for idx, attribute in enumerate(attributes_to_set): +# assert len(getattr(input_object, attribute)) == orig_lengths[idx] + +# result = set_list_attributes(input_object, attributes_to_set) + +# # check length of "reset" attributes_to_set +# for idx, attribute in enumerate(attributes_to_set): +# assert len(getattr(input_object, attribute)) == reset_lengths[idx] + +# assert result == None diff --git a/test/test_utils_magma.py b/test/test_utils_magma.py index fa2a426..9ea64a9 100644 --- a/test/test_utils_magma.py +++ b/test/test_utils_magma.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + ################################################################# # Libraries ################################################################# @@ -6,8 +8,6 @@ from magma.utils import * -# from magma.metawfl_handler import MetaWorkflowStep, MetaWorkflowHandler - ################################################################# # Vars ################################################################# @@ -51,27 +51,19 @@ def __init__(self, input_dict): "list_simple_1": ["a", "b", "c"], "list_simple_2": ["c", 1, "c"], } -INPUT_DICT_SINGLE_LIST_OF_DICTS = { - "list_of_dicts": [ + +LIST_OF_EMPTY_DICTS = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] +LIST_OF_SIMPLE_ATTR_DICTS = [ INPUT_DICT_SINGLE_SIMPLE_ATTR, INPUT_DICT_SINGLE_SIMPLE_ATTR_1, INPUT_DICT_SINGLE_SIMPLE_ATTR_2, ] -} -INPUT_DICT_SEVERAL_LISTS_OF_DICTS = { - "list_of_dicts_0": [ - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR_1, - INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - ], - "list_of_dicts_1": [ +LIST_OF_SIMPLE_ATTR_DICTS_REORDERED = [ INPUT_DICT_SINGLE_SIMPLE_ATTR, INPUT_DICT_SINGLE_SIMPLE_ATTR_2, INPUT_DICT_SINGLE_SIMPLE_ATTR_1, - ], -} -INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP = { - "list_of_dicts": [ + ] +LIST_OF_SIMPLE_ATTR_DICTS_W_DUP = [ INPUT_DICT_SINGLE_SIMPLE_ATTR, INPUT_DICT_SINGLE_SIMPLE_ATTR_1, INPUT_DICT_SINGLE_SIMPLE_ATTR, @@ -79,50 +71,54 @@ def __init__(self, input_dict): INPUT_DICT_SINGLE_SIMPLE_ATTR, INPUT_DICT_SINGLE_SIMPLE_ATTR, ] -} -INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP = { - "list_of_dicts_0": [ - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR_1, - INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - ], - "list_of_dicts_1": [ - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - INPUT_DICT_SINGLE_SIMPLE_ATTR_1, - ], - "list_of_dicts_2": [ +LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_2 = [ INPUT_DICT_SINGLE_SIMPLE_ATTR, INPUT_DICT_SINGLE_SIMPLE_ATTR, INPUT_DICT_SINGLE_SIMPLE_ATTR, INPUT_DICT_SINGLE_SIMPLE_ATTR_1, INPUT_DICT_SINGLE_SIMPLE_ATTR, - ], - "list_of_dicts_3": [ + ] +LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_3 = [ INPUT_DICT_SINGLE_SIMPLE_ATTR_2, INPUT_DICT_SINGLE_SIMPLE_ATTR_2, INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - ], + ] + +INPUT_DICT_SINGLE_LIST_OF_DICTS = { + "list_of_dicts": LIST_OF_SIMPLE_ATTR_DICTS +} +INPUT_DICT_SEVERAL_LISTS_OF_DICTS = { + "list_of_dicts_0": LIST_OF_SIMPLE_ATTR_DICTS, + "list_of_dicts_1": LIST_OF_SIMPLE_ATTR_DICTS_REORDERED, +} +INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP = { + "list_of_dicts": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP +} +INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP = { + "list_of_dicts_0": LIST_OF_SIMPLE_ATTR_DICTS, + "list_of_dicts_1": LIST_OF_SIMPLE_ATTR_DICTS_REORDERED, + "list_of_dicts_2": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_2, + "list_of_dicts_3": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_3 } # ClassTester objects -ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_ATTR) 
-ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_ATTRS) -ClassTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_EMPTY_LIST_ATTR) -ClassTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS) -ClassTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR) -ClassTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS) -ClassTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP = ClassTester( +CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_ATTR) +CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_ATTRS) +CLASSTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_EMPTY_LIST_ATTR) +CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS) +CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR) +CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS) +CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP = ClassTester( INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP ) -ClassTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = ClassTester( +CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = ClassTester( INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP ) -ClassTESTER_OBJ_SINGLE_LIST_OF_DICTS = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS) -ClassTESTER_OBJ_SEVERAL_LISTS_OF_DICTS = ClassTester(INPUT_DICT_SEVERAL_LISTS_OF_DICTS) -ClassTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP) -ClassTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP = ClassTester( +CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS) +CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS = ClassTester(INPUT_DICT_SEVERAL_LISTS_OF_DICTS) +CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP) +CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP = ClassTester( INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP ) @@ -130,7 +126,6 @@ def __init__(self, input_dict): # Tests ################################################################# - @pytest.mark.parametrize( "variable, intended_type, return_value", [ @@ -171,7 +166,7 @@ def __init__(self, input_dict): ({"hi": 1}, list, False), ], ) -def test_check_list_elements_type(variable, intended_type, return_value): +def test_check_variable_type(variable, intended_type, return_value): """ Test for function checking if a variable is of a specified type. """ @@ -179,46 +174,45 @@ def test_check_list_elements_type(variable, intended_type, return_value): assert result == return_value -class TestCheckListElementsType: - @pytest.mark.parametrize( - "list_to_check, intended_type, return_value", - [ - ([], str, True), - ([], int, True), - ([], list, True), - ([], object, True), - (["id"], str, True), - (["1", "test", "2"], str, True), - ([1, 2, 3, 4], int, True), - ([[1], [2], ["test", "2"], []], list, True), - ([["1", "2", "3", "4", "5"], ["6"]], str, False), - ([["1", "2", "3", "4", "5"], "6"], list, False), - ([None, "test"], str, False), - ([1, "test"], int, False), - ], - ) - def test_check_list_elements_type_no_errors( - self, list_to_check, intended_type, return_value - ): - """ - Test for function checking that all elements of a list are of a specified type, - no errors raised. 
- """ - result = check_list_elements_type(list_to_check, intended_type) - assert result == return_value +@pytest.mark.parametrize( + "list_to_check, intended_type, return_value", + [ + ([], str, True), + ([], int, True), + ([], list, True), + ([], object, True), + (["id"], str, True), + (["1", "test", "2"], str, True), + ([1, 2, 3, 4], int, True), + ([[1], [2], ["test", "2"], []], list, True), + ([["1", "2", "3", "4", "5"], ["6"]], str, False), + ([["1", "2", "3", "4", "5"], "6"], list, False), + ([None, "test"], str, False), + ([1, "test"], int, False), + ], +) +def test_check_list_elements_type_no_errors( + list_to_check, intended_type, return_value +): + """ + Test for function checking that all elements of a list are of a specified type, + no errors raised. + """ + result = check_list_elements_type(list_to_check, intended_type) + assert result == return_value class TestCheckPresenceOfAttributes: @pytest.mark.parametrize( "input_object, attributes_to_check", [ - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), ], ) def test_check_presence_of_attributes_no_errors( @@ -234,19 +228,19 @@ def test_check_presence_of_attributes_no_errors( @pytest.mark.parametrize( "input_object, attributes_to_check", [ - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present"]), - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present_0", "not_present_1"]), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "not_present"]), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present"]), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present_0", "not_present_1"]), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "not_present"]), ( - ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, + CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "not_present", "test_1", "test_2"], ), ( - ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, + CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "not_present_0", "test_2", "not_present_1"], ), ( - ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, + CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["not_present", "test_0", "test_2", "test_1"], ), ], @@ -263,69 +257,17 @@ def test_check_presence_of_attributes_value_errors( assert "Object validation error" in str(value_err_info.value) -class TestCheckPresenceOfKey: - @pytest.mark.parametrize( - "list_of_dicts, key_to_check, return_value", - [ - ([], None, True), - ([], "key", True), # kind of weird edge case, but not a biggie (TODO:) - ( - [ - INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, - INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS, - ], - "list_empty_0", - True, - ), - ( - [ - INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR, - INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS, - INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS, - INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP, - INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, - ], - "list_simple_0", - True, - ), - ( - 
[INPUT_DICT_SINGLE_LIST_OF_DICTS], - "list_of_dicts_0", - False, - ), - ( - [ - INPUT_DICT_SINGLE_LIST_OF_DICTS, - INPUT_DICT_SEVERAL_LISTS_OF_DICTS, - INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP, - ], - "list_of_dicts", - False, - ), - ], - ) - def test_check_presence_of_key_no_errors( - self, list_of_dicts, key_to_check, return_value - ): - """ - Test for function checking that all dictionaries in a given list have the - specified key, no errors raised. - """ - result = check_presence_of_key(list_of_dicts, key_to_check) - assert result == return_value - - class TestSetListAttributes: @pytest.mark.parametrize( "input_object, attributes_to_set", [ - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), ], ) def test_set_list_attributes_of_existing_nonlist_attributes( @@ -340,41 +282,42 @@ def test_set_list_attributes_of_existing_nonlist_attributes( result = set_list_attributes(input_object, attributes_to_set) assert result == None assert vars(input_object) == vars(original_object) # no attributes changed + #TODO: double check the above "vars" functionality @pytest.mark.parametrize( "input_object, attributes_to_set, orig_lengths, reset_lengths", [ - (ClassTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR, ["list_empty_0"], [0], [0]), + (CLASSTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR, ["list_empty_0"], [0], [0]), ( - ClassTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, + CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, ["list_empty_0", "list_empty_1", "list_empty_2"], [0, 0, 0], [0, 0, 0], ), - (ClassTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), ( - ClassTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, + CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, ["list_simple_0", "list_simple_1"], [3, 3], [3, 3], ), - (ClassTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), ( - ClassTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, + CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, ["list_simple_0", "list_simple_2", "list_simple_1"], [4, 3, 3], [3, 2, 3], ), - (ClassTESTER_OBJ_SINGLE_LIST_OF_DICTS, ["list_of_dicts"], [3], [3]), + (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS, ["list_of_dicts"], [3], [3]), ( - ClassTESTER_OBJ_SEVERAL_LISTS_OF_DICTS, + CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS, ["list_of_dicts_1", "list_of_dicts_0"], [3, 3], [3, 3], ), - (ClassTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP, ["list_of_dicts"], [6], [3]), + (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP, ["list_of_dicts"], [6], [3]), ( - ClassTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP, + CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP, [ "list_of_dicts_1", "list_of_dicts_0", @@ -410,12 +353,12 @@ def test_set_list_attributes_of_existing_list_attributes( 
@pytest.mark.parametrize( "input_object, attributes_to_set, num_added_attributes", [ - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"], 0), - (ClassTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_1"], 1), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_2", "test_3"], 1), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_5", "test_0", "test_4"], 2), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"], 0), - (ClassTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"], 0), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"], 0), + (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_1"], 1), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_2", "test_3"], 1), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_5", "test_0", "test_4"], 2), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"], 0), + (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"], 0), ], ) def test_set_list_attributes_of_nonexistent_attributes( @@ -445,4 +388,4 @@ def test_set_list_attributes_of_nonexistent_attributes( assert attribute in attributes_to_set assert getattr(input_object, attribute) == [] - # TODO: add a test for mixed cases? (nonexistent + lists + empties, etc.) + # TODO: add a test for mixed cases? (nonexistent + lists + empties, etc.) \ No newline at end of file From 6bf380984787b642baa42c8b355fabb33e29e56e Mon Sep 17 00:00:00 2001 From: vstevensf Date: Wed, 9 Nov 2022 18:56:57 -0500 Subject: [PATCH 12/38] Added some pytests for topological sort. Changes included additions of different directed graph global vars for testing -- tests for the topological sort on their way. Also removed function for creating "forward dependencies" -- this version of topological sort works with "backward dependencies". --- magma/topological_sort.py | 98 +++---- test/test_topological_sort.py | 515 ++++++++++++++++++++++------------ test/test_utils_magma.py | 2 + 3 files changed, 378 insertions(+), 237 deletions(-) diff --git a/magma/topological_sort.py b/magma/topological_sort.py index 6c2b9c6..bed2b5e 100644 --- a/magma/topological_sort.py +++ b/magma/topological_sort.py @@ -4,6 +4,7 @@ # Libraries ################################################ from copy import deepcopy +from magma.utils import check_variable_type ################################################ # Functions @@ -12,7 +13,7 @@ def check_presence_of_key(list_of_dicts, key_to_check=None): """ Takes in a list of dictionaries and a list of keys, checks that those keys - are present within every dict in this list/array. + are present within every99 dict in this list/array. :param list_of_dicts: dictionaries to check :type input_dict: list[dict] @@ -32,8 +33,19 @@ def check_presence_of_key(list_of_dicts, key_to_check=None): return True +def generate_ordered_step_name_list(list_of_dicts, name_of_step_key): + """ + Based on a list of dictionaries (representing a list of steps) with a "name" key + for each dictionary, return a list of the names of each dictionary with + indices corresponding to the indices of the dictionaries themselves (same order). + """ + names = [] + for dictionary in list_of_dicts: + names.append(dictionary[name_of_step_key]) + return names + #TODO: could make this more general... -def set_dependency_list_values(list_of_dicts, name_of_step_key, name_of_dependencies_key): +def set_dependency_list_values(list_of_dicts, name_of_step_key, name_of_dependencies_key, existing_steps_list): """ Checks for dependency key within each dictionary in list_of_dicts. If not present, add that key and set value as empty list. 
@@ -52,75 +64,38 @@ def set_dependency_list_values(list_of_dicts, name_of_step_key, name_of_dependen :rtype: list[dict] """ - list_of_dicts_copy = deepcopy(list_of_dicts) + list_of_dicts_copy = deepcopy(list_of_dicts) #TODO: make sure original doesnt change in test # iterate through list of dicts and set dependencies key-value pair for dictionary in list_of_dicts_copy: # add empty dependency list if not present if not name_of_dependencies_key in dictionary: dictionary[name_of_dependencies_key] = [] + continue #TODO: do some renaming of this function to follow pattern of obj vs dict key setting? # get rid of duplicates # I choose this method for generalization, in the case that dependencies is # a list of dictionaries, which are an unhashable type dependencies = dictionary[name_of_dependencies_key] + # check this is indeed a list + if not check_variable_type(dependencies, list): + dictionary[name_of_dependencies_key] = [] + continue + #TODO: throw exception here instead of resetting value + + # get rid of duplicates and self-dependencies + # and each dependency is in fact a name of another metaworkflow non_duplicated_dependencies = [] + dictionary_name = dictionary[name_of_step_key] for dependency in dependencies: - if dependency not in non_duplicated_dependencies: + if (dependency not in non_duplicated_dependencies) and (dependency != dictionary_name) and (dependency in existing_steps_list): non_duplicated_dependencies.append(dependency) + #TODO: throw exception for self dependencies, duplicates, or nonexistent names? dictionary[name_of_dependencies_key] = non_duplicated_dependencies - #TODO: note -- im working under the assumption that because of the limitations - # of the schema, the dependencies will be of the correct type. Must I include - # a check that each dependency is in fact a name of another metaworkflow? - #(...probably. :/ ) - - # check for self-dependencies - new_dependencies = dictionary[name_of_dependencies_key] #repetitive, but just for readability - dictionary_name = dictionary[name_of_step_key] - # remove from this list - #TODO: should I throw exception instead? I think it's fine to just remove bc it's easy - new_dependencies = list(filter(lambda element: element != dictionary_name, new_dependencies)) + # dictionary["steps_after"] = [] return list_of_dicts_copy -def generate_ordered_step_name_list(list_of_dicts, name_of_step_key): - """ - Based on a list of dictionaries (representing a list of steps) with a "name" key - for each dictionary, return a list of the names of each dictionary with - indices corresponding to the indices of the dictionaries themselves (same order). 
- """ - names = [] - for dictionary in list_of_dicts: - names.append(dictionary[name_of_step_key]) - return names - #TODO: in test, check that it is always in the same order - -def define_forward_dependencies(list_of_dicts, name_of_step_key, name_of_dependencies_key): - """ - Build directed graph by "reversing" dependencies TODO: redo comment - """ - names = generate_ordered_step_name_list(list_of_dicts, name_of_step_key) - - for dictionary in list_of_dicts: - current_dependencies = dictionary[name_of_dependencies_key] - current_dict_name = dictionary[name_of_step_key] - - # go through each step this current step is dependent on - # and add "step_after" attribute - # (dependencies are "origin", "source", or "progenitor" steps) - for dependency in current_dependencies: - # isolate the index of the dependency using names list - #TODO: this matches to the first occurence of dependency within the array - idx = names.index(dependency) - dependency_step_dict = list_of_dicts[idx] - - #TODO: consider helper fxn? but not necessary - #TODO: rename this attribute - if not ("steps_after" in dependency_step_dict): - dependency_step_dict["steps_after"] = [] - - dependency_step_dict["steps_after"].append(current_dict_name) - def find_index_with_given_step_name(steps_with_dependencies_list, name_of_step_key, name): for index, step in enumerate(steps_with_dependencies_list): if step[name_of_step_key] == name: @@ -134,10 +109,10 @@ def topological_sort_DFS_helper(graph, curr_node, curr_idx, name_of_node_key, na visited_temporary[curr_idx] = True - for following_step in curr_node[name_of_dependencies_key]: + for previous_step in curr_node[name_of_dependencies_key]: #TODO: can't have duplicates in names with this method! - idx_following_node, following_node = find_index_with_given_step_name(graph, name_of_node_key, following_step) - topological_sort_DFS_helper(graph, following_node, idx_following_node, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue) + idx_previous_node, previous_node = find_index_with_given_step_name(graph, name_of_node_key, previous_step) + topological_sort_DFS_helper(graph, previous_node, idx_previous_node, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue) visited_temporary[curr_idx] = False visited_permanent[curr_idx] = True @@ -225,17 +200,16 @@ def generate_ordered_steps_list(steps_with_dependencies_list, name_of_step_key, ### List reordering based on dependencies ### + names = generate_ordered_step_name_list(steps_with_dependencies_list, name_of_step_key) + ## Preprocessing of dependencies lists # add dependencies attribute if not present, remove duplicates from dependencies, # and check for self dependencies - preprocessed_steps_with_dependencies_list = set_dependency_list_values(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) - - ## Build directed graph by "reversing" dependencies (TODO: redo this comment and make own function) - define_forward_dependencies(preprocessed_steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) + preprocessed_steps_with_dependencies_list = set_dependency_list_values(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key, names) + # import pdb; pdb.set_trace() ordered_steps_list = topological_sort(preprocessed_steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) return ordered_steps_list - - # TODO:edge cases: all steps have dependencies, no steps depending on each other, dependency on self, 
identical steps \ No newline at end of file + # TODO:edge cases: all steps have dependencies (cycle or deleted self-dependency), no steps depending on each other, dependency on self, identical steps \ No newline at end of file diff --git a/test/test_topological_sort.py b/test/test_topological_sort.py index e68b715..fa751a2 100644 --- a/test/test_topological_sort.py +++ b/test/test_topological_sort.py @@ -18,200 +18,365 @@ "list_empty_2": [], } -LIST_OF_EMPTY_DICTS = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] - - -MWF_UUID_0 = "test_mwf_uuid_0" -MWF_UUID_1 = "test_mwf_uuid_1" -MWF_UUID_2 = "test_mwf_uuid_2" -MWF_UUID_3 = "test_mwf_uuid_3" -MWF_UUID_4 = "test_mwf_uuid_4" -MWF_UUID_5 = "test_mwf_uuid_5" -MWF_UUID_6 = "test_mwf_uuid_6" -MWF_UUID_7 = "test_mwf_uuid_7" -MWF_UUID_8 = "test_mwf_uuid_8" -MWF_UUID_9 = "test_mwf_uuid_9" -MWF_NAME_A = "A" -MWF_NAME_B = "B" -MWF_NAME_C = "C" -MWF_NAME_D = "D" -MWF_NAME_E = "E" -MWF_NAME_F = "F" -MWF_NAME_G = "G" -MWF_NAME_H = "H" -MWF_NAME_I = "I" -MWF_NAME_J = "J" - -SIMPLE_META_WORKFLOW_DICT_0 = { - "meta_workflow": MWF_UUID_0, - "name": MWF_NAME_A -} -SIMPLE_META_WORKFLOW_DICT_1 = { - "meta_workflow": MWF_UUID_1, - "name": MWF_NAME_B -} -SIMPLE_META_WORKFLOW_DICT_2 = { - "meta_workflow": MWF_UUID_2, - "name": MWF_NAME_C -} -SIMPLE_META_WORKFLOW_DICT_3 = { - "meta_workflow": MWF_UUID_3, - "name": MWF_NAME_D -} -SIMPLE_META_WORKFLOW_DICT_4 = { - "meta_workflow": MWF_UUID_4, - "name": MWF_NAME_E -} -SIMPLE_META_WORKFLOW_DICT_5 = { - "meta_workflow": MWF_UUID_5, - "name": MWF_NAME_F -} -SIMPLE_META_WORKFLOW_DICT_6 = { - "meta_workflow": MWF_UUID_6, - "name": MWF_NAME_G -} -SIMPLE_META_WORKFLOW_DICT_7 = { - "meta_workflow": MWF_UUID_7, - "name": MWF_NAME_H -} -SIMPLE_META_WORKFLOW_DICT_8 = { - "meta_workflow": MWF_UUID_8, - "name": MWF_NAME_I -} -SIMPLE_META_WORKFLOW_DICT_9 = { - "meta_workflow": MWF_UUID_9, - "name": MWF_NAME_J -} +LIST_OF_DICTS_EMPTY_LIST_ATTR = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] + +# of the form (mwf_uuid, mwf_name) +# used for factory fixture to generate lists of dicts +MWF_A = ["test_mwf_uuid_0", "A"] +MWF_B = ["test_mwf_uuid_1", "B"] +MWF_C = ["test_mwf_uuid_2", "C"] +MWF_D = ["test_mwf_uuid_3", "D"] +MWF_E = ["test_mwf_uuid_4", "E"] +MWF_F = ["test_mwf_uuid_5", "F"] +MWF_G = ["test_mwf_uuid_6", "G"] +MWF_H = ["test_mwf_uuid_7", "H"] +MWF_I = ["test_mwf_uuid_8", "I"] +MWF_J = ["test_mwf_uuid_9", "J"] + +A = ["A"] +B = ["B"] +C = ["C"] +D = ["D"] +E = ["E"] +F = ["F"] +G = ["G"] +H = ["H"] +I = ["I"] +J = ["J"] + +DEP_ON_A = [A] +DEP_ON_B = [B] +DEP_ON_C = [C] +DEP_ON_D = [D] +DEP_ON_E = [E] +DEP_ON_F = [F] +DEP_ON_G = [G] +DEP_ON_H = [H] +DEP_ON_I = [I] +DEP_ON_J = [J] +DEP_EMPTY = [[]] + +EXISTING_MWF_NAMES = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] + +SIMPLE_MWF_ORDERED_ARRAY = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E, MWF_F, MWF_G, MWF_H, MWF_I, MWF_J] + +# a meta_workflow_dict generator of sorts +def meta_workflow_dict(simple_mwf_metadata_list): + mwf_dict = { + "meta_workflow": simple_mwf_metadata_list[0], + "name": simple_mwf_metadata_list[1] + } + if len(simple_mwf_metadata_list) == 3: + mwf_dict["dependencies"] = simple_mwf_metadata_list[2] + return mwf_dict + +@pytest.fixture +def list_of_dicts(): + def _create_list(array_of_mwf): + created_list = [] + for simple_mwf_metadata_list in array_of_mwf: + created_list.append(meta_workflow_dict(simple_mwf_metadata_list)) + return created_list + return _create_list + +#TODO: dawg idk how to draw these +# DAGs 
(directed acyclic graphs, can be topologically sorted)
# -----------------------------------------------------------
# DAG_0
# A B -----> C
DEPENDENCIES_DAG_0 = [[], [], B]

# DAG_1
# B -----> D
# | ⋀ ⋀
# | / |
# ⋁ / |
# A <----- C
DEPENDENCIES_DAG_1 = [B+C, [], [], A+B+C]

# DAG_2
# E -----> C
# | |
# | |
# ⋁ ⋁
# A -----> D
# ⋀ |
# | |
# | ⋁
# F -----> B
DEPENDENCIES_DAG_2 = [E+F, D+F, E, C, [], []]

# DAG_3
# A -----> C ------> F
# | / |
# | / |
# ⋁ ⋁ ⋁
# B ---------> E ---> D
# \ ⋀
# \ __________/
# ⋁ /
# G
DEPENDENCIES_DAG_3 = [[], A, A, G+E+F, B+F, C, B]

# DAG_4
# A ----> C ----> F
# ⋀
# /
# /
# B ------> D -----> G ----> H
# \ ⋀
# \ __________/
# ⋁ /
# E
DEPENDENCIES_DAG_4 = [[], [], A, B, B, C+D, E+D, G]

# DAG_5
# A -----> B -----> E
# | |
# | ⋁
# | D
# | -> F
# ⋁ /
# C -----> H
# | \ J
# | \ ⋀
# | \ |
# | ---> G -----> I
# | ⋀
# |___________________|
DEPENDENCIES_DAG_5 = [[], A, A, B, B, C, C, C, C+G, G]


# Cyclic graphs, cannot be topologically sorted
# ----------------------------------------------
# CYCLIC_0
# A B__
# ⋀ \_____
# | |
# | ⋁
# D <----- C
DEPENDENCIES_CYCLIC_0 = [[], D, B, C]

# CYCLIC_1
# A -----> B
# ⋀ |
# | |
# | ⋁
# D <----- C
DEPENDENCIES_CYCLIC_1 = [D, A, B, C]

# CYCLIC_2
# A -----> B ----> E
# ⋀ | ⋀ |
# | | \____|
# | ⋁
# D <----- C
DEPENDENCIES_CYCLIC_2 = [D, A+E, B, C, B]

# CYCLIC_3
# B -----> A -----> D
# ⋀ | ⋀ |
# | | | |
# | | | ⋁
# C <----- ------- E
DEPENDENCIES_CYCLIC_3 = [B+E, C, A, A, D]

# CYCLIC_4
# A -----> B -----> E
# | |
# | ⋁
# | D
# | -> F
# ⋁ /
# C -----> H
# ⋀ \ J
# | \ ⋀
# | \ |
# | ---> G -----> I
# | |
# |___________________|
DEPENDENCIES_CYCLIC_4 = [[], A, A+I, B, B, C, C, C, G, G]


#################################################################
# Tests
#################################################################

class TestCheckPresenceOfKey:
    @pytest.mark.parametrize(
        "empty_list_of_dicts, key_to_check, return_value",
        [
            ([], None, True),
            ([], "key", True), # kind of weird edge case, but not a biggie (TODO:)
            (
                LIST_OF_DICTS_EMPTY_LIST_ATTR,
                "list_empty_0",
                True,
            )
        ],
    )
    def test_check_presence_of_key_empty_dicts(
        self, empty_list_of_dicts, key_to_check, return_value
    ):
        """
        Test for function checking that all dictionaries in a given list have the
        specified key, no errors raised, with empty list or list of empty dicts.
+ """ + result = check_presence_of_key(empty_list_of_dicts, key_to_check) + assert result == return_value + + @pytest.mark.parametrize( + "array_of_mwf, key_to_check, return_value", + [ + ( + SIMPLE_MWF_ORDERED_ARRAY, + "name", + True, + ), + ( + [MWF_A, MWF_B, MWF_C], + "meta_workflow", + True, + ), + ( + [MWF_J, MWF_I, MWF_H], + "hi", + False, + ) + ], + ) + def test_check_presence_of_key( + self, list_of_dicts, array_of_mwf, key_to_check, return_value + ): + """ + Test for function checking that all dictionaries in a given list have the + specified key, no errors raised, regular cases. + """ + dict_list = list_of_dicts(array_of_mwf) + result = check_presence_of_key(dict_list, key_to_check) + assert result == return_value + result2 = check_presence_of_key(dict_list + LIST_OF_DICTS_EMPTY_LIST_ATTR, "list_empty_0") + assert result2 == False + + @pytest.mark.parametrize( - "list_of_dicts, key_to_check, return_value", + "array_of_mwf, return_value", [ - ([], None, True), - ([], "key", True), # kind of weird edge case, but not a biggie (TODO:) ( - LIST_OF_EMPTY_DICTS, - "list_empty_0", - True, + SIMPLE_MWF_ORDERED_ARRAY, + EXISTING_MWF_NAMES + ), + ( + [MWF_B, MWF_E, MWF_I, MWF_A], + ["B", "E", "I", "A"] ), - #TODO: come back to this and finish - # ( - # [ - # INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR, - # INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS, - # INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS, - # INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP, - # INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, - # ], - # "list_simple_0", - # True, - # ), - # ( - # [INPUT_DICT_SINGLE_LIST_OF_DICTS], - # "list_of_dicts_0", - # False, - # ), - # ( - # [ - # INPUT_DICT_SINGLE_LIST_OF_DICTS, - # INPUT_DICT_SEVERAL_LISTS_OF_DICTS, - # INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP, - # ], - # "list_of_dicts", - # False, - # ), + ( + [], + [] + ) ], ) -def test_check_presence_of_key( - list_of_dicts, key_to_check, return_value +def test_generate_ordered_step_name_list( + list_of_dicts, array_of_mwf, return_value ): """ - Test for function checking that all dictionaries in a given list have the - specified key, no errors raised. + Test for function creating a list of values for a given key, + using a list of dictionaries. 
""" - result = check_presence_of_key(list_of_dicts, key_to_check) + dict_list = list_of_dicts(array_of_mwf) + result = generate_ordered_step_name_list(dict_list, "name") assert result == return_value -# #TODO: will be generalizing this function later -# class TestSetDependencyListValues: -# @pytest.mark.parametrize( -# "list_of_dicts, name_of_step_key, name_of_dependencies_key, orig_lengths, reset_lengths", -# [ -# ([INPUT_DICT_SINGLE_SIMPLE_ATTR], ["list_empty_0"], [0], [0]), -# ( -# CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, -# ["list_empty_0", "list_empty_1", "list_empty_2"], -# [0, 0, 0], -# [0, 0, 0], -# ), -# (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), -# ( -# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, -# ["list_simple_0", "list_simple_1"], -# [3, 3], -# [3, 3], -# ), -# (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), -# ( -# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, -# ["list_simple_0", "list_simple_2", "list_simple_1"], -# [4, 3, 3], -# [3, 2, 3], -# ), -# (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS, ["list_of_dicts"], [3], [3]), -# ( -# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS, -# ["list_of_dicts_1", "list_of_dicts_0"], -# [3, 3], -# [3, 3], -# ), -# (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP, ["list_of_dicts"], [6], [3]), -# ( -# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP, -# [ -# "list_of_dicts_1", -# "list_of_dicts_0", -# "list_of_dicts_2", -# "list_of_dicts_3", -# ], -# [3, 3, 5, 3], -# [3, 3, 2, 1], -# ), -# ], -# ) -# def test_set_list_attributes_of_existing_list_attributes( -# self, input_object, attributes_to_set, orig_lengths, reset_lengths -# ): -# """ -# Test for function that gets rid of duplicates within object attributes that are lists, -# or sets attributes to empty list if not present within the object. -# Cases where the attributes to set are existent and are lists. -# """ -# # import pdb; pdb.set_trace() -# # check original length of attributes_to_set -# for idx, attribute in enumerate(attributes_to_set): -# assert len(getattr(input_object, attribute)) == orig_lengths[idx] - -# result = set_list_attributes(input_object, attributes_to_set) - -# # check length of "reset" attributes_to_set -# for idx, attribute in enumerate(attributes_to_set): -# assert len(getattr(input_object, attribute)) == reset_lengths[idx] - -# assert result == None +#TODO: will be generalizing this function later +class TestSetDependencyListValues: + @pytest.mark.parametrize( + "array_of_mwf, orig_dependencies, reset_dependencies", + [ + ( + [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + DEP_ON_A], + [C, A, A], + [C, A, A] + ), + ( + [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + DEP_ON_D], + [C, A, D], + [C, A, []] + ), + ( + [MWF_A + DEP_ON_G, MWF_B + DEP_ON_A, MWF_C + DEP_ON_D], + [G, A, D], + [[], A, []] + ), + ( + [MWF_A + [["B", "A"]], MWF_B + DEP_ON_A], + [["B", "A"], A], + [B, A] + ), + ( + [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + [["A", "C", "A"]]], + [C, A, ["A", "C", "A"]], + [C, A, A] + ), + ( + [MWF_A + DEP_EMPTY], + DEP_EMPTY, + DEP_EMPTY + ) + ], + ) + def test_set_dependency_list_values_of_existing_dependencies( + self, list_of_dicts, array_of_mwf, orig_dependencies, reset_dependencies + ): + """ + Test for function that gets rid of duplicates within object attributes that are lists, + or sets attributes to empty list if not present within the object. + Cases where the dependency lists are existent. 
+        """
+        orig_dict_list = list_of_dicts(array_of_mwf)
+        existing_step_names = generate_ordered_step_name_list(orig_dict_list, "name")
+
+        reset_dict_list = set_dependency_list_values(orig_dict_list, "name", "dependencies", existing_step_names)
+
+        for idx, dictionary in enumerate(orig_dict_list):
+            assert dictionary["dependencies"] == orig_dependencies[idx]
+
+        for idx, dictionary in enumerate(reset_dict_list):
+            assert dictionary["dependencies"] == reset_dependencies[idx]
+
+
+# TODO: dependencies originally not there --> create new dependencies list
+    # non-list dependencies
+    # case of [] no dicts at all
+    @pytest.mark.parametrize(
+        "array_of_mwf, idx_without_dependencies",
+        [
+            (SIMPLE_MWF_ORDERED_ARRAY, [*range(0, 10)]),
+            ([MWF_A + ["hi"], MWF_B + DEP_ON_A], [0]),
+            ([MWF_A + ["hi"], MWF_B], [0, 1])
+        ],
+    )
+    def test_set_dependency_list_values_of_non_existing_dependencies(
+        self, list_of_dicts, array_of_mwf, idx_without_dependencies
+    ):
+        """
+        Test for the function that cleans up the dependency lists of a list of steps,
+        which resets a dependency list to empty if it is missing or not a list.
+        Cases where the dependency lists are non-existent or not of type list.
+        """
+        orig_dict_list = list_of_dicts(array_of_mwf)
+        existing_step_names = generate_ordered_step_name_list(orig_dict_list, "name")
+
+        reset_dict_list = set_dependency_list_values(orig_dict_list, "name", "dependencies", existing_step_names)
+
+        for idx in idx_without_dependencies:
+            try:
+                dependencies_value = orig_dict_list[idx]["dependencies"]
+                assert isinstance(dependencies_value, list) == False
+            except KeyError:
+                pass # dicts at these indices originally didn't have dependencies attr
+
+            # and assert that they were reset
+            assert reset_dict_list[idx]["dependencies"] == []
+
+    #TODO: add a test with a mix of the above two? or just assume it works (it does)
\ No newline at end of file
diff --git a/test/test_utils_magma.py b/test/test_utils_magma.py
index 9ea64a9..68e8188 100644
--- a/test/test_utils_magma.py
+++ b/test/test_utils_magma.py
@@ -138,8 +138,10 @@ def __init__(self, input_dict):
         (None, type(None), True),
         (None, object, True),
         ("a", str, True),
+        ("a", list, False),
         ("a", object, True),
         ("test", str, True),
+        ("test", list, False),
         ("test", object, True),
         ((1, 2), tuple, True),
         ((1, 2), object, True),

From 0ddf3d186fc5137f014cb22955c950169125188f Mon Sep 17 00:00:00 2001
From: vstevensf
Date: Thu, 17 Nov 2022 09:39:21 -0500
Subject: [PATCH 13/38] Completed first draft of the topological sort tests.
 Addressed some prior comments. Future planned changes include creating a
 class for the topological sort function to decrease the number of arguments
 that are common among several functions.

---
 magma/topological_sort.py     |  98 ++++++------
 test/test_topological_sort.py | 275 +++++++++++++++++++++++++++++-----
 2 files changed, 290 insertions(+), 83 deletions(-)

diff --git a/magma/topological_sort.py b/magma/topological_sort.py
index bed2b5e..079d923 100644
--- a/magma/topological_sort.py
+++ b/magma/topological_sort.py
@@ -10,6 +10,7 @@
 # Functions
 ################################################
 
+#TODO: don't make this part of the class, but rather generalized fxn for dcic_utils?
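# A minimal usage sketch of this module (illustrative only, not part of the
# patch): the step dicts below are hypothetical and mirror the shape used in
# the tests, and "name"/"dependencies" are the key names these helpers are
# invoked with there.
#
#     steps = [
#         {"meta_workflow": "uuid_b", "name": "B", "dependencies": ["A"]},
#         {"meta_workflow": "uuid_a", "name": "A"},
#         {"meta_workflow": "uuid_c", "name": "C", "dependencies": ["A", "B"]},
#     ]
#     ordered = generate_ordered_steps_list(steps, "name", "dependencies")
#     # returns copies of the A, B, C dicts in that order (dependencies first);
#     # raises an Exception instead if the dependencies contain a cycle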
def check_presence_of_key(list_of_dicts, key_to_check=None): """ Takes in a list of dictionaries and a list of keys, checks that those keys @@ -33,97 +34,97 @@ def check_presence_of_key(list_of_dicts, key_to_check=None): return True -def generate_ordered_step_name_list(list_of_dicts, name_of_step_key): +def generate_ordered_step_name_list(steps_with_dependencies, step_key): """ Based on a list of dictionaries (representing a list of steps) with a "name" key for each dictionary, return a list of the names of each dictionary with indices corresponding to the indices of the dictionaries themselves (same order). """ names = [] - for dictionary in list_of_dicts: - names.append(dictionary[name_of_step_key]) + for step_with_dependency in steps_with_dependencies: + names.append(step_with_dependency[step_key]) return names #TODO: could make this more general... -def set_dependency_list_values(list_of_dicts, name_of_step_key, name_of_dependencies_key, existing_steps_list): +def set_dependency_list_values(steps_with_dependencies, step_key, dependencies_key, existing_steps_list): """ Checks for dependency key within each dictionary in list_of_dicts. If not present, add that key and set value as empty list. Else, remove duplicates and self-dependencies. - :param list_of_dicts: list of dictionaries that should hold a dependency list. Each + :param steps_with_dependencies: list of dictionaries that should hold a dependency list. Each dictionary corresponds to a step, and the list represents a "list of steps" with dependencies between them. - :type list_of_dicts: list[dict] - :param name_of_step_key: name of the key corresponding to the step's name – + :type steps_with_dependencies: list[dict] + :param step_key: name of the key corresponding to the step's name – i.e. attribute referred to by dependency values - :type name_of_step_key: str - :param name_of_dependencies_key: name of the key corresponding to the dependencies list - :type name_of_dependencies_key: str + :type step_key: str + :param dependencies_key: name of the key corresponding to the dependencies list + :type dependencies_key: str :return: a copy of list_of_dicts with appropriate dependency lists set :rtype: list[dict] """ - list_of_dicts_copy = deepcopy(list_of_dicts) #TODO: make sure original doesnt change in test + steps_with_dependencies_copy = deepcopy(steps_with_dependencies) #TODO: make sure original doesnt change in test # iterate through list of dicts and set dependencies key-value pair - for dictionary in list_of_dicts_copy: + for step_with_dependency in steps_with_dependencies_copy: # add empty dependency list if not present - if not name_of_dependencies_key in dictionary: - dictionary[name_of_dependencies_key] = [] + if not dependencies_key in step_with_dependency: + step_with_dependency[dependencies_key] = [] continue #TODO: do some renaming of this function to follow pattern of obj vs dict key setting? 
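# To illustrate the cleanup performed below (hypothetical data, assuming
# step_key="name" and dependencies_key="dependencies"):
#
#     {"name": "B", "dependencies": ["A", "A", "B", "Z"]}
#
# becomes, when only "A" and "B" appear in existing_steps_list,
#
#     {"name": "B", "dependencies": ["A"]}
#
# the duplicate "A", the self-dependency "B", and the unknown step "Z"
# are all filtered out.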
# get rid of duplicates # I choose this method for generalization, in the case that dependencies is # a list of dictionaries, which are an unhashable type - dependencies = dictionary[name_of_dependencies_key] + dependencies = step_with_dependency[dependencies_key] # check this is indeed a list if not check_variable_type(dependencies, list): - dictionary[name_of_dependencies_key] = [] + step_with_dependency[dependencies_key] = [] continue #TODO: throw exception here instead of resetting value # get rid of duplicates and self-dependencies # and each dependency is in fact a name of another metaworkflow non_duplicated_dependencies = [] - dictionary_name = dictionary[name_of_step_key] + step_with_dependency_name = step_with_dependency[step_key] for dependency in dependencies: - if (dependency not in non_duplicated_dependencies) and (dependency != dictionary_name) and (dependency in existing_steps_list): + if (dependency not in non_duplicated_dependencies) and (dependency != step_with_dependency_name) and (dependency in existing_steps_list): non_duplicated_dependencies.append(dependency) #TODO: throw exception for self dependencies, duplicates, or nonexistent names? - dictionary[name_of_dependencies_key] = non_duplicated_dependencies + step_with_dependency[dependencies_key] = non_duplicated_dependencies # dictionary["steps_after"] = [] - return list_of_dicts_copy + return steps_with_dependencies_copy -def find_index_with_given_step_name(steps_with_dependencies_list, name_of_step_key, name): +def find_step_with_given_name(steps_with_dependencies_list, step_key, name): for index, step in enumerate(steps_with_dependencies_list): - if step[name_of_step_key] == name: + if step[step_key] == name: return index, step + raise Exception(f"Node named {name} is a nonexistent step") -def topological_sort_DFS_helper(graph, curr_node, curr_idx, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue): +def topological_sort_dfs_helper(graph, curr_node, curr_idx, node_name_key, dependencies_key, visited_temporary, visited_permanent, queue): if visited_permanent[curr_idx]: return queue if visited_temporary[curr_idx]: - raise Exception("cycle in graph!: node " + curr_node[name_of_node_key]) + raise Exception(f"Cycle in graph: node {curr_node[node_name_key]}") visited_temporary[curr_idx] = True - for previous_step in curr_node[name_of_dependencies_key]: + for previous_step_name in curr_node[dependencies_key]: #TODO: can't have duplicates in names with this method! 
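# A clarifying note on the two marker arrays (standard DFS coloring):
# visited_temporary flags nodes on the current recursion path ("gray"),
# while visited_permanent flags nodes whose dependencies are fully
# processed ("black"). Re-entering a gray node means the dependencies
# contain a cycle -- hence the exception raised above -- while black
# nodes are skipped, so each step is appended to the queue exactly once.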
- idx_previous_node, previous_node = find_index_with_given_step_name(graph, name_of_node_key, previous_step) - topological_sort_DFS_helper(graph, previous_node, idx_previous_node, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue) + idx_previous_node, previous_node = find_step_with_given_name(graph, node_name_key, previous_step_name) + topological_sort_dfs_helper(graph, previous_node, idx_previous_node, node_name_key, dependencies_key, visited_temporary, visited_permanent, queue) visited_temporary[curr_idx] = False visited_permanent[curr_idx] = True queue.append(curr_node) return queue - # TODO: for test, can check that there are no duplicates in returned queue -def topological_sort(graph, name_of_node_key, name_of_dependencies_key): +def topological_sort(graph, node_name_key, dependencies_key): """ - DFS algorithm from wikipedia https://en.wikipedia.org/wiki/Topological_sorting + Depth-first search algorithm from wikipedia https://en.wikipedia.org/wiki/Topological_sorting Logic based on topological sort of directed graph from https://www.geeksforgeeks.org/topological-sorting/ TODO: finish this docstring l8r Time complexity = O(V+E), where V = # vertices/nodes (steps), E = # edges (in directed graph, dependencies) @@ -131,7 +132,9 @@ def topological_sort(graph, name_of_node_key, name_of_dependencies_key): So, in detail, just do a topological sort and get the queue of the results. Then as you pop from the final queue and push to your result vector/array, check all the adjacent nodes of the last popped item and if the adjacent node exists in the vector then it's a cycle (if A goes to B then B should not precede A in the topological ordering). - ASSUMPTOPN: no self loops (i deletd them) + + - an assumption: no self-loops (they were previously deleted) -- but should detect cycles + in those cases anyway pseudocode from wikipedia: L ← Empty list that will contain the sorted nodes @@ -157,21 +160,18 @@ def topological_sort(graph, name_of_node_key, name_of_dependencies_key): num_steps = len(graph) visited_temporary = [False]*num_steps visited_permanent = [False]*num_steps - queue = [] #First In First Out + queue = [] # First In First Out while not all((element == True) for element in visited_permanent): - curr_idx = visited_permanent.index(False) + curr_idx = visited_permanent.index(False) # extract an index of a node that hasn't been visited yet curr_node = graph[curr_idx] #calling recursive helper function - queue = topological_sort_DFS_helper(graph, curr_node, curr_idx, name_of_node_key, name_of_dependencies_key, visited_temporary, visited_permanent, queue) - - #TODO: remove the following, just for testing - for element in queue: - print(element["name"]) + queue = topological_sort_dfs_helper(graph, curr_node, curr_idx, node_name_key, dependencies_key, visited_temporary, visited_permanent, queue) return queue + # TODO: for test, can check that there are no duplicates in returned queue -def generate_ordered_steps_list(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key): +def generate_ordered_steps_list(steps_with_dependencies_list, step_key, dependencies_key): """ Takes in list of steps and reorders based on dependencies, returning a separate copy of a reordered list. 
@@ -181,34 +181,34 @@ def generate_ordered_steps_list(steps_with_dependencies_list, name_of_step_key, :param steps_with_dependencies_list: list of dictionaries, where each dictionary has at least a step name :type steps_with_dependencies_list: list[dict] - :param name_of_step_key: name of the key corresponding to the step's name – + :param step_key: name of the key corresponding to the step's name – i.e. attribute referred to by dependency values - :type name_of_step_key: str - :param name_of_dependencies_key: name of the key corresponding to the dependencies list - :type name_of_dependencies_key: str + :type step_key: str + :param dependencies_key: name of the key corresponding to the dependencies list + :type dependencies_key: str :return: a copy of the reordered list (if possible) :rtype: list[dict] TODO: add errors and exceptions possibly thrown """ - # check that all objects in steps_with_dependencies_list have name_of_step_key - if not check_presence_of_key(steps_with_dependencies_list, name_of_step_key): - raise Exception("All dictionary elements in steps_with_dependencies_list must have attribute \"{0}\"".format(name_of_step_key)) + # check that all objects in steps_with_dependencies_list have step_key + if not check_presence_of_key(steps_with_dependencies_list, step_key): + raise Exception("All dictionary elements in steps_with_dependencies_list must have attribute \"{0}\"".format(step_key)) # TODO: feel like this is overkill, but checking for duplicates in steps_with_dependencies_list? # is there any case where the exact same step will be defined?? i think gets tricky with sharding maybe? idk ### List reordering based on dependencies ### - names = generate_ordered_step_name_list(steps_with_dependencies_list, name_of_step_key) + names = generate_ordered_step_name_list(steps_with_dependencies_list, step_key) ## Preprocessing of dependencies lists # add dependencies attribute if not present, remove duplicates from dependencies, # and check for self dependencies - preprocessed_steps_with_dependencies_list = set_dependency_list_values(steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key, names) + preprocessed_steps_with_dependencies_list = set_dependency_list_values(steps_with_dependencies_list, step_key, dependencies_key, names) # import pdb; pdb.set_trace() - ordered_steps_list = topological_sort(preprocessed_steps_with_dependencies_list, name_of_step_key, name_of_dependencies_key) + ordered_steps_list = topological_sort(preprocessed_steps_with_dependencies_list, step_key, dependencies_key) return ordered_steps_list diff --git a/test/test_topological_sort.py b/test/test_topological_sort.py index fa751a2..35b039e 100644 --- a/test/test_topological_sort.py +++ b/test/test_topological_sort.py @@ -21,7 +21,7 @@ LIST_OF_DICTS_EMPTY_LIST_ATTR = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] # of the form (mwf_uuid, mwf_name) -# used for factory fixture to generate lists of dicts +# used for factory fixture to generate lists of dicts (steps with dependencies array) MWF_A = ["test_mwf_uuid_0", "A"] MWF_B = ["test_mwf_uuid_1", "B"] MWF_C = ["test_mwf_uuid_2", "C"] @@ -58,7 +58,23 @@ EXISTING_MWF_NAMES = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] -SIMPLE_MWF_ORDERED_ARRAY = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E, MWF_F, MWF_G, MWF_H, MWF_I, MWF_J] +THREE_MWF = [MWF_A, MWF_B, MWF_C] +FOUR_MWF = [MWF_A, MWF_B, MWF_C, MWF_D] +FIVE_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E] +SIX_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E, MWF_F] +SEVEN_MWF = [MWF_A, 
MWF_B, MWF_C, MWF_D, MWF_E, MWF_F, MWF_G]
+EIGHT_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E, MWF_F, MWF_G, MWF_H]
+TEN_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E, MWF_F, MWF_G, MWF_H, MWF_I, MWF_J]
+
+
+def construct_array_of_mwf(mwf_metadata_list, dependencies_list):
+    """
+    Constructs a steps-with-dependencies array for the test graphs below:
+    concatenates each [mwf_uuid, mwf_name] metadata entry with its
+    corresponding list of dependencies.
+    """
+    length = len(mwf_metadata_list)
+    array_of_mwf = []
+    for idx in range(length):
+        array_of_mwf.append(mwf_metadata_list[idx] + dependencies_list[idx])
+    return array_of_mwf
+
 
 # a meta_workflow_dict generator of sorts
 def meta_workflow_dict(simple_mwf_metadata_list):
@@ -79,12 +95,27 @@ def _create_list(array_of_mwf):
         return created_list
     return _create_list
 
+# this fixture is needed because set() cannot be used to de-duplicate
+# a list of dicts (dicts are an unhashable type)
+@pytest.fixture
+def non_duplicated_array():
+    def _non_duplicate_array_creation(arr):
+        non_dup_arr = []
+        for item in arr:
+            if item not in non_dup_arr:
+                non_dup_arr.append(item)
+        return non_dup_arr
+    return _non_duplicate_array_creation
+
 #TODO: redraw these dependency diagrams more clearly
 # DAGs (directed acyclic graphs, can be topologically sorted)
+# Each DEPENDENCIES_* array below lists, for every step in the matching *_MWF
+# array, the names of the steps it depends on; construct_array_of_mwf zips the
+# two together to build the steps-with-dependencies arrays used in the tests.
 # -----------------------------------------------------------
 # DAG_0
 # A        B -----> C
-DEPENDENCIES_DAG_0 = [[], [], B]
+DEPENDENCIES_DAG_0 = [DEP_EMPTY, DEP_EMPTY, DEP_ON_B]
+DAG_0 = construct_array_of_mwf(THREE_MWF, DEPENDENCIES_DAG_0)
 
 # DAG_1
 # B -----> D
 # |   ⋀   ⋀
 # |  /    |
 # ⋁ /     |
 # A <----- C
-DEPENDENCIES_DAG_1 = [B+C, [], [], A+B+C]
+
+#TODO: consider a helper function to reduce the nesting of these dependency
+# variables, though it might make the definitions harder to read
+DEPENDENCIES_DAG_1 = [[B+C], DEP_EMPTY, DEP_EMPTY, [A+B+C]]
+DAG_1 = construct_array_of_mwf(FOUR_MWF, DEPENDENCIES_DAG_1)
 
 # DAG_2
 # E -----> C
@@ -104,7 +139,8 @@ def _create_list(array_of_mwf):
 #    |     |
 #    |     ⋁
 # F -----> B
-DEPENDENCIES_DAG_2 = [E+F, D+F, E, C, [], []]
+DEPENDENCIES_DAG_2 = [[E+F], [D+F], DEP_ON_E, DEP_ON_C, DEP_EMPTY, DEP_EMPTY]
+DAG_2 = construct_array_of_mwf(SIX_MWF, DEPENDENCIES_DAG_2)
 
 # DAG_3
 # A -----> C ------> F
@@ -116,7 +152,8 @@ def _create_list(array_of_mwf):
 #  \   __________/
 #   ⋁ /
 #    G
-DEPENDENCIES_DAG_3 = [[], A, A, G+E+F, B+F, C, B]
+DEPENDENCIES_DAG_3 = [DEP_EMPTY, DEP_ON_A, DEP_ON_A, [G+E+F], [B+F], DEP_ON_C, DEP_ON_B]
+DAG_3 = construct_array_of_mwf(SEVEN_MWF, DEPENDENCIES_DAG_3)
 
 # DAG_4
 # A ----> C ----> F
@@ -128,7 +165,8 @@ def _create_list(array_of_mwf):
 #  \   __________/
 #   ⋁ /
 #    E
-DEPENDENCIES_DAG_4 = [[], [], A, B, B, C+D, E+D, G]
+DEPENDENCIES_DAG_4 = [DEP_EMPTY, DEP_EMPTY, DEP_ON_A, DEP_ON_B, DEP_ON_B, [C+D], [E+D], DEP_ON_G]
+DAG_4 = construct_array_of_mwf(EIGHT_MWF, DEPENDENCIES_DAG_4)
 
 # DAG_5
 # A -----> B -----> E
@@ -144,7 +182,8 @@ def _create_list(array_of_mwf):
 #   | ---> G -----> I
 #   |               ⋀
 #   |___________________|
-DEPENDENCIES_DAG_5 = [[], A, A, B, B, C, C, C, C+G, G]
+DEPENDENCIES_DAG_5 = [DEP_EMPTY, DEP_ON_A, DEP_ON_A, DEP_ON_B, DEP_ON_B, DEP_ON_C, DEP_ON_C, DEP_ON_C, [C+G], DEP_ON_G]
+DAG_5 = construct_array_of_mwf(TEN_MWF, DEPENDENCIES_DAG_5)
 
 
 # Cyclic graphs, cannot be topologically sorted
@@ -155,7 +194,8 @@ def _create_list(array_of_mwf):
 #          |        |
 #          |        ⋁
 #          D <----- C
-DEPENDENCIES_CYCLIC_0 = [[], D, B, C]
+DEPENDENCIES_CYCLIC_0 = [DEP_EMPTY, DEP_ON_D, DEP_ON_B, DEP_ON_C]
+CYCLIC_0 = construct_array_of_mwf(FOUR_MWF, DEPENDENCIES_CYCLIC_0)
 
 # CYCLIC_1
 # A -----> B
@@ -163,7 
+203,8 @@ def _create_list(array_of_mwf):
 #          |        |
 #          |        ⋁
 #          D <----- C
-DEPENDENCIES_CYCLIC_1 = [D, A, B, C]
+DEPENDENCIES_CYCLIC_1 = [DEP_ON_D, DEP_ON_A, DEP_ON_B, DEP_ON_C]
+CYCLIC_1 = construct_array_of_mwf(FOUR_MWF, DEPENDENCIES_CYCLIC_1)
 
 # CYCLIC_2
 # A -----> B ----> E
@@ -171,7 +212,8 @@ def _create_list(array_of_mwf):
 #    |     |  \____|
 #    |     ⋁
 #    D <----- C
-DEPENDENCIES_CYCLIC_2 = [D, A+E, B, C, B]
+DEPENDENCIES_CYCLIC_2 = [DEP_ON_D, [A+E], DEP_ON_B, DEP_ON_C, DEP_ON_B]
+CYCLIC_2 = construct_array_of_mwf(FIVE_MWF, DEPENDENCIES_CYCLIC_2)
 
 # CYCLIC_3
 # B -----> A -----> D
@@ -179,7 +221,8 @@ def _create_list(array_of_mwf):
 #  |      | |       |
 #  |      | |       ⋁
 #  C <----- ------- E
-DEPENDENCIES_CYCLIC_3 = [B+E, C, A, A, D]
+DEPENDENCIES_CYCLIC_3 = [[B+E], DEP_ON_C, DEP_ON_A, DEP_ON_A, DEP_ON_D]
+CYCLIC_3 = construct_array_of_mwf(FIVE_MWF, DEPENDENCIES_CYCLIC_3)
 
 # CYCLIC_4
 # A -----> B -----> E
@@ -195,7 +238,8 @@ def _create_list(array_of_mwf):
 #   | ---> G -----> I
 #   |               |
 #   |___________________|
-DEPENDENCIES_CYCLIC_4 = [[], A, A+I, B, B, C, C, C, G, G]
+DEPENDENCIES_CYCLIC_4 = [DEP_EMPTY, DEP_ON_A, [A+I], DEP_ON_B, DEP_ON_B, DEP_ON_C, DEP_ON_C, DEP_ON_C, DEP_ON_G, DEP_ON_G]
+CYCLIC_4 = construct_array_of_mwf(TEN_MWF, DEPENDENCIES_CYCLIC_4)
 
 
 #################################################################
@@ -204,7 +248,7 @@ def _create_list(array_of_mwf):
 class TestCheckPresenceOfKey:
     @pytest.mark.parametrize(
-        "empty_list_of_dicts, key_to_check, return_value",
+        "empty_list_of_dicts, key_to_check, expected_result",
         [
             ([], None, True),
             ([], "key", True), # edge case: an empty list passes vacuously (TODO: revisit?)
@@ -216,25 +260,25 @@ class TestCheckPresenceOfKey:
         ],
     )
     def test_check_presence_of_key_empty_dicts(
-        self, empty_list_of_dicts, key_to_check, return_value
+        self, empty_list_of_dicts, key_to_check, expected_result
     ):
         """
         Test for function checking that all dictionaries in a given list have the
         specified key, no errors raised, with empty list or list of empty dicts. 
""" result = check_presence_of_key(empty_list_of_dicts, key_to_check) - assert result == return_value + assert result == expected_result @pytest.mark.parametrize( - "array_of_mwf, key_to_check, return_value", + "array_of_mwf, key_to_check, expected_result", [ ( - SIMPLE_MWF_ORDERED_ARRAY, + TEN_MWF, "name", True, ), ( - [MWF_A, MWF_B, MWF_C], + THREE_MWF, "meta_workflow", True, ), @@ -246,7 +290,7 @@ def test_check_presence_of_key_empty_dicts( ], ) def test_check_presence_of_key( - self, list_of_dicts, array_of_mwf, key_to_check, return_value + self, list_of_dicts, array_of_mwf, key_to_check, expected_result ): """ Test for function checking that all dictionaries in a given list have the @@ -254,21 +298,21 @@ def test_check_presence_of_key( """ dict_list = list_of_dicts(array_of_mwf) result = check_presence_of_key(dict_list, key_to_check) - assert result == return_value + assert result == expected_result result2 = check_presence_of_key(dict_list + LIST_OF_DICTS_EMPTY_LIST_ATTR, "list_empty_0") assert result2 == False @pytest.mark.parametrize( - "array_of_mwf, return_value", + "array_of_mwf, expected_result", [ ( - SIMPLE_MWF_ORDERED_ARRAY, + TEN_MWF, EXISTING_MWF_NAMES ), ( [MWF_B, MWF_E, MWF_I, MWF_A], - ["B", "E", "I", "A"] + ["B", "E", "I", "A"] # B+E+I+A ), ( [], @@ -277,7 +321,7 @@ def test_check_presence_of_key( ], ) def test_generate_ordered_step_name_list( - list_of_dicts, array_of_mwf, return_value + list_of_dicts, array_of_mwf, expected_result ): """ Test for function creating a list of values for a given key, @@ -285,40 +329,47 @@ def test_generate_ordered_step_name_list( """ dict_list = list_of_dicts(array_of_mwf) result = generate_ordered_step_name_list(dict_list, "name") - assert result == return_value + assert result == expected_result #TODO: will be generalizing this function later +#TODO: use your new tester helper function for constructing array_of_mwf class TestSetDependencyListValues: @pytest.mark.parametrize( "array_of_mwf, orig_dependencies, reset_dependencies", [ ( + # no changes made [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + DEP_ON_A], [C, A, A], [C, A, A] ), ( + # get rid of dependency on nonexistent step [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + DEP_ON_D], [C, A, D], [C, A, []] ), ( + # get rid of dependency on nonexistent steps [MWF_A + DEP_ON_G, MWF_B + DEP_ON_A, MWF_C + DEP_ON_D], [G, A, D], [[], A, []] ), ( - [MWF_A + [["B", "A"]], MWF_B + DEP_ON_A], - [["B", "A"], A], + # get rid of self-dependencies + [MWF_A + [B + A], MWF_B + DEP_ON_A], + [B + A, A], [B, A] ), ( - [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + [["A", "C", "A"]]], - [C, A, ["A", "C", "A"]], + # get rid of duplicate dependencies + [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + [A + C + A]], + [C, A, A + C + A], [C, A, A] ), ( + # no dependencies = no change, just set dependencies to empty list [MWF_A + DEP_EMPTY], DEP_EMPTY, DEP_EMPTY @@ -345,13 +396,13 @@ def test_set_dependency_list_values_of_existing_dependencies( assert dictionary["dependencies"] == reset_dependencies[idx] -# TODO: dependencies originally not there --> create new dependencies list + # TODO: dependencies originally not there --> create new dependencies list # non-list dependencies # case of [] no dicts at all @pytest.mark.parametrize( "array_of_mwf, idx_without_dependencies", [ - (SIMPLE_MWF_ORDERED_ARRAY, [*range(0, 10)]), + (TEN_MWF, [*range(0, 10)]), ([MWF_A + ["hi"], MWF_B + DEP_ON_A], [0]), ([MWF_A + ["hi"], MWF_B], [0, 1]) ], @@ -362,7 +413,9 @@ def test_set_dependency_list_values_of_non_existing_dependencies( """ 
Test for function that gets rid of duplicates within object attributes
        that are lists, or sets attributes to empty list if not present within the object.
-        Cases where the dependency lists are non-existent or not of type list.
+        Cases where the dependency lists are non-existent or not of type list,
+        so the function should either set dependencies to an empty list (when
+        non-existent) or raise a KeyError (when dependencies is not of type list).
         """
         orig_dict_list = list_of_dicts(array_of_mwf)
         existing_step_names = generate_ordered_step_name_list(orig_dict_list, "name")
@@ -379,4 +432,158 @@ def test_set_dependency_list_values_of_non_existing_dependencies(
         # and assert that they were reset
         assert reset_dict_list[idx]["dependencies"] == []
 
-    #TODO: add a test with a mix of the above two? or just assume it works (it does)
\ No newline at end of file
+    #TODO: add a test mixing the two cases above
+
+@pytest.mark.parametrize(
+    "array_of_mwf, name_to_search, expected_step, expected_index",
+    [
+        ([MWF_A, MWF_B, MWF_C], "test_mwf_uuid_1", MWF_B, 1),
+        ([MWF_A, MWF_B, MWF_C], "test_mwf_uuid_6", MWF_B, 1) # raises Exception
+    ],
+)
+def test_find_step_with_given_name(
+    list_of_dicts, array_of_mwf, name_to_search, expected_step, expected_index
+):
+    """
+    Test for function that searches a steps-with-dependencies list for a step
+    by name, returning its index and the step itself, or raising an Exception
+    when no step with the given name exists (second parametrized case).
+    """
+    try:
+        steps_with_dependencies = list_of_dicts(array_of_mwf)
+        index, step = find_step_with_given_name(steps_with_dependencies, "meta_workflow", name_to_search)
+    except Exception as exception_info:
+        assert f"Node named {name_to_search} is a nonexistent step" == str(exception_info)
+    else:
+        assert index == expected_index
+        assert step == meta_workflow_dict(expected_step)
+
+
+class TestTopologicalSortDFSHelper:
+    @pytest.mark.parametrize(
+        "dag_array_of_mwf, starting_idx, expected_queue_by_index",
+        [
+            (DAG_0, 0, [0]),
+            (DAG_0, 1, [1]),
+            (DAG_0, 2, [1, 2]),
+            (DAG_1, 0, [1, 2, 0]),
+            (DAG_1, 3, [1, 2, 0, 3]),
+            (DAG_3, 0, [0]),
+            (DAG_3, 4, [0, 1, 2, 5, 4]),
+            (CYCLIC_0, 0, [0]) # won't detect cycles in disconnected graphs, but the overall toposort will
+        ],
+    )
+    def test_topological_sort_helper_no_cycles(
+        self, list_of_dicts, non_duplicated_array, dag_array_of_mwf, starting_idx, expected_queue_by_index
+    ):
+        graph = list_of_dicts(dag_array_of_mwf)
+        starting_node = graph[starting_idx]
+        starting_queue = []
+
+        # TODO: make this a fixture?
+        length = len(graph)
+        visited_temporary = [False]*length
+        visited_permanent = [False]*length
+
+        #TODO: also make this a fixture? 
+ expected_queue = [] + expected_visited_permanent = [False]*length + for i in expected_queue_by_index: + expected_queue.append(graph[i]) + expected_visited_permanent[i] = True + + #TODO: make global constants NAME and DEPENDENCIES keys + resulting_queue = topological_sort_dfs_helper(graph, starting_node, starting_idx, "name", "dependencies", visited_temporary, visited_permanent, starting_queue) + assert resulting_queue == expected_queue + assert visited_permanent == expected_visited_permanent + + # check that there are no duplicates in returned queue + non_dup_resulting_queue = non_duplicated_array(resulting_queue) + assert resulting_queue == non_dup_resulting_queue + + + @pytest.mark.parametrize( + "cyclic_graph_array_of_mwf, starting_idx, node_at_cycle_detection", + [ + (CYCLIC_0, 1, "B"), + (CYCLIC_2, 0, "A"), # just illustrating the nature of DFS w CYCLIC_2 + (CYCLIC_2, 1, "B"), + (CYCLIC_2, 4, "B"), + (CYCLIC_3, 4, "A"), + (CYCLIC_4, 6, "G") # same here + ], + ) + def test_topological_sort_helper_cycles( + self, list_of_dicts, cyclic_graph_array_of_mwf, starting_idx, node_at_cycle_detection + ): + graph = list_of_dicts(cyclic_graph_array_of_mwf) + starting_node = graph[starting_idx] + starting_queue = [] + + # TODO: make this a fixture? (same as prior test, also follow toposort tests) + length = len(graph) + visited_temporary = [False]*length + visited_permanent = [False]*length + + #TODO: make global constants NAME and DEPENDENCIES keys + with pytest.raises(Exception) as exception_info: + topological_sort_dfs_helper(graph, starting_node, starting_idx, "name", "dependencies", visited_temporary, visited_permanent, starting_queue) + assert f"Cycle in graph: node {node_at_cycle_detection}" in str(exception_info.value) + +# TODO: if you make topological sort a class, you can test that visited_permanent is all True +class TestTopologicalSort: + @pytest.mark.parametrize( + "dag_array_of_mwf, expected_queue_by_index", + [ + # TODO: illustrate with different starting indices, to show that + # there exist several valid orderings, based on DFS beginning node + # may make new DAGs w same dependencies but different ordering of the array + (DAG_0, [0, 1, 2]), + (DAG_1, [1, 2, 0, 3]), + (DAG_2, [4, 5, 0, 2, 3, 1]), + (DAG_3, [0, 1, 2, 6, 5, 4, 3]), + (DAG_4, [0, 1, 2, 3, 4, 5, 6, 7]), + (DAG_5, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + ], + ) + def test_topological_sort_no_cycles( + self, list_of_dicts, non_duplicated_array, dag_array_of_mwf, expected_queue_by_index + ): + graph = list_of_dicts(dag_array_of_mwf) + + # TODO: make this a fixture? (same as above tests) + length = len(graph) + + #TODO: also make this a fixture? 
+ expected_queue = [] + for i in expected_queue_by_index: + expected_queue.append(graph[i]) + + #TODO: make global constants NAME and DEPENDENCIES keys + resulting_queue = topological_sort(graph, "name", "dependencies") + assert resulting_queue == expected_queue + + # assert that all nodes have indeed been visited + #TODO: add this when toposort has been made a class + # expected_visited_permanent = [True]*length + # assert visited_permanent == expected_visited_permanent + + # check that there are no duplicates in returned queue + non_dup_resulting_queue = non_duplicated_array(resulting_queue) + assert resulting_queue == non_dup_resulting_queue + + #TODO: again - maybe rearrange cyclic graph nodes to show it works in whatever order + @pytest.mark.parametrize( + "cyclic_graph_array_of_mwf, node_at_cycle_detection", + [ + (CYCLIC_0, "B"), + (CYCLIC_1, "A"), + (CYCLIC_2, "A"), + (CYCLIC_3, "A"), + (CYCLIC_4, "C") + ], + ) + def test_topological_sort_cycles( + self, list_of_dicts, cyclic_graph_array_of_mwf, node_at_cycle_detection + ): + graph = list_of_dicts(cyclic_graph_array_of_mwf) + + #TODO: make global constants NAME and DEPENDENCIES keys + with pytest.raises(Exception) as exception_info: + topological_sort(graph, "name", "dependencies") + assert f"Cycle in graph: node {node_at_cycle_detection}" in str(exception_info.value) \ No newline at end of file From c2231c2c330c6cb325267e684b1062dccca7410c Mon Sep 17 00:00:00 2001 From: vstevensf Date: Thu, 17 Nov 2022 09:40:40 -0500 Subject: [PATCH 14/38] Small changes to utils -- mainly variable naming. Also addressed some prior comments on draft. --- magma/utils.py | 2 +- test/test_utils_magma.py | 34 +++++++++++++++++----------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/magma/utils.py b/magma/utils.py index 4c2be17..b053b7c 100644 --- a/magma/utils.py +++ b/magma/utils.py @@ -70,7 +70,7 @@ def check_presence_of_attributes(input_object, attributes_to_check=None): raise ValueError("Object validation error, {0}\n" .format(e.args[0])) -def set_list_attributes(input_object, attributes_to_set=None): +def set_unique_list_attributes(input_object, attributes_to_set=None): """ Checks for given attribute(s) of type list, sets as empty list if not present, else sets that list attribute, without duplicates. diff --git a/test/test_utils_magma.py b/test/test_utils_magma.py index 68e8188..b91b7a1 100644 --- a/test/test_utils_magma.py +++ b/test/test_utils_magma.py @@ -127,7 +127,7 @@ def __init__(self, input_dict): ################################################################# @pytest.mark.parametrize( - "variable, intended_type, return_value", + "variable, intended_type, expected_result", [ (2, int, True), (-2, int, True), @@ -168,16 +168,16 @@ def __init__(self, input_dict): ({"hi": 1}, list, False), ], ) -def test_check_variable_type(variable, intended_type, return_value): +def test_check_variable_type(variable, intended_type, expected_result): """ Test for function checking if a variable is of a specified type. 
""" result = check_variable_type(variable, intended_type) - assert result == return_value + assert result == expected_result @pytest.mark.parametrize( - "list_to_check, intended_type, return_value", + "list_to_check, intended_type, expected_result", [ ([], str, True), ([], int, True), @@ -194,14 +194,14 @@ def test_check_variable_type(variable, intended_type, return_value): ], ) def test_check_list_elements_type_no_errors( - list_to_check, intended_type, return_value + list_to_check, intended_type, expected_result ): """ Test for function checking that all elements of a list are of a specified type, no errors raised. """ result = check_list_elements_type(list_to_check, intended_type) - assert result == return_value + assert result == expected_result class TestCheckPresenceOfAttributes: @@ -225,7 +225,7 @@ def test_check_presence_of_attributes_no_errors( no errors raised. """ result = check_presence_of_attributes(input_object, attributes_to_check) - assert result == None + assert result is None @pytest.mark.parametrize( "input_object, attributes_to_check", @@ -259,7 +259,7 @@ def test_check_presence_of_attributes_value_errors( assert "Object validation error" in str(value_err_info.value) -class TestSetListAttributes: +class TestSetUniqueListAttributes: @pytest.mark.parametrize( "input_object, attributes_to_set", [ @@ -272,7 +272,7 @@ class TestSetListAttributes: (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), ], ) - def test_set_list_attributes_of_existing_nonlist_attributes( + def test_set_unique_list_attributes_of_existing_nonlist_attributes( self, input_object, attributes_to_set ): """ @@ -281,8 +281,8 @@ def test_set_list_attributes_of_existing_nonlist_attributes( Cases where the attributes to set are existent and are NOT lists, no action done. 
""" original_object = deepcopy(input_object) - result = set_list_attributes(input_object, attributes_to_set) - assert result == None + result = set_unique_list_attributes(input_object, attributes_to_set) + assert result is None assert vars(input_object) == vars(original_object) # no attributes changed #TODO: double check the above "vars" functionality @@ -331,7 +331,7 @@ def test_set_list_attributes_of_existing_nonlist_attributes( ), ], ) - def test_set_list_attributes_of_existing_list_attributes( + def test_set_unique_list_attributes_of_existing_list_attributes( self, input_object, attributes_to_set, orig_lengths, reset_lengths ): """ @@ -344,13 +344,13 @@ def test_set_list_attributes_of_existing_list_attributes( for idx, attribute in enumerate(attributes_to_set): assert len(getattr(input_object, attribute)) == orig_lengths[idx] - result = set_list_attributes(input_object, attributes_to_set) + result = set_unique_list_attributes(input_object, attributes_to_set) # check length of "reset" attributes_to_set for idx, attribute in enumerate(attributes_to_set): assert len(getattr(input_object, attribute)) == reset_lengths[idx] - assert result == None + assert result is None @pytest.mark.parametrize( "input_object, attributes_to_set, num_added_attributes", @@ -363,7 +363,7 @@ def test_set_list_attributes_of_existing_list_attributes( (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"], 0), ], ) - def test_set_list_attributes_of_nonexistent_attributes( + def test_set_unique_list_attributes_of_nonexistent_attributes( self, input_object, attributes_to_set, num_added_attributes ): """ @@ -377,8 +377,8 @@ def test_set_list_attributes_of_nonexistent_attributes( original_attributes_set = set(dir(input_object)) num_original_attributes = len(original_attributes_set) - result = set_list_attributes(input_object, attributes_to_set) - assert result == None + result = set_unique_list_attributes(input_object, attributes_to_set) + assert result is None reset_attributes_set = set(dir(input_object)) num_reset_attributes = len(reset_attributes_set) From dae12299113e9efc4239318638075407e505ea99 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Wed, 23 Nov 2022 14:16:21 -0500 Subject: [PATCH 15/38] Small change to test file for topological sort --- test/test_topological_sort.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_topological_sort.py b/test/test_topological_sort.py index 35b039e..4b5b2da 100644 --- a/test/test_topological_sort.py +++ b/test/test_topological_sort.py @@ -4,7 +4,6 @@ # Libraries ################################################################# import pytest -# from copy import deepcopy from magma.topological_sort import * @@ -20,7 +19,7 @@ LIST_OF_DICTS_EMPTY_LIST_ATTR = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] -# of the form (mwf_uuid, mwf_name) +# of the form [mwf_uuid, mwf_name] # used for factory fixture to generate lists of dicts (steps with dependencies array) MWF_A = ["test_mwf_uuid_0", "A"] MWF_B = ["test_mwf_uuid_1", "B"] From 87952865a0aff2518892c14a03005d12cbe782e6 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 28 Nov 2022 14:59:24 -0500 Subject: [PATCH 16/38] Finished validation of MWF handler and its corresponding pytests. 
--- magma/metawfl_handler.py | 124 ++++++++++-------- test/test_metawfl_handler.py | 240 +++++++++++++++++++++++++++++++++-- 2 files changed, 300 insertions(+), 64 deletions(-) diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index 3f35f3d..1df9001 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -8,65 +8,78 @@ ################################################ # TODO: functions for dcic utils -- move later ################################################ -from magma.utils import check_presence_of_attributes, set_list_attributes +from magma.utils import check_presence_of_attributes, set_unique_list_attributes from magma.topological_sort import generate_ordered_steps_list - -#TODO: make parent class maybe - ################################################ -# MetaWorkflowStep +# ValidatedDictionary ################################################ - -class MetaWorkflowStep(object): +class ValidatedDictionary(object): """ - Class to represent a MetaWorkflow object, - as a step within a MetaWorkflow Handler object + Parent class for MetaWorkflowStep and MetaWorkflowHandler classes. + Takes in an input dictionary, and validates basic attributes. """ def __init__(self, input_dict): """ Constructor method, initialize object and attributes. - :param input_dict: a MetaWorkflow step (object) and accompanying info within handler, defined by json file + :param input_dict: input dictionary, defined by json file, which defines basic attributes of this object :type input_dict: dict """ - ### Basic (non-calculated) attributes ### + # Set basic (non-calculated) attributes # for key in input_dict: setattr(self, key, input_dict[key]) - # Validate presence of basic attributes of this MetaWorkflow step - self._validate_basic_attributes() - - # Get rid of dependency duplicates -- TODO: already done within mwf? + def _validate_basic_attributes(self, list_of_attributes=None): + """ + Validation of the JSON input for this object. + Checks that given attributes are present in the created object. + """ + check_presence_of_attributes(self, list_of_attributes) - # Initialize Metaworkflow (magma (ff?)) using embedded call to metaworkflow LinkTo - #TODO: do in magma ff? - # metaworkflow_linkto = getattr(self, "meta_workflow") #TODO: embedding API -- how to test?? +################################################ +# MetaWorkflowStep +################################################ +class MetaWorkflowStep(ValidatedDictionary): + """ + Class to represent a MetaWorkflow object, + as a step within a MetaWorkflow Handler object + """ - #TODO: import and call magma mwf to initialize the mwf within the handler - # THEN check the dependencies - # also need to fill in the names for the mwfs + META_WORKFLOW_ATTR = "meta_workflow" + NAME_ATTR = "name" + DUP_FLAG_ATTR = "duplication_flag" + ITEMS_CREATION_PROP_TRACE = "items_for_creation_property_trace" + ITEMS_CREATION_UUID = "items_for_creation_uuid" + LIST_OF_ATTRS = [META_WORKFLOW_ATTR, NAME_ATTR, DUP_FLAG_ATTR] - #TODO: check that names of metaworkflow steps are unique -- also - # use setdefault for filling in names (in ff? or here?) -- rather, check circularity + def __init__(self, input_dict): + """ + Constructor method, initialize object and attributes. - #TODO: case where a metaworkflow is repeated downstream? does this ever happen? 
+        :param input_dict: a MetaWorkflow step (object) and accompanying info within handler, defined by json file
+        :type input_dict: dict
+        """
+        super().__init__(input_dict)
 
+        # Validate presence of basic attributes of this MetaWorkflow step
+        self._validate_basic_attributes(self.LIST_OF_ATTRS)
 
-    def _validate_basic_attributes(self): #TODO: create this as part of the utility function?
+    def _validate_basic_attributes(self, list_of_attributes):
         """
-        Validation of the JSON input for the MetaWorkflow step
-
-        Checks that necessary MetaWorkflow attributes are present for this MetaWorkflow step
+        Validation of the JSON input for the MetaWorkflow step.
+        Checks that necessary MetaWorkflow attributes are present for this MetaWorkflow step.
         """
-        check_presence_of_attributes(self, ["meta_workflow", "name", "duplication_flag"])
+        super()._validate_basic_attributes(list_of_attributes)
         # str, must be unique TODO: name filling in ff
         try:
+            #TODO: what about if both are present? UUID is taken as default for now
             # set None for [default] arg to not throw AttributeError
-            if getattr(self, "items_for_creation_property_trace", True):
-                getattr(self, "items_for_creation_uuid")
+            if not getattr(self, self.ITEMS_CREATION_UUID, None):
+                getattr(self, self.ITEMS_CREATION_PROP_TRACE)
         except AttributeError as e:
             raise ValueError("JSON validation error, {0}\n"
                 .format(e.args[0]))
@@ -76,12 +89,18 @@
 ################################################
 # MetaWorkflowHandler
 ################################################
-class MetaWorkflowHandler(object):
+class MetaWorkflowHandler(ValidatedDictionary):
     """
     Class representing a MetaWorkflow Handler object,
     a list of MetaWorkflows with specified dependencies
     """
 
+    UUID_ATTR = "uuid"
+    META_WORKFLOW_NAME_ATTR = "name"
+    META_WORKFLOW_DEPENDENCIES_ATTR = "dependencies"
+    META_WORKFLOWS_ATTR = "meta_workflows"
+    LIST_OF_ATTRS = [UUID_ATTR]
+
     def __init__(self, input_dict):
         """
         Constructor method, initialize object and attributes.
@@ -89,43 +108,36 @@ def __init__(self, input_dict):
         :param input_dict: MetaWorkflow Handler object, defined by json file from portal
         :type input_dict: dict
         """
-        ### Basic attributes ###
-
-        for key in input_dict:
-            setattr(self, key, input_dict[key])
-
-        # Validate presence of basic attributes of this MetaWorkflow Handler
-        # - Required: project, institution TODO: taken care of in schema
-        # - Identifying: uuid, aliases, accession TODO: is this kinda the same as required?
-        # - Commonly present: title, name, description, meta_workflows (list)
-        # See cgap_portal meta_workflow_handler schema for more info.
-        check_presence_of_attributes(self, ["uuid"])
+        super().__init__(input_dict)
+
+        super()._validate_basic_attributes(self.LIST_OF_ATTRS)
 
         ### Calculated attributes ###
-        # to check for non-existent meta_workflows attribute
         self._set_meta_workflows_list()
 
         # order the meta_workflows list based on dependencies
-        ordered_meta_workflows = generate_ordered_steps_list(self.meta_workflows, "name", "dependencies")
-        self.ordered_meta_workflows = ordered_meta_workflows
-        #TODO: should i make this a new calculated attribute, rather than redefining? 
YES
+        self.ordered_meta_workflows = self._create_ordered_meta_workflows_list()
 
-        # create MetaWorkflow object for each metaworkflow step in meta_workflows
-        #TODO: do in magma-ff? because requires pulling metadata using UUID
-        #self.create_meta_workflow_steps()
+        # using the ordered meta_workflows list, create a list of MetaWorkflowStep objects
+        # (this validates the basic attributes of each MetaWorkflow step)
+        self.ordered_meta_workflow_steps = self._create_meta_workflow_step_objects()
 
     def _set_meta_workflows_list(self):
         """
         Checks for meta_workflows attribute, gets rid of duplicates,
         else sets as empty list if not present
-        TODO: better to throw error if duplicates are present?
         """
-        set_list_attributes(self, ["meta_workflows"])
+        set_unique_list_attributes(self, [self.META_WORKFLOWS_ATTR])
 
-    # def create_meta_workflow_steps(self): #TODO: in magma ff?
-    #     meta_workflows_list = getattr(self, "meta_workflows") # list
-    #     for meta_workflow_step in meta_workflows_list:
+    def _create_ordered_meta_workflows_list(self):
+        """
+        Returns a copy of the meta_workflows list, ordered topologically
+        based on the dependencies among its steps.
+        """
+        return generate_ordered_steps_list(self.meta_workflows, self.META_WORKFLOW_NAME_ATTR, self.META_WORKFLOW_DEPENDENCIES_ATTR)
 
-    #TODO: getting global input of first step ## getattr(self, 'input') # list
+    def _create_meta_workflow_step_objects(self):
+        """
+        Wraps each dictionary in the ordered meta_workflows list as a
+        MetaWorkflowStep object, validating its basic attributes,
+        and returns the resulting list.
+        """
+        meta_workflow_step_list = []
+        for meta_workflow in self.ordered_meta_workflows:
+            meta_workflow_step_object = MetaWorkflowStep(meta_workflow)
+            meta_workflow_step_list.append(meta_workflow_step_object)
+        return meta_workflow_step_list
diff --git a/test/test_metawfl_handler.py b/test/test_metawfl_handler.py
index 13f23c9..6ae4fe4 100644
--- a/test/test_metawfl_handler.py
+++ b/test/test_metawfl_handler.py
@@ -2,16 +2,240 @@
 # Libraries
 #################################################################
 import pytest
-import json
+from copy import deepcopy
 
-from magma import metawfl_handler as mwfh
+from magma.metawfl_handler import MetaWorkflowStep, MetaWorkflowHandler
 
-#TODO: how is the json object passed to magma? as list? or dict?
+#################################################################
+# Vars
+#################################################################
+
+MWF_HANDLER_NAME = "test_mwf_handler"
+PROJECT = "test_project"
+INSTITUTION = "test_institution"
+MWF_HANDLER_UUID = "test_mwf_handler_uuid"
+
+TESTER_UUID = "test_item_uuid"
+
+# basic meta_workflow steps (dicts) used in meta_workflows array
+#TODO: for validation of basic attributes, what if the value of an attribute is None?
+# e.g. name or meta_workflow in metaworkflowstep? 
(because my helper function +# only checks that you can get the actual attribute, but getattr works still +# if the value is None) +MWF_A = {"meta_workflow": "test_mwf_uuid_0", "name": "A"} +MWF_B = {"meta_workflow": "test_mwf_uuid_1", "name": "B"} +MWF_C = {"meta_workflow": "test_mwf_uuid_2", "name": "C"} +MWF_D = {"meta_workflow": "test_mwf_uuid_3", "name": "D"} + +DEP_ON_A = ["A"] +DEP_ON_B = ["B"] +DEP_ON_C = ["C"] +DEP_ON_D = ["D"] + +def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=None, items_for_creation_uuid=None, dependencies=None, duplication_flag=None): + dict_copy = deepcopy(metaworkflow_dict) + if items_for_creation_property_trace: + dict_copy["items_for_creation_property_trace"] = items_for_creation_property_trace + if items_for_creation_uuid: + dict_copy["items_for_creation_uuid"] = items_for_creation_uuid + if dependencies is not None: + dict_copy["dependencies"] = dependencies + if duplication_flag is not None: + dict_copy["duplication_flag"] = duplication_flag + return dict_copy + + + +# meta_workflows arrays for MetaWorkflow Handler +# handler without uuid -- fails validation of basic attributes +HANDLER_WITHOUT_UUID = { + "name": MWF_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION +} + +# handler without metaworkflows array -- passes validation, should set empty metaworkflows array +HANDLER_WITHOUT_MWF_ARRAY = { + "name": MWF_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_HANDLER_UUID +} + +# DAG_0 +# A B -----> C +MWF_A_DAG_0 = mwf_with_added_attrs(MWF_A, None, TESTER_UUID, [], True) +MWF_B_DAG_0 = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, [], True) +MWF_C_DAG_0 = mwf_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B, True) +DAG_0_MWF_ARRAY = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0] # purposely in this order to test toposort +HANDLER_DAG_0 = { + "name": MWF_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_HANDLER_UUID, + "meta_workflows": DAG_0_MWF_ARRAY +} +DAG_0_MWF_ARRAY_W_DUPLICATES = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0, MWF_B_DAG_0] +HANDLER_DAG_0_W_DUPLICATES = { + "name": MWF_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_HANDLER_UUID, + "meta_workflows": DAG_0_MWF_ARRAY_W_DUPLICATES +} +REORDERED_MWF_ARRAY_DAG_0 = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0] + +# DAG_1 +# B -----> D +# | ⋀ ⋀ +# | / | +# ⋁ / | +# A <----- C +MWF_A_DAG_1 = mwf_with_added_attrs(MWF_A, None, TESTER_UUID, DEP_ON_B + DEP_ON_C, True) +MWF_B_DAG_1 = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, [], True) +MWF_C_DAG_1 = mwf_with_added_attrs(MWF_C, None, TESTER_UUID, [], True) +MWF_D_DAG_1 = mwf_with_added_attrs(MWF_D, None, TESTER_UUID, DEP_ON_A + DEP_ON_B + DEP_ON_C, True) +DAG_1_MWF_ARRAY = [MWF_A_DAG_1, MWF_B_DAG_1, MWF_C_DAG_1, MWF_D_DAG_1] +HANDLER_DAG_1 = { + "name": MWF_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_HANDLER_UUID, + "meta_workflows": DAG_1_MWF_ARRAY +} +MWF_B_DAG_1_W_SELF_DEP = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_B, True) +DAG_1_MWF_ARRAY_W_SELF_DEP = [MWF_A_DAG_1, MWF_B_DAG_1_W_SELF_DEP, MWF_C_DAG_1, MWF_D_DAG_1] +HANDLER_DAG_1_W_SELF_DEP = { + "name": MWF_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_HANDLER_UUID, + "meta_workflows": DAG_1_MWF_ARRAY_W_SELF_DEP +} +REORDERED_MWF_ARRAY_DAG_1 = [MWF_B_DAG_1, MWF_C_DAG_1, MWF_A_DAG_1, MWF_D_DAG_1] + +# CYCLIC_0 +# A B__ +# ⋀ \_____ +# | | +# | | +# C <----- | +MWF_A_CYCLIC_0 = 
mwf_with_added_attrs(MWF_A, None, TESTER_UUID, [], True) +MWF_B_CYCLIC_0 = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_C, True) +MWF_C_CYCLIC_0 = mwf_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B, True) +CYCLIC_0_MWF_ARRAY = [MWF_A_CYCLIC_0, MWF_B_CYCLIC_0, MWF_C_CYCLIC_0] +HANDLER_CYCLIC_0 = { + "name": MWF_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_HANDLER_UUID, + "meta_workflows": CYCLIC_0_MWF_ARRAY +} + +# CYCLIC_1 +# A -----> B +# ⋀ | +# | | +# | ⋁ +# D <----- C +MWF_A_CYCLIC_1 = mwf_with_added_attrs(MWF_A, None, TESTER_UUID, DEP_ON_D, True) +MWF_B_CYCLIC_1 = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_A, True) +MWF_C_CYCLIC_1 = mwf_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B, True) +MWF_D_CYCLIC_1 = mwf_with_added_attrs(MWF_D, None, TESTER_UUID, DEP_ON_C, True) +CYCLIC_1_MWF_ARRAY = [MWF_A_CYCLIC_1, MWF_B_CYCLIC_1, MWF_C_CYCLIC_1, MWF_D_CYCLIC_1] +HANDLER_CYCLIC_1 = { + "name": MWF_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_HANDLER_UUID, + "meta_workflows": CYCLIC_1_MWF_ARRAY +} + +################################################################# +# Tests +################################################################# + +#TODO: make test for ValidatedDictionary parent object? +# I basically test it through the child classes below + +@pytest.mark.parametrize( + "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag, num_attributes", + [ + (MWF_A, "sample_processing.samples", None, None, True, 4), + (MWF_B, None, TESTER_UUID, None, False, 4), + (MWF_B, None, TESTER_UUID, DEP_ON_A, True, 5), + (MWF_C, "sample_processing.samples", TESTER_UUID, None, True, 5), # items for creation UUID taken by default + # the following should throw ValueError + (MWF_A, None, None, None, True, None), # missing items for creation + (MWF_A, None, TESTER_UUID, None, None, None) # missing duplication flag + ] +) +def test_attribute_validation_mwf_step(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag, num_attributes): + try: + completed_dict = mwf_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag) + meta_workflow_step_object = MetaWorkflowStep(completed_dict) + except ValueError as val_err_info: + assert "validation error" in str(val_err_info) + else: + assert num_attributes == len(meta_workflow_step_object.__dict__) + + +class TestMetaWorkflowHandler: + def test_attribute_validation_mwf_handler(self): + with pytest.raises(ValueError) as val_err_info: + meta_workflow_handler = MetaWorkflowHandler(HANDLER_WITHOUT_UUID) + assert "validation error" in str(val_err_info) + + @pytest.mark.parametrize( + "mwf_handler_dict, length_of_mwf_list", + [ + (HANDLER_WITHOUT_MWF_ARRAY, 0), # sets empty list if attr not present + (HANDLER_DAG_0, 3), + (HANDLER_DAG_0_W_DUPLICATES, 3) # gets rid of duplicates + ] + ) + def test_set_meta_workflows_list(self, mwf_handler_dict, length_of_mwf_list): + meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) + assert len(getattr(meta_workflow_handler, "meta_workflows")) == length_of_mwf_list -with open('test/files/test_METAWFL_HANDLER.json') as json_file: - data = json.load(json_file) + @pytest.mark.parametrize( + "mwf_handler_dict, reordered_mwf_list", + [ + (HANDLER_WITHOUT_MWF_ARRAY, []), + (HANDLER_DAG_0, REORDERED_MWF_ARRAY_DAG_0), + (HANDLER_DAG_0_W_DUPLICATES, 
REORDERED_MWF_ARRAY_DAG_0), + (HANDLER_DAG_1, REORDERED_MWF_ARRAY_DAG_1), + (HANDLER_DAG_1_W_SELF_DEP, REORDERED_MWF_ARRAY_DAG_1) + ] + ) + def test_create_ordered_meta_workflows_list(self, mwf_handler_dict, reordered_mwf_list): + meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) + assert getattr(meta_workflow_handler, "ordered_meta_workflows") == reordered_mwf_list -print(data) -print(type(data)) + @pytest.mark.parametrize( + "mwf_handler_dict", + [ + (HANDLER_CYCLIC_0), + (HANDLER_CYCLIC_1) + ] + ) + def test_cycles(self, mwf_handler_dict): + with pytest.raises(Exception) as exc_info: + meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) + assert "Cycle in graph: node" in str(exc_info) -mwfh.MetaWorkflowHandler(data) \ No newline at end of file + @pytest.mark.parametrize( + "mwf_handler_dict", + [ + (HANDLER_WITHOUT_MWF_ARRAY), + (HANDLER_DAG_0), + (HANDLER_DAG_0_W_DUPLICATES), + (HANDLER_DAG_1), + (HANDLER_DAG_1_W_SELF_DEP) + ] + ) + def test_create_ordered_meta_workflow_steps_list(self, mwf_handler_dict): + meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) + ordered_meta_workflow_steps = getattr(meta_workflow_handler, "ordered_meta_workflow_steps") + for step in ordered_meta_workflow_steps: + assert isinstance(step, MetaWorkflowStep) From ffefa6a0bd19366b2ea79bb1b6ec71ec7afa56f6 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Wed, 11 Jan 2023 18:04:51 -0500 Subject: [PATCH 17/38] Put the ValidatedDictionary class in its own file --- magma/validated_dictionary.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 magma/validated_dictionary.py diff --git a/magma/validated_dictionary.py b/magma/validated_dictionary.py new file mode 100644 index 0000000..292d4f8 --- /dev/null +++ b/magma/validated_dictionary.py @@ -0,0 +1,32 @@ +################################################ +# TODO: functions for dcic utils -- move later +################################################ +from magma.utils import check_presence_of_attributes + + +################################################ +# ValidatedDictionary +################################################ +class ValidatedDictionary(object): + """ + Parent class for MetaWorkflowStep and MetaWorkflowHandler classes. + Takes in an input dictionary, and validates basic attributes. + """ + + def __init__(self, input_dict): + """ + Constructor method, initialize object and attributes. + + :param input_dict: input dictionary, defined by json file, which defines basic attributes of this object + :type input_dict: dict + """ + # Set basic (non-calculated) attributes # + for key in input_dict: + setattr(self, key, input_dict[key]) + + def _validate_basic_attributes(self, list_of_attributes=None): + """ TODO: make list of attributes a class attribute + Validation of the JSON input for this object. + Checks that given attributes are present in the created object. + """ + check_presence_of_attributes(self, list_of_attributes) \ No newline at end of file From 5fd89162af5d3d2e0a80cd52d8c087e471afae83 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Fri, 13 Jan 2023 12:53:58 -0500 Subject: [PATCH 18/38] Finished ValidatedDictionary class Changes included merging some former magma utils fxns into this class, and removing extraneous utils fxns. Also includes new test file for this class. 
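
For reference, a minimal sketch of the intended subclass pattern (the subclass
name and attributes here are invented for illustration):

    from magma.validated_dictionary import ValidatedDictionary

    class StepConfig(ValidatedDictionary):
        def __init__(self, input_dict):
            super().__init__(input_dict)  # sets each key of input_dict as an attribute
            # attributes to check are passed as varargs; a missing attribute
            # raises ValueError("Object validation error, ...")
            self._validate_basic_attributes("uuid", "name")

    StepConfig({"uuid": "1234", "name": "step_a"})  # passes validation
    StepConfig({"uuid": "1234"})                    # raises ValueError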
--- magma/utils.py | 105 -------- magma/validated_dictionary.py | 28 ++- test/test_utils_magma.py | 393 ------------------------------ test/test_validated_dictionary.py | 83 +++++++ 4 files changed, 99 insertions(+), 510 deletions(-) delete mode 100644 magma/utils.py delete mode 100644 test/test_utils_magma.py create mode 100644 test/test_validated_dictionary.py diff --git a/magma/utils.py b/magma/utils.py deleted file mode 100644 index b053b7c..0000000 --- a/magma/utils.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 - -################################################ -# Libraries -################################################ - -################################################ -# Functions -################################################ -#TODO: description -- small utility fxns and -# object attribute checking - -#TODO: following 2 fxn not used elsewhere but could be in future -def check_variable_type(variable, intended_type): - """ - Checks that given variable is of the intended type. - - :param variable: variable to be checked - :type variable: data type - :param intended_type: the variable type that is intended - :type intended_type: data type - :return: True if the variable if of the intended_type, else False - :rtype: bool - """ - if not isinstance(variable, intended_type): - return False - else: - return True - -def check_list_elements_type(list_to_check, intended_type): - """ - Checks that all elements in list are of a given type. - Raises Exception if not all elements are strings. - - :param list_to_check: list to be checked - :type list_to_check: list - :param intended_type: the variable type that is intended - :type intended_type: data type - :return: True if all elements of list_to_check are of the intended_type, else False - :rtype: bool - """ - # check that all elements in list are strings - if not all(isinstance(element, intended_type) for element in list_to_check): - return False - else: - return True - -def check_presence_of_attributes(input_object, attributes_to_check=None): - """ - Takes in an object and a list of attributes, checks that those attributes are present - in this object - - :param input_object: object to check - :type input_object: object (instance of some class) - :param attributes_to_check: list of attributes to check - :type attributes_to_check: list[str] - :return: None, if there are no attributes to check - :return: None, if all specified attributes are present - :raises ValueError: if input_object doesn't have a specified attribute - """ - #TODO: make the next three commands its own helper function? I repeat variations - # several times - if attributes_to_check is None: - return - - for attribute in attributes_to_check: - try: - getattr(input_object, attribute) - except AttributeError as e: - raise ValueError("Object validation error, {0}\n" - .format(e.args[0])) - -def set_unique_list_attributes(input_object, attributes_to_set=None): - """ - Checks for given attribute(s) of type list, sets as empty list if not present, - else sets that list attribute, without duplicates. 
- - :param input_object: object with attributes to be set - :type input_object: object (dict) - :param attributes_to_set: list of attributes to set - :type attributes_to_set: list[str] - :return: None, if there are no attributes to set - :return: None, once entire function is completed with no errors - """ - if attributes_to_set is None: - return - - # especially if we are handling duplicates in reordering list function - for attribute in attributes_to_set: - if not hasattr(input_object, attribute): - # if not present, set attribute as empty list - setattr(input_object, attribute, []) - else: - attrib = getattr(input_object, attribute) - - # check if this attribute is a list - if check_variable_type(attrib, list): - # then get rid of duplicates, if present - non_dup_attrib = [] - for item in attrib: - if item not in non_dup_attrib: - non_dup_attrib.append(item) - setattr(input_object, attribute, non_dup_attrib) - else: - continue \ No newline at end of file diff --git a/magma/validated_dictionary.py b/magma/validated_dictionary.py index 292d4f8..e103309 100644 --- a/magma/validated_dictionary.py +++ b/magma/validated_dictionary.py @@ -1,16 +1,10 @@ ################################################ -# TODO: functions for dcic utils -- move later -################################################ -from magma.utils import check_presence_of_attributes - - -################################################ -# ValidatedDictionary +# ValidatedDictionary TODO: eventually make part of dcicutils? ################################################ class ValidatedDictionary(object): """ - Parent class for MetaWorkflowStep and MetaWorkflowHandler classes. - Takes in an input dictionary, and validates basic attributes. + Parent class for MetaWorkflow(Run)Step and MetaWorkflow(Run) Handler classes. + Takes in an input dictionary, and validates basic attributes (makes sure given attributes are present). """ def __init__(self, input_dict): @@ -24,9 +18,19 @@ def __init__(self, input_dict): for key in input_dict: setattr(self, key, input_dict[key]) - def _validate_basic_attributes(self, list_of_attributes=None): - """ TODO: make list of attributes a class attribute + def _validate_basic_attributes(self, *attributes_to_check): + """ Validation of the JSON input for this object. Checks that given attributes are present in the created object. 
+ + :param attributes_to_check: attributes that are checked (variable number of non-keyword arguments) + :type attributes_to_check: str(s) + :return: None, if all specified attributes are present + :raises ValueError: if this Validated Dictionary object doesn't have a specified attribute """ - check_presence_of_attributes(self, list_of_attributes) \ No newline at end of file + for attribute in attributes_to_check: + try: + getattr(self, attribute) + except AttributeError as e: + raise ValueError("Object validation error, {0}\n" + .format(e.args[0])) \ No newline at end of file diff --git a/test/test_utils_magma.py b/test/test_utils_magma.py deleted file mode 100644 index b91b7a1..0000000 --- a/test/test_utils_magma.py +++ /dev/null @@ -1,393 +0,0 @@ -#!/usr/bin/env python3 - -################################################################# -# Libraries -################################################################# -import pytest -from copy import deepcopy - -from magma.utils import * - -################################################################# -# Vars -################################################################# - -# dummy class for creating simple objects -class ClassTester: - """ - Class for creation of simple objects, based on an input dictionary - """ - - def __init__(self, input_dict): - """ - Constructor method, initialize object and attributes. - - :param input_dict: dictionary defining the basic attributes of object to be created - :type input_dict: dict - """ - for key in input_dict: - setattr(self, key, input_dict[key]) - -# TODO: is there a way to functionalize this? -# input dicts to create ClassTester objects -INPUT_DICT_SINGLE_SIMPLE_ATTR = {"test_0": 0} -INPUT_DICT_SINGLE_SIMPLE_ATTR_1 = {"test_1": 0} -INPUT_DICT_SINGLE_SIMPLE_ATTR_2 = {"test_2": 0} -INPUT_DICT_SEVERAL_SIMPLE_ATTRS = {"test_0": 0, "test_1": 1, "test_2": 2} -INPUT_DICT_SINGLE_EMPTY_LIST_ATTR = {"list_empty_0": []} -INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS = { - "list_empty_0": [], - "list_empty_1": [], - "list_empty_2": [], -} -INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR = {"list_simple_0": [1, 2, 3]} -INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS = { - "list_simple_0": [1, 2, 3], - "list_simple_1": ["a", "b", "c"], -} -INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP = {"list_simple_0": [1, 2, 3, 4, 3]} -INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = { - "list_simple_0": [1, "a", 3, 3], - "list_simple_1": ["a", "b", "c"], - "list_simple_2": ["c", 1, "c"], -} - -LIST_OF_EMPTY_DICTS = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] -LIST_OF_SIMPLE_ATTR_DICTS = [ - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR_1, - INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - ] -LIST_OF_SIMPLE_ATTR_DICTS_REORDERED = [ - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - INPUT_DICT_SINGLE_SIMPLE_ATTR_1, - ] -LIST_OF_SIMPLE_ATTR_DICTS_W_DUP = [ - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR_1, - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR, - ] -LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_2 = [ - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR, - INPUT_DICT_SINGLE_SIMPLE_ATTR_1, - INPUT_DICT_SINGLE_SIMPLE_ATTR, - ] -LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_3 = [ - INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - INPUT_DICT_SINGLE_SIMPLE_ATTR_2, - ] - -INPUT_DICT_SINGLE_LIST_OF_DICTS = { - "list_of_dicts": LIST_OF_SIMPLE_ATTR_DICTS -} 
-INPUT_DICT_SEVERAL_LISTS_OF_DICTS = { - "list_of_dicts_0": LIST_OF_SIMPLE_ATTR_DICTS, - "list_of_dicts_1": LIST_OF_SIMPLE_ATTR_DICTS_REORDERED, -} -INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP = { - "list_of_dicts": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP -} -INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP = { - "list_of_dicts_0": LIST_OF_SIMPLE_ATTR_DICTS, - "list_of_dicts_1": LIST_OF_SIMPLE_ATTR_DICTS_REORDERED, - "list_of_dicts_2": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_2, - "list_of_dicts_3": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_3 -} - - -# ClassTester objects -CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_ATTR) -CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_ATTRS) -CLASSTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_EMPTY_LIST_ATTR) -CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS) -CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR) -CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS) -CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP = ClassTester( - INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP -) -CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = ClassTester( - INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP -) -CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS) -CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS = ClassTester(INPUT_DICT_SEVERAL_LISTS_OF_DICTS) -CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP) -CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP = ClassTester( - INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP -) - -################################################################# -# Tests -################################################################# - -@pytest.mark.parametrize( - "variable, intended_type, expected_result", - [ - (2, int, True), - (-2, int, True), - (float("inf"), float, True), - (complex(1, 1.0), complex, True), - (True, bool, True), - (False, bool, True), - (None, type(None), True), - (None, object, True), - ("a", str, True), - ("a", list, False), - ("a", object, True), - ("test", str, True), - ("test", list, False), - ("test", object, True), - ((1, 2), tuple, True), - ((1, 2), object, True), - ([], list, True), - ([], object, True), - (set(), set, True), - (set(), object, True), - ([1, "test"], list, True), - ([1, "test"], object, True), - ({}, dict, True), - ({}, object, True), - ({"hi": 1}, dict, True), - ({"hi": 1}, object, True), - (2, list, False), - (float("inf"), int, False), - (complex(1, 1.0), float, False), - (True, str, False), - (None, bool, False), - ("a", int, False), - ("test", list, False), - ((1, 2), set, False), - (set(), tuple, False), - ([1, "test"], dict, False), - ({"hi": 1}, list, False), - ], -) -def test_check_variable_type(variable, intended_type, expected_result): - """ - Test for function checking if a variable is of a specified type. 
- """ - result = check_variable_type(variable, intended_type) - assert result == expected_result - - -@pytest.mark.parametrize( - "list_to_check, intended_type, expected_result", - [ - ([], str, True), - ([], int, True), - ([], list, True), - ([], object, True), - (["id"], str, True), - (["1", "test", "2"], str, True), - ([1, 2, 3, 4], int, True), - ([[1], [2], ["test", "2"], []], list, True), - ([["1", "2", "3", "4", "5"], ["6"]], str, False), - ([["1", "2", "3", "4", "5"], "6"], list, False), - ([None, "test"], str, False), - ([1, "test"], int, False), - ], -) -def test_check_list_elements_type_no_errors( - list_to_check, intended_type, expected_result -): - """ - Test for function checking that all elements of a list are of a specified type, - no errors raised. - """ - result = check_list_elements_type(list_to_check, intended_type) - assert result == expected_result - - -class TestCheckPresenceOfAttributes: - @pytest.mark.parametrize( - "input_object, attributes_to_check", - [ - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), - ], - ) - def test_check_presence_of_attributes_no_errors( - self, input_object, attributes_to_check - ): - """ - Test for function checking that specified attributes are part of a given object, - no errors raised. - """ - result = check_presence_of_attributes(input_object, attributes_to_check) - assert result is None - - @pytest.mark.parametrize( - "input_object, attributes_to_check", - [ - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present"]), - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["not_present_0", "not_present_1"]), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "not_present"]), - ( - CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, - ["test_0", "not_present", "test_1", "test_2"], - ), - ( - CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, - ["test_0", "not_present_0", "test_2", "not_present_1"], - ), - ( - CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, - ["not_present", "test_0", "test_2", "test_1"], - ), - ], - ) - def test_check_presence_of_attributes_value_errors( - self, input_object, attributes_to_check - ): - """ - Test for function checking that specified attributes are part of a given object, - ValueError raised. - """ - with pytest.raises(ValueError) as value_err_info: - check_presence_of_attributes(input_object, attributes_to_check) - assert "Object validation error" in str(value_err_info.value) - - -class TestSetUniqueListAttributes: - @pytest.mark.parametrize( - "input_object, attributes_to_set", - [ - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), - ], - ) - def test_set_unique_list_attributes_of_existing_nonlist_attributes( - self, input_object, attributes_to_set - ): - """ - Test for function that gets rid of duplicates within object attributes that are lists, - or sets attributes to empty list if not present within the object. 
- Cases where the attributes to set are existent and are NOT lists, no action done. - """ - original_object = deepcopy(input_object) - result = set_unique_list_attributes(input_object, attributes_to_set) - assert result is None - assert vars(input_object) == vars(original_object) # no attributes changed - #TODO: double check the above "vars" functionality - - @pytest.mark.parametrize( - "input_object, attributes_to_set, orig_lengths, reset_lengths", - [ - (CLASSTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR, ["list_empty_0"], [0], [0]), - ( - CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, - ["list_empty_0", "list_empty_1", "list_empty_2"], - [0, 0, 0], - [0, 0, 0], - ), - (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), - ( - CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, - ["list_simple_0", "list_simple_1"], - [3, 3], - [3, 3], - ), - (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), - ( - CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, - ["list_simple_0", "list_simple_2", "list_simple_1"], - [4, 3, 3], - [3, 2, 3], - ), - (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS, ["list_of_dicts"], [3], [3]), - ( - CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS, - ["list_of_dicts_1", "list_of_dicts_0"], - [3, 3], - [3, 3], - ), - (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP, ["list_of_dicts"], [6], [3]), - ( - CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP, - [ - "list_of_dicts_1", - "list_of_dicts_0", - "list_of_dicts_2", - "list_of_dicts_3", - ], - [3, 3, 5, 3], - [3, 3, 2, 1], - ), - ], - ) - def test_set_unique_list_attributes_of_existing_list_attributes( - self, input_object, attributes_to_set, orig_lengths, reset_lengths - ): - """ - Test for function that gets rid of duplicates within object attributes that are lists, - or sets attributes to empty list if not present within the object. - Cases where the attributes to set are existent and are lists. - """ - # import pdb; pdb.set_trace() - # check original length of attributes_to_set - for idx, attribute in enumerate(attributes_to_set): - assert len(getattr(input_object, attribute)) == orig_lengths[idx] - - result = set_unique_list_attributes(input_object, attributes_to_set) - - # check length of "reset" attributes_to_set - for idx, attribute in enumerate(attributes_to_set): - assert len(getattr(input_object, attribute)) == reset_lengths[idx] - - assert result is None - - @pytest.mark.parametrize( - "input_object, attributes_to_set, num_added_attributes", - [ - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"], 0), - (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_1"], 1), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_2", "test_3"], 1), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_5", "test_0", "test_4"], 2), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"], 0), - (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"], 0), - ], - ) - def test_set_unique_list_attributes_of_nonexistent_attributes( - self, input_object, attributes_to_set, num_added_attributes - ): - """ - Test for function that gets rid of duplicates within object attributes that are lists, - or sets attributes to empty list if not present within the object. - Cases where the attributes to set are nonexistent, so they are added with the value []. 
- """ - # TODO: this changes the objects permanently since I'm setting attrs - # but I don't think this will affect further testing (specifically, fourth example) - - original_attributes_set = set(dir(input_object)) - num_original_attributes = len(original_attributes_set) - - result = set_unique_list_attributes(input_object, attributes_to_set) - assert result is None - - reset_attributes_set = set(dir(input_object)) - num_reset_attributes = len(reset_attributes_set) - - assert num_added_attributes == (num_reset_attributes - num_original_attributes) - - added_attributes = reset_attributes_set.difference(original_attributes_set) - for attribute in added_attributes: - assert attribute in attributes_to_set - assert getattr(input_object, attribute) == [] - - # TODO: add a test for mixed cases? (nonexistent + lists + empties, etc.) \ No newline at end of file diff --git a/test/test_validated_dictionary.py b/test/test_validated_dictionary.py new file mode 100644 index 0000000..58307ad --- /dev/null +++ b/test/test_validated_dictionary.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +################################################################# +# Libraries +################################################################# +import pytest + +from magma.validated_dictionary import ValidatedDictionary + +################################################################# +# Vars +################################################################# +EMPTY_INPUT_DICT = {} +SIMPLE_INPUT_DICT = {"attr_0": 0} +EXTENSIVE_INPUT_DICT = { + "attr_0": 0, + "attr_1": "foo", + "attr_2": False, + "attr_3": [0, 1, 2, 3], + "attr_4": { + "subattr_0": 0, + "subattr_1": "bar" + } +} + +EMPTY_VALIDATED_DICT = ValidatedDictionary(EMPTY_INPUT_DICT) +SIMPLE_VALIDATED_DICT = ValidatedDictionary(SIMPLE_INPUT_DICT) +EXTENSIVE_VALIDATED_DICT = ValidatedDictionary(EXTENSIVE_INPUT_DICT) + +################################################################# +# Tests +################################################################# +class TestValidatedDictionary: + @pytest.mark.parametrize( + "validated_dictionary_object, input_dict", + [ + (EMPTY_VALIDATED_DICT, EMPTY_INPUT_DICT), + (SIMPLE_VALIDATED_DICT, SIMPLE_INPUT_DICT), + (EXTENSIVE_VALIDATED_DICT, EXTENSIVE_INPUT_DICT) + ] + ) + def test_validated_dictionary_init(self, validated_dictionary_object, input_dict): + """ + Test of the __init__ function of the ValidatedDictionary class + """ + present_attributes = list(input_dict.keys()) + for attr in present_attributes: + assert hasattr(validated_dictionary_object, attr) == True + assert getattr(validated_dictionary_object, attr) == input_dict[attr] + + @pytest.mark.parametrize( + "validated_dictionary_object, attributes_to_check", + [ + (EMPTY_VALIDATED_DICT, ()), + (SIMPLE_VALIDATED_DICT, ("attr_0",)), + (EXTENSIVE_VALIDATED_DICT, ("attr_2",)), + (EXTENSIVE_VALIDATED_DICT, ("attr_0", "attr_1", "attr_2", "attr_3", "attr_4")) + ] + ) + def test_validate_basic_attributes_no_errors(self, validated_dictionary_object, attributes_to_check): + """ + Test for function checking that specified attributes are part of a given ValidatedDictionary object, + no errors raised. 
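+
+        For example (mirroring the cases above):
+        SIMPLE_VALIDATED_DICT._validate_basic_attributes("attr_0") returns None,
+        since ValidatedDictionary({"attr_0": 0}) set attr_0 in its constructor.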
+ """ + result = validated_dictionary_object._validate_basic_attributes(*attributes_to_check) + assert result is None + + @pytest.mark.parametrize( + "validated_dictionary_object, attributes_to_check", + [ + (EMPTY_VALIDATED_DICT, ("not_present", "also_not_present")), + (SIMPLE_VALIDATED_DICT, ("attr_0", "not_present")), + (EXTENSIVE_VALIDATED_DICT, ("attr_0", "attr_1", "not_present", "attr_2")) + ] + ) + def test_validate_basic_attributes_value_errors(self, validated_dictionary_object, attributes_to_check): + """ + Test for function checking that specified attributes are part of a given ValidatedDictionary object, + ValueError raised. + """ + with pytest.raises(ValueError) as value_err_info: + validated_dictionary_object._validate_basic_attributes(*attributes_to_check) + assert "Object validation error" in str(value_err_info.value) \ No newline at end of file From 2d16ab0fb0971beb2a87705ef43effc705f7af83 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Fri, 20 Jan 2023 06:09:47 -0500 Subject: [PATCH 19/38] Refactored Topological Sort with TopologicalSorter from dcicutils. Also wrote a draft of pytests --- magma/metawfl_handler.py | 177 +++++----- magma/topological_sort.py | 240 ++----------- magma/validated_dictionary.py | 4 +- test/test_metawfl_handler.py | 415 ++++++++++++++++++----- test/test_topological_sort.py | 539 ++++-------------------------- test/test_validated_dictionary.py | 8 +- 6 files changed, 519 insertions(+), 864 deletions(-) diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index 1df9001..543079f 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -3,45 +3,12 @@ ################################################ # Libraries ################################################ -# from magma import metawfl #TODO: do this in FF - -################################################ -# TODO: functions for dcic utils -- move later -################################################ -from magma.utils import check_presence_of_attributes, set_unique_list_attributes -from magma.topological_sort import generate_ordered_steps_list - -################################################ -# ValidatedDictionary -################################################ -class ValidatedDictionary(object): - """ - Parent class for MetaWorkflowStep and MetaWorkflowHandler classes. - Takes in an input dictionary, and validates basic attributes. - """ - - def __init__(self, input_dict): - """ - Constructor method, initialize object and attributes. - - :param input_dict: input dictionary, defined by json file, which defines basic attributes of this object - :type input_dict: dict - """ - # Set basic (non-calculated) attributes # - for key in input_dict: - setattr(self, key, input_dict[key]) - - def _validate_basic_attributes(self, list_of_attributes=None): - """ - Validation of the JSON input for this object. - Checks that given attributes are present in the created object. 
- """ - check_presence_of_attributes(self, list_of_attributes) +from magma.validated_dictionary import ValidatedDictionary +# from magma.topological_sort import generate_ordered_steps_list ################################################ # MetaWorkflowStep ################################################ - class MetaWorkflowStep(ValidatedDictionary): """ Class to represent a MetaWorkflow object, @@ -53,7 +20,6 @@ class MetaWorkflowStep(ValidatedDictionary): DUP_FLAG_ATTR = "duplication_flag" ITEMS_CREATION_PROP_TRACE = "items_for_creation_property_trace" ITEMS_CREATION_UUID = "items_for_creation_uuid" - LIST_OF_ATTRS = [META_WORKFLOW_ATTR, NAME_ATTR, DUP_FLAG_ATTR] def __init__(self, input_dict): """ @@ -65,79 +31,94 @@ def __init__(self, input_dict): super().__init__(input_dict) # Validate presence of basic attributes of this MetaWorkflow step - self._validate_basic_attributes(self.LIST_OF_ATTRS) + self._validate_basic_attributes(self.META_WORKFLOW_ATTR, self.NAME_ATTR, self.DUP_FLAG_ATTR) - def _validate_basic_attributes(self, list_of_attributes): + def _validate_basic_attributes(self, *list_of_attributes): """ Validation of the JSON input for the MetaWorkflow step. Checks that necessary MetaWorkflow attributes are present for this MetaWorkflow step. """ - super()._validate_basic_attributes(list_of_attributes) + super()._validate_basic_attributes(*list_of_attributes) # str, must be unique TODO: name filling in ff try: - #TODO: what about if both are present? UUID is taken as default for now # set None for [default] arg to not throw AttributeError if not getattr(self, self.ITEMS_CREATION_UUID, None): - # import pdb; pdb.set_trace() getattr(self, self.ITEMS_CREATION_PROP_TRACE) except AttributeError as e: - raise ValueError("JSON validation error, {0}\n" + raise AttributeError("Object validation error, {0}\n" .format(e.args[0])) - - -################################################ -# MetaWorkflowHandler -################################################ -class MetaWorkflowHandler(ValidatedDictionary): - """ - Class representing a MetaWorkflow Handler object, - a list of MetaWorkflows with specified dependencies - """ - - UUID_ATTR = "uuid" - META_WORKFLOW_NAME_ATTR = "name" - META_WORKFLOW_DEPENDENCIES_ATTR = "dependencies" - META_WORKFLOWS_ATTR = "meta_workflows" - LIST_OF_ATTRS = [UUID_ATTR] - - def __init__(self, input_dict): - """ - Constructor method, initialize object and attributes. 
- - :param input_dict: MetaWorkflow Handler object, defined by json file from portal - :type input_dict: dict - """ - ### Basic attributes ### - super().__init__(input_dict) + # for items for creation, this object can only have + # either the UUID or property trace, but not both + if hasattr(self, self.ITEMS_CREATION_PROP_TRACE) and hasattr(self, self.ITEMS_CREATION_UUID): + raise AttributeError("Object validation error, 'MetaWorkflowStep' object cannot have both of the following attributes: 'items_for_creation_property_trace' and 'items_for_creation_uuid'") + + +# ################################################ +# # MetaWorkflowHandler +# ################################################ +# class MetaWorkflowHandler(ValidatedDictionary): +# """ +# Class representing a MetaWorkflow Handler object, +# a list of MetaWorkflows with specified dependencies +# """ + +# UUID_ATTR = "uuid" +# META_WORKFLOWS_ATTR = "meta_workflows" +# META_WORKFLOW_NAME_ATTR = "name" +# META_WORKFLOW_DEPENDENCIES_ATTR = "dependencies" + +# def __init__(self, input_dict): +# """ +# Constructor method, initialize object and attributes. + +# :param input_dict: MetaWorkflow Handler object, defined by json file from portal +# :type input_dict: dict +# """ +# ### Basic attributes ### +# super.__init__(input_dict) - super()._validate_basic_attributes(self.LIST_OF_ATTRS) - - ### Calculated attributes ### - # to check for non-existent meta_workflows attribute - self._set_meta_workflows_list() - - # order the meta_workflows list based on dependencies - # ordered_meta_workflows = generate_ordered_steps_list(self.meta_workflows, self.META_WORKFLOW_NAME_ATTR, self.META_WORKFLOW_DEPENDENCIES_ATTR) - self.ordered_meta_workflows = self._create_ordered_meta_workflows_list() - - # using ordered metaworkflows list, create a list of objects using class MetaWorkflowStep - # this validates basic attributes needed for each metaworkflow step - self.ordered_meta_workflow_steps = self._create_meta_workflow_step_objects() - - def _set_meta_workflows_list(self): - """ - Checks for meta_workflows attribute, gets rid of duplicates, - else sets as empty list if not present - """ - set_unique_list_attributes(self, [self.META_WORKFLOWS_ATTR]) - - def _create_ordered_meta_workflows_list(self): - return generate_ordered_steps_list(self.meta_workflows, self.META_WORKFLOW_NAME_ATTR, self.META_WORKFLOW_DEPENDENCIES_ATTR) - - def _create_meta_workflow_step_objects(self): - meta_workflow_step_list = [] - for meta_workflow in self.ordered_meta_workflows: - meta_workflow_step_object = MetaWorkflowStep(meta_workflow) - meta_workflow_step_list.append(meta_workflow_step_object) - return meta_workflow_step_list +# self._validate_basic_attributes(self.UUID_ATTR) + +# ### Calculated attributes ### +# # to check for non-existent meta_workflows attribute +# # if present, get rid of duplicates (by MetaWorkflow name) +# self._set_meta_workflows_list() + +# # order the meta_workflows list based on dependencies +# self.ordered_meta_workflows = self._create_ordered_meta_workflows_list() + +# # using ordered metaworkflows list, create a list of objects using class MetaWorkflowStep +# # this validates basic attributes needed for each metaworkflow step +# self.ordered_meta_workflow_steps = self._create_meta_workflow_step_objects() + +# def _set_meta_workflows_list(self): +# """ +# Checks for meta_workflows attribute, +# sets as empty list if not present, +# else gets rid of duplicates (by metaworkflow name) +# """ +# if not hasattr(self, self.META_WORKFLOWS_ATTR): +# # if 
not present, set attribute as empty list +# setattr(self, self.META_WORKFLOWS_ATTR, []) +# else: +# attrib = getattr(self, self.META_WORKFLOWS_ATTR) + +# # then get rid of duplicates, if present +# # non_dup_attrib = [] +# # for item in attrib: +# # if item not in non_dup_attrib: +# # non_dup_attrib.append(item) +# # setattr(self, self.META_WORKFLOWS_ATTR, non_dup_attrib) + +# def _create_ordered_meta_workflows_list(self): +# return generate_ordered_steps_list(self.meta_workflows, self.META_WORKFLOW_NAME_ATTR, self.META_WORKFLOW_DEPENDENCIES_ATTR) + +# def _create_meta_workflow_step_objects(self): +# meta_workflow_step_list = [] +# for meta_workflow in self.ordered_meta_workflows: +# meta_workflow_step_object = MetaWorkflowStep(meta_workflow) +# meta_workflow_step_list.append(meta_workflow_step_object) +# return meta_workflow_step_list + +# #TODO: check that there are no duplictes in ordered metaworkflows -- does this throw error or nah? TBD. diff --git a/magma/topological_sort.py b/magma/topological_sort.py index 079d923..7cdb784 100644 --- a/magma/topological_sort.py +++ b/magma/topological_sort.py @@ -4,212 +4,42 @@ # Libraries ################################################ from copy import deepcopy -from magma.utils import check_variable_type + +from dcicutils.misc_utils import TopologicalSorter ################################################ # Functions ################################################ - -#TODO: don't make this part of the class, but rather generalized fxn for dcic_utils? -def check_presence_of_key(list_of_dicts, key_to_check=None): - """ - Takes in a list of dictionaries and a list of keys, checks that those keys - are present within every99 dict in this list/array. - - :param list_of_dicts: dictionaries to check - :type input_dict: list[dict] - :param key_to_check: key to check - :type key_to_check: str - :return: True, if the specified key is present in each dict, or there is no - key to check, else False - :rtype: bool - """ - #TODO: make the next three commands its own helper function? I repeat variations - # several times -- on this note, look up how to have flexible argument - if key_to_check is None: - return True - - if not all((key_to_check in dictionary) for dictionary in list_of_dicts): - return False - - return True - -def generate_ordered_step_name_list(steps_with_dependencies, step_key): - """ - Based on a list of dictionaries (representing a list of steps) with a "name" key - for each dictionary, return a list of the names of each dictionary with - indices corresponding to the indices of the dictionaries themselves (same order). - """ - names = [] - for step_with_dependency in steps_with_dependencies: - names.append(step_with_dependency[step_key]) - return names - -#TODO: could make this more general... -def set_dependency_list_values(steps_with_dependencies, step_key, dependencies_key, existing_steps_list): - """ - Checks for dependency key within each dictionary in list_of_dicts. - If not present, add that key and set value as empty list. - Else, remove duplicates and self-dependencies. - - :param steps_with_dependencies: list of dictionaries that should hold a dependency list. Each - dictionary corresponds to a step, and the list represents - a "list of steps" with dependencies between them. - :type steps_with_dependencies: list[dict] - :param step_key: name of the key corresponding to the step's name – - i.e. 
attribute referred to by dependency values - :type step_key: str - :param dependencies_key: name of the key corresponding to the dependencies list - :type dependencies_key: str - :return: a copy of list_of_dicts with appropriate dependency lists set - :rtype: list[dict] - """ - - steps_with_dependencies_copy = deepcopy(steps_with_dependencies) #TODO: make sure original doesnt change in test - # iterate through list of dicts and set dependencies key-value pair - for step_with_dependency in steps_with_dependencies_copy: - # add empty dependency list if not present - if not dependencies_key in step_with_dependency: - step_with_dependency[dependencies_key] = [] - continue - #TODO: do some renaming of this function to follow pattern of obj vs dict key setting? - - # get rid of duplicates - # I choose this method for generalization, in the case that dependencies is - # a list of dictionaries, which are an unhashable type - dependencies = step_with_dependency[dependencies_key] - # check this is indeed a list - if not check_variable_type(dependencies, list): - step_with_dependency[dependencies_key] = [] - continue - #TODO: throw exception here instead of resetting value - - # get rid of duplicates and self-dependencies - # and each dependency is in fact a name of another metaworkflow - non_duplicated_dependencies = [] - step_with_dependency_name = step_with_dependency[step_key] - for dependency in dependencies: - if (dependency not in non_duplicated_dependencies) and (dependency != step_with_dependency_name) and (dependency in existing_steps_list): - non_duplicated_dependencies.append(dependency) - #TODO: throw exception for self dependencies, duplicates, or nonexistent names? - step_with_dependency[dependencies_key] = non_duplicated_dependencies - # dictionary["steps_after"] = [] - - return steps_with_dependencies_copy - -def find_step_with_given_name(steps_with_dependencies_list, step_key, name): - for index, step in enumerate(steps_with_dependencies_list): - if step[step_key] == name: - return index, step - raise Exception(f"Node named {name} is a nonexistent step") - -def topological_sort_dfs_helper(graph, curr_node, curr_idx, node_name_key, dependencies_key, visited_temporary, visited_permanent, queue): - if visited_permanent[curr_idx]: - return queue - if visited_temporary[curr_idx]: - raise Exception(f"Cycle in graph: node {curr_node[node_name_key]}") - - visited_temporary[curr_idx] = True - - for previous_step_name in curr_node[dependencies_key]: - #TODO: can't have duplicates in names with this method! - idx_previous_node, previous_node = find_step_with_given_name(graph, node_name_key, previous_step_name) - topological_sort_dfs_helper(graph, previous_node, idx_previous_node, node_name_key, dependencies_key, visited_temporary, visited_permanent, queue) - - visited_temporary[curr_idx] = False - visited_permanent[curr_idx] = True - queue.append(curr_node) - return queue - - -def topological_sort(graph, node_name_key, dependencies_key): - """ - Depth-first search algorithm from wikipedia https://en.wikipedia.org/wiki/Topological_sorting - Logic based on topological sort of directed graph from https://www.geeksforgeeks.org/topological-sorting/ - TODO: finish this docstring l8r - Time complexity = O(V+E), where V = # vertices/nodes (steps), E = # edges (in directed graph, dependencies) - https://www.geeksforgeeks.org/detect-cycle-in-directed-graph-using-topological-sort/?id=discuss = cycle detection : - So, in detail, just do a topological sort and get the queue of the results. 
Then as you pop from the final queue and - push to your result vector/array, check all the adjacent nodes of the last popped item and if the adjacent node - exists in the vector then it's a cycle (if A goes to B then B should not precede A in the topological ordering). - - - an assumption: no self-loops (they were previously deleted) -- but should detect cycles - in those cases anyway - - pseudocode from wikipedia: - L ← Empty list that will contain the sorted nodes - while exists nodes without a permanent mark do - select an unmarked node n - visit(n) - - function visit(node n) - if n has a permanent mark then - return - if n has a temporary mark then - stop (graph has at least one cycle) - - mark n with a temporary mark - - for each node m with an edge from n to m do - visit(m) - - remove temporary mark from n - mark n with a permanent mark - add n to head of L - """ - num_steps = len(graph) - visited_temporary = [False]*num_steps - visited_permanent = [False]*num_steps - queue = [] # First In First Out - - while not all((element == True) for element in visited_permanent): - curr_idx = visited_permanent.index(False) # extract an index of a node that hasn't been visited yet - curr_node = graph[curr_idx] - #calling recursive helper function - queue = topological_sort_dfs_helper(graph, curr_node, curr_idx, node_name_key, dependencies_key, visited_temporary, visited_permanent, queue) - - return queue - # TODO: for test, can check that there are no duplicates in returned queue - -def generate_ordered_steps_list(steps_with_dependencies_list, step_key, dependencies_key): - """ - Takes in list of steps and reorders based on dependencies, returning a separate copy of - a reordered list. - If impossible to create ordered list (circular dependencies, missing steps, etc.), - throws error or exception. - - :param steps_with_dependencies_list: list of dictionaries, where each dictionary has - at least a step name - :type steps_with_dependencies_list: list[dict] - :param step_key: name of the key corresponding to the step's name – - i.e. attribute referred to by dependency values - :type step_key: str - :param dependencies_key: name of the key corresponding to the dependencies list - :type dependencies_key: str - :return: a copy of the reordered list (if possible) - :rtype: list[dict] - TODO: add errors and exceptions possibly thrown - """ - - # check that all objects in steps_with_dependencies_list have step_key - if not check_presence_of_key(steps_with_dependencies_list, step_key): - raise Exception("All dictionary elements in steps_with_dependencies_list must have attribute \"{0}\"".format(step_key)) - - # TODO: feel like this is overkill, but checking for duplicates in steps_with_dependencies_list? - # is there any case where the exact same step will be defined?? i think gets tricky with sharding maybe? 
idk
-
-    ### List reordering based on dependencies ###
-
-    names = generate_ordered_step_name_list(steps_with_dependencies_list, step_key)
-
-    ## Preprocessing of dependencies lists
-    # add dependencies attribute if not present, remove duplicates from dependencies,
-    # and check for self dependencies
-    preprocessed_steps_with_dependencies_list = set_dependency_list_values(steps_with_dependencies_list, step_key, dependencies_key, names)
-
-    # import pdb; pdb.set_trace()
-    ordered_steps_list = topological_sort(preprocessed_steps_with_dependencies_list, step_key, dependencies_key)
-
-    return ordered_steps_list
-
-    # TODO:edge cases: all steps have dependencies (cycle or deleted self-dependency), no steps depending on each other, dependency on self, identical steps \ No newline at end of file
+class TopologicalSortHandler(object):
+
+    META_WORKFLOW_DEPENDENCIES_ATTR = "dependencies"
+
+    def __init__(self, meta_workflows_dict):
+        """
+        Constructor method, initialize object and attributes.
+        Calls method to create the graph input for TopologicalSorter from dcicutils.
+
+        :param meta_workflows_dict: input dictionary of meta_workflows from MetaWorkflowHandler
+        :type meta_workflows_dict: dict
+        """
+        # Create graph for TopologicalSorter
+        self.graph = self._create_topo_sort_graph_input(meta_workflows_dict)
+
+        # Create the sorter itself
+        self.sorter = TopologicalSorter(self.graph)
+
+    def _create_topo_sort_graph_input(self, meta_workflows_dict):
+        """
+        Build the {name: set of dependency names} mapping that TopologicalSorter expects.
+
+        :param meta_workflows_dict: dictionary of the form {mwf_name: MetaWorkflowStep object, ...}
+        :type meta_workflows_dict: dict
+        :return: graph input for TopologicalSorter
+        :rtype: dict
+        """
+        graph = {}
+        # the dict is of the form {mwf_name: MetaWorkflowStep object,...}
+        for mwf_step_name, mwf_step_obj in meta_workflows_dict.items():
+            dependencies = getattr(mwf_step_obj, self.META_WORKFLOW_DEPENDENCIES_ATTR)
+            # if there are dependencies for this step, add to the input graph
+            if dependencies:
+                graph[mwf_step_name] = set(dependencies)
+            else:
+                # no dependencies: an empty dict is iterated as an empty
+                # collection of predecessors by TopologicalSorter
+                graph[mwf_step_name] = {}
+        return graph
+
+    def sorted_graph_list(self):
+        """
+        Return the MetaWorkflow names in a valid topological order.
+        TopologicalSorter raises a CycleError if the graph contains a cycle.
+        """
+        sorted_meta_workflows_list = list(self.sorter.static_order())
+        return sorted_meta_workflows_list
\ No newline at end of file
diff --git a/magma/validated_dictionary.py b/magma/validated_dictionary.py
index e103309..7e10015 100644
--- a/magma/validated_dictionary.py
+++ b/magma/validated_dictionary.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 ################################################
 # ValidatedDictionary TODO: eventually make part of dcicutils? 
################################################ @@ -32,5 +34,5 @@ def _validate_basic_attributes(self, *attributes_to_check): try: getattr(self, attribute) except AttributeError as e: - raise ValueError("Object validation error, {0}\n" + raise AttributeError("Object validation error, {0}\n" .format(e.args[0])) \ No newline at end of file diff --git a/test/test_metawfl_handler.py b/test/test_metawfl_handler.py index 6ae4fe4..79f175a 100644 --- a/test/test_metawfl_handler.py +++ b/test/test_metawfl_handler.py @@ -1,10 +1,12 @@ +#!/usr/bin/env python3 + ################################################################# # Libraries ################################################################# import pytest from copy import deepcopy -from magma.metawfl_handler import MetaWorkflowStep, MetaWorkflowHandler +from magma.metawfl_handler import MetaWorkflowStep #, MetaWorkflowHandler ################################################################# # Vars @@ -45,7 +47,6 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No return dict_copy - # meta_workflows arrays for MetaWorkflow Handler # handler without uuid -- fails validation of basic attributes HANDLER_WITHOUT_UUID = { @@ -154,88 +155,352 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No ################################################################# # Tests ################################################################# - -#TODO: make test for ValidatedDictionary parent object? -# I basically test it through the child classes below - -@pytest.mark.parametrize( - "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag, num_attributes", - [ - (MWF_A, "sample_processing.samples", None, None, True, 4), - (MWF_B, None, TESTER_UUID, None, False, 4), - (MWF_B, None, TESTER_UUID, DEP_ON_A, True, 5), - (MWF_C, "sample_processing.samples", TESTER_UUID, None, True, 5), # items for creation UUID taken by default - # the following should throw ValueError - (MWF_A, None, None, None, True, None), # missing items for creation - (MWF_A, None, TESTER_UUID, None, None, None) # missing duplication flag - ] -) -def test_attribute_validation_mwf_step(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag, num_attributes): - try: +class TestMetaWorkflowStep: + @pytest.mark.parametrize( + "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag, num_attributes", + [ + (MWF_A, "sample_processing.samples", None, None, True, 4), + (MWF_B, None, TESTER_UUID, None, False, 4), + (MWF_B, None, TESTER_UUID, DEP_ON_A, True, 5) + ] + ) + def test_attribute_validation_no_errors(self, mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag, num_attributes): + """ + Tests creation of appropriate MetaWorkflowStep objects, + no errors raised. 
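+
+        For example (first case above): MWF_A together with
+        items_for_creation_property_trace="sample_processing.samples" and
+        duplication_flag=True builds a valid step whose 4 attributes are
+        meta_workflow, name, items_for_creation_property_trace, and duplication_flag.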
+        """
        completed_dict = mwf_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag)
        meta_workflow_step_object = MetaWorkflowStep(completed_dict)
-    except ValueError as val_err_info:
-        assert "validation error" in str(val_err_info)
-    else:
        assert num_attributes == len(meta_workflow_step_object.__dict__)
-
-class TestMetaWorkflowHandler:
-    def test_attribute_validation_mwf_handler(self):
-        with pytest.raises(ValueError) as val_err_info:
-            meta_workflow_handler = MetaWorkflowHandler(HANDLER_WITHOUT_UUID)
-        assert "validation error" in str(val_err_info)
+        required_attributes = ["meta_workflow", "name", "duplication_flag"]
+        for attr in required_attributes:
+            assert hasattr(meta_workflow_step_object, attr)

    @pytest.mark.parametrize(
-        "mwf_handler_dict, length_of_mwf_list",
+        "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag",
        [
-            (HANDLER_WITHOUT_MWF_ARRAY, 0), # sets empty list if attr not present
-            (HANDLER_DAG_0, 3),
-            (HANDLER_DAG_0_W_DUPLICATES, 3) # gets rid of duplicates
+            (MWF_C, "sample_processing.samples", TESTER_UUID, None, True), # has both uuid and property trace for items for creation
+            (MWF_A, None, None, None, True), # missing items for creation
+            (MWF_A, None, TESTER_UUID, None, None) # missing duplication flag
        ]
    )
-    def test_set_meta_workflows_list(self, mwf_handler_dict, length_of_mwf_list):
-        meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict)
-        assert len(getattr(meta_workflow_handler, "meta_workflows")) == length_of_mwf_list
+    def test_attribute_validation_attribute_errors(self, mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag):
+        """
+        Tests creation of invalid MetaWorkflowStep objects,
+        AttributeError raised.
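+
+        For example (first case above), MWF_C carrying both
+        items_for_creation_uuid and items_for_creation_property_trace is rejected,
+        as is a step missing items_for_creation entirely, or missing duplication_flag.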
+ """ + with pytest.raises(AttributeError) as attr_err_info: + completed_dict = mwf_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag) + MetaWorkflowStep(completed_dict) + assert "Object validation error" in str(attr_err_info.value) - @pytest.mark.parametrize( - "mwf_handler_dict, reordered_mwf_list", - [ - (HANDLER_WITHOUT_MWF_ARRAY, []), - (HANDLER_DAG_0, REORDERED_MWF_ARRAY_DAG_0), - (HANDLER_DAG_0_W_DUPLICATES, REORDERED_MWF_ARRAY_DAG_0), - (HANDLER_DAG_1, REORDERED_MWF_ARRAY_DAG_1), - (HANDLER_DAG_1_W_SELF_DEP, REORDERED_MWF_ARRAY_DAG_1) - ] - ) - def test_create_ordered_meta_workflows_list(self, mwf_handler_dict, reordered_mwf_list): - meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) - assert getattr(meta_workflow_handler, "ordered_meta_workflows") == reordered_mwf_list - @pytest.mark.parametrize( - "mwf_handler_dict", - [ - (HANDLER_CYCLIC_0), - (HANDLER_CYCLIC_1) - ] - ) - def test_cycles(self, mwf_handler_dict): - with pytest.raises(Exception) as exc_info: - meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) - assert "Cycle in graph: node" in str(exc_info) +# class TestMetaWorkflowHandler: +# def test_attribute_validation_mwf_handler(self): +# with pytest.raises(ValueError) as val_err_info: +# meta_workflow_handler = MetaWorkflowHandler(HANDLER_WITHOUT_UUID) +# assert "validation error" in str(val_err_info) - @pytest.mark.parametrize( - "mwf_handler_dict", - [ - (HANDLER_WITHOUT_MWF_ARRAY), - (HANDLER_DAG_0), - (HANDLER_DAG_0_W_DUPLICATES), - (HANDLER_DAG_1), - (HANDLER_DAG_1_W_SELF_DEP) - ] - ) - def test_create_ordered_meta_workflow_steps_list(self, mwf_handler_dict): - meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) - ordered_meta_workflow_steps = getattr(meta_workflow_handler, "ordered_meta_workflow_steps") - for step in ordered_meta_workflow_steps: - assert isinstance(step, MetaWorkflowStep) +# @pytest.mark.parametrize( +# "mwf_handler_dict, length_of_mwf_list", +# [ +# (HANDLER_WITHOUT_MWF_ARRAY, 0), # sets empty list if attr not present +# (HANDLER_DAG_0, 3), +# (HANDLER_DAG_0_W_DUPLICATES, 3) # gets rid of duplicates +# ] +# ) +# def test_set_meta_workflows_list(self, mwf_handler_dict, length_of_mwf_list): +# meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) +# assert len(getattr(meta_workflow_handler, "meta_workflows")) == length_of_mwf_list + +# @pytest.mark.parametrize( +# "mwf_handler_dict, reordered_mwf_list", +# [ +# (HANDLER_WITHOUT_MWF_ARRAY, []), +# (HANDLER_DAG_0, REORDERED_MWF_ARRAY_DAG_0), +# (HANDLER_DAG_0_W_DUPLICATES, REORDERED_MWF_ARRAY_DAG_0), +# (HANDLER_DAG_1, REORDERED_MWF_ARRAY_DAG_1), +# (HANDLER_DAG_1_W_SELF_DEP, REORDERED_MWF_ARRAY_DAG_1) +# ] +# ) +# def test_create_ordered_meta_workflows_list(self, mwf_handler_dict, reordered_mwf_list): +# meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) +# assert getattr(meta_workflow_handler, "ordered_meta_workflows") == reordered_mwf_list + +# @pytest.mark.parametrize( +# "mwf_handler_dict", +# [ +# (HANDLER_CYCLIC_0), +# (HANDLER_CYCLIC_1) +# ] +# ) +# def test_cycles(self, mwf_handler_dict): +# with pytest.raises(Exception) as exc_info: +# meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) +# assert "Cycle in graph: node" in str(exc_info) + +# @pytest.mark.parametrize( +# "mwf_handler_dict", +# [ +# (HANDLER_WITHOUT_MWF_ARRAY), +# (HANDLER_DAG_0), +# (HANDLER_DAG_0_W_DUPLICATES), +# (HANDLER_DAG_1), +# (HANDLER_DAG_1_W_SELF_DEP) +# ] +# ) +# def 
test_create_ordered_meta_workflow_steps_list(self, mwf_handler_dict): +# meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) +# ordered_meta_workflow_steps = getattr(meta_workflow_handler, "ordered_meta_workflow_steps") +# for step in ordered_meta_workflow_steps: +# assert isinstance(step, MetaWorkflowStep) + + + + + + + +# # dummy class for creating simple objects +# class ClassTester: +# """ +# Class for creation of simple objects, based on an input dictionary +# """ + +# def __init__(self, input_dict): +# """ +# Constructor method, initialize object and attributes. + +# :param input_dict: dictionary defining the basic attributes of object to be created +# :type input_dict: dict +# """ +# for key in input_dict: +# setattr(self, key, input_dict[key]) + +# # TODO: is there a way to functionalize this? +# # input dicts to create ClassTester objects +# INPUT_DICT_SINGLE_SIMPLE_ATTR = {"test_0": 0} +# INPUT_DICT_SINGLE_SIMPLE_ATTR_1 = {"test_1": 0} +# INPUT_DICT_SINGLE_SIMPLE_ATTR_2 = {"test_2": 0} +# INPUT_DICT_SEVERAL_SIMPLE_ATTRS = {"test_0": 0, "test_1": 1, "test_2": 2} +# INPUT_DICT_SINGLE_EMPTY_LIST_ATTR = {"list_empty_0": []} +# INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS = { +# "list_empty_0": [], +# "list_empty_1": [], +# "list_empty_2": [], +# } +# INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR = {"list_simple_0": [1, 2, 3]} +# INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS = { +# "list_simple_0": [1, 2, 3], +# "list_simple_1": ["a", "b", "c"], +# } +# INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP = {"list_simple_0": [1, 2, 3, 4, 3]} +# INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = { +# "list_simple_0": [1, "a", 3, 3], +# "list_simple_1": ["a", "b", "c"], +# "list_simple_2": ["c", 1, "c"], +# } + +# LIST_OF_EMPTY_DICTS = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] +# LIST_OF_SIMPLE_ATTR_DICTS = [ +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# INPUT_DICT_SINGLE_SIMPLE_ATTR_1, +# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, +# ] +# LIST_OF_SIMPLE_ATTR_DICTS_REORDERED = [ +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, +# INPUT_DICT_SINGLE_SIMPLE_ATTR_1, +# ] +# LIST_OF_SIMPLE_ATTR_DICTS_W_DUP = [ +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# INPUT_DICT_SINGLE_SIMPLE_ATTR_1, +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# ] +# LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_2 = [ +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# INPUT_DICT_SINGLE_SIMPLE_ATTR_1, +# INPUT_DICT_SINGLE_SIMPLE_ATTR, +# ] +# LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_3 = [ +# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, +# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, +# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, +# ] + +# INPUT_DICT_SINGLE_LIST_OF_DICTS = { +# "list_of_dicts": LIST_OF_SIMPLE_ATTR_DICTS +# } +# INPUT_DICT_SEVERAL_LISTS_OF_DICTS = { +# "list_of_dicts_0": LIST_OF_SIMPLE_ATTR_DICTS, +# "list_of_dicts_1": LIST_OF_SIMPLE_ATTR_DICTS_REORDERED, +# } +# INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP = { +# "list_of_dicts": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP +# } +# INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP = { +# "list_of_dicts_0": LIST_OF_SIMPLE_ATTR_DICTS, +# "list_of_dicts_1": LIST_OF_SIMPLE_ATTR_DICTS_REORDERED, +# "list_of_dicts_2": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_2, +# "list_of_dicts_3": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_3 +# } + + +# # ClassTester objects +# CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_ATTR) +# CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_ATTRS) +# 
CLASSTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_EMPTY_LIST_ATTR) +# CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS) +# CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR) +# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS) +# CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP = ClassTester( +# INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP +# ) +# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = ClassTester( +# INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP +# ) +# CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS) +# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS = ClassTester(INPUT_DICT_SEVERAL_LISTS_OF_DICTS) +# CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP) +# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP = ClassTester( +# INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP +# ) + +# class TestSetUniqueListAttributes: +# @pytest.mark.parametrize( +# "input_object, attributes_to_set", +# [ +# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), +# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), +# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), +# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), +# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), +# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), +# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), +# ], +# ) +# def test_set_unique_list_attributes_of_existing_nonlist_attributes( +# self, input_object, attributes_to_set +# ): +# """ +# Test for function that gets rid of duplicates within object attributes that are lists, +# or sets attributes to empty list if not present within the object. +# Cases where the attributes to set are existent and are NOT lists, no action done. 
+# """ +# original_object = deepcopy(input_object) +# result = set_unique_list_attributes(input_object, attributes_to_set) +# assert result is None +# assert vars(input_object) == vars(original_object) # no attributes changed +# #TODO: double check the above "vars" functionality + +# @pytest.mark.parametrize( +# "input_object, attributes_to_set, orig_lengths, reset_lengths", +# [ +# (CLASSTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR, ["list_empty_0"], [0], [0]), +# ( +# CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, +# ["list_empty_0", "list_empty_1", "list_empty_2"], +# [0, 0, 0], +# [0, 0, 0], +# ), +# (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), +# ( +# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, +# ["list_simple_0", "list_simple_1"], +# [3, 3], +# [3, 3], +# ), +# (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), +# ( +# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, +# ["list_simple_0", "list_simple_2", "list_simple_1"], +# [4, 3, 3], +# [3, 2, 3], +# ), +# (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS, ["list_of_dicts"], [3], [3]), +# ( +# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS, +# ["list_of_dicts_1", "list_of_dicts_0"], +# [3, 3], +# [3, 3], +# ), +# (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP, ["list_of_dicts"], [6], [3]), +# ( +# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP, +# [ +# "list_of_dicts_1", +# "list_of_dicts_0", +# "list_of_dicts_2", +# "list_of_dicts_3", +# ], +# [3, 3, 5, 3], +# [3, 3, 2, 1], +# ), +# ], +# ) +# def test_set_unique_list_attributes_of_existing_list_attributes( +# self, input_object, attributes_to_set, orig_lengths, reset_lengths +# ): +# """ +# Test for function that gets rid of duplicates within object attributes that are lists, +# or sets attributes to empty list if not present within the object. +# Cases where the attributes to set are existent and are lists. +# """ +# # import pdb; pdb.set_trace() +# # check original length of attributes_to_set +# for idx, attribute in enumerate(attributes_to_set): +# assert len(getattr(input_object, attribute)) == orig_lengths[idx] + +# result = set_unique_list_attributes(input_object, attributes_to_set) + +# # check length of "reset" attributes_to_set +# for idx, attribute in enumerate(attributes_to_set): +# assert len(getattr(input_object, attribute)) == reset_lengths[idx] + +# assert result is None + +# @pytest.mark.parametrize( +# "input_object, attributes_to_set, num_added_attributes", +# [ +# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"], 0), +# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_1"], 1), +# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_2", "test_3"], 1), +# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_5", "test_0", "test_4"], 2), +# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"], 0), +# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"], 0), +# ], +# ) +# def test_set_unique_list_attributes_of_nonexistent_attributes( +# self, input_object, attributes_to_set, num_added_attributes +# ): +# """ +# Test for function that gets rid of duplicates within object attributes that are lists, +# or sets attributes to empty list if not present within the object. +# Cases where the attributes to set are nonexistent, so they are added with the value []. 
+# """ +# # TODO: this changes the objects permanently since I'm setting attrs +# # but I don't think this will affect further testing (specifically, fourth example) + +# original_attributes_set = set(dir(input_object)) +# num_original_attributes = len(original_attributes_set) + +# result = set_unique_list_attributes(input_object, attributes_to_set) +# assert result is None + +# reset_attributes_set = set(dir(input_object)) +# num_reset_attributes = len(reset_attributes_set) + +# assert num_added_attributes == (num_reset_attributes - num_original_attributes) + +# added_attributes = reset_attributes_set.difference(original_attributes_set) +# for attribute in added_attributes: +# assert attribute in attributes_to_set +# assert getattr(input_object, attribute) == [] + +# # TODO: add a test for mixed cases? (nonexistent + lists + empties, etc.) \ No newline at end of file diff --git a/test/test_topological_sort.py b/test/test_topological_sort.py index 4b5b2da..ef68810 100644 --- a/test/test_topological_sort.py +++ b/test/test_topological_sort.py @@ -5,65 +5,44 @@ ################################################################# import pytest -from magma.topological_sort import * +from magma.metawfl_handler import MetaWorkflowStep +from magma.topological_sort import TopologicalSortHandler +from dcicutils.misc_utils import CycleError ################################################################# # Vars ################################################################# -INPUT_DICT_SINGLE_EMPTY_LIST_ATTR = {"list_empty_0": []} -INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS = { - "list_empty_0": [], - "list_empty_1": [], - "list_empty_2": [], -} -LIST_OF_DICTS_EMPTY_LIST_ATTR = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] +A_name = "A" +B_name = "B" +C_name = "C" +D_name = "D" +E_name = "E" # of the form [mwf_uuid, mwf_name] -# used for factory fixture to generate lists of dicts (steps with dependencies array) -MWF_A = ["test_mwf_uuid_0", "A"] -MWF_B = ["test_mwf_uuid_1", "B"] -MWF_C = ["test_mwf_uuid_2", "C"] -MWF_D = ["test_mwf_uuid_3", "D"] -MWF_E = ["test_mwf_uuid_4", "E"] -MWF_F = ["test_mwf_uuid_5", "F"] -MWF_G = ["test_mwf_uuid_6", "G"] -MWF_H = ["test_mwf_uuid_7", "H"] -MWF_I = ["test_mwf_uuid_8", "I"] -MWF_J = ["test_mwf_uuid_9", "J"] - -A = ["A"] -B = ["B"] -C = ["C"] -D = ["D"] -E = ["E"] -F = ["F"] -G = ["G"] -H = ["H"] -I = ["I"] -J = ["J"] +# used for factory (defined below) to generate lists of dicts (steps with dependencies array) +MWF_A = ["test_mwf_uuid_0", A_name] +MWF_B = ["test_mwf_uuid_1", B_name] +MWF_C = ["test_mwf_uuid_2", C_name] +MWF_D = ["test_mwf_uuid_3", D_name] +MWF_E = ["test_mwf_uuid_4", E_name] + +A = [A_name] +B = [B_name] +C = [C_name] +D = [D_name] +E = [E_name] DEP_ON_A = [A] DEP_ON_B = [B] DEP_ON_C = [C] DEP_ON_D = [D] DEP_ON_E = [E] -DEP_ON_F = [F] -DEP_ON_G = [G] -DEP_ON_H = [H] -DEP_ON_I = [I] -DEP_ON_J = [J] DEP_EMPTY = [[]] -EXISTING_MWF_NAMES = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] - THREE_MWF = [MWF_A, MWF_B, MWF_C] FOUR_MWF = [MWF_A, MWF_B, MWF_C, MWF_D] FIVE_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E] -SIX_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E, MWF_F] -SEVEN_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E, MWF_F, MWF_G] -EIGHT_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E, MWF_F, MWF_G, MWF_H] -TEN_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E, MWF_F, MWF_G, MWF_H, MWF_I, MWF_J] #TODO: add docstring of what this does -- for constructing testing graphs @@ -83,28 +62,20 @@ def meta_workflow_dict(simple_mwf_metadata_list): } if 
len(simple_mwf_metadata_list) == 3: mwf_dict["dependencies"] = simple_mwf_metadata_list[2] + + # just to be able to create MetaWorkflowStep objects without error + mwf_dict["items_for_creation_uuid"] = "foo" + mwf_dict["duplication_flag"] = False return mwf_dict -@pytest.fixture -def list_of_dicts(): - def _create_list(array_of_mwf): - created_list = [] - for simple_mwf_metadata_list in array_of_mwf: - created_list.append(meta_workflow_dict(simple_mwf_metadata_list)) - return created_list - return _create_list +def create_input_meta_workflows_dict(array_of_mwf): + input_meta_workflows_dict = {} + for mwf in array_of_mwf: + mwf_dictionary = meta_workflow_dict(mwf) + mwf_name = mwf_dictionary["name"] + input_meta_workflows_dict[mwf_name] = MetaWorkflowStep(mwf_dictionary) + return input_meta_workflows_dict -# had to make this fixture because can't use set() method to -# de-duplicate a list of dicts (an unhashable type) -@pytest.fixture -def non_duplicated_array(): - def _non_duplicate_array_creation(arr): - non_dup_arr = [] - for item in arr: - if item not in non_dup_arr: - non_dup_arr.append(item) - return non_dup_arr - return _non_duplicate_array_creation #TODO: dawg idk how to draw these # DAGs (directed acyclic graphs, can be typologically sorted) @@ -128,62 +99,6 @@ def _non_duplicate_array_creation(arr): DEPENDENCIES_DAG_1 = [[B+C], DEP_EMPTY, DEP_EMPTY, [A+B+C]] DAG_1 = construct_array_of_mwf(FOUR_MWF, DEPENDENCIES_DAG_1) -# DAG_2 -# E -----> C -# | | -# | | -# ⋁ ⋁ -# A -----> D -# ⋀ | -# | | -# | ⋁ -# F -----> B -DEPENDENCIES_DAG_2 = [[E+F], [D+F], DEP_ON_E, DEP_ON_C, DEP_EMPTY, DEP_EMPTY] -DAG_2 = construct_array_of_mwf(SIX_MWF, DEPENDENCIES_DAG_2) - -# DAG_3 -# A -----> C ------> F -# | / | -# | / | -# ⋁ ⋁ ⋁ -# B ---------> E ---> D -# \ ⋀ -# \ __________/ -# ⋁ / -# G -DEPENDENCIES_DAG_3 = [DEP_EMPTY, DEP_ON_A, DEP_ON_A, [G+E+F], [B+F], DEP_ON_C, DEP_ON_B] -DAG_3 = construct_array_of_mwf(SEVEN_MWF, DEPENDENCIES_DAG_3) - -# DAG_4 -# A ----> C ----> F -# ⋀ -# / -# / -# B ------> D -----> G ----> H -# \ ⋀ -# \ __________/ -# ⋁ / -# E -DEPENDENCIES_DAG_4 = [DEP_EMPTY, DEP_EMPTY, DEP_ON_A, DEP_ON_B, DEP_ON_B, [C+D], [E+D], DEP_ON_G] -DAG_4 = construct_array_of_mwf(EIGHT_MWF, DEPENDENCIES_DAG_4) - -# DAG_5 -# A -----> B -----> E -# | | -# | ⋁ -# | D -# | -> F -# ⋁ / -# C -----> H -# | \ J -# | \ ⋀ -# | \ | -# | ---> G -----> I -# | ⋀ -# |___________________| -DEPENDENCIES_DAG_5 = [DEP_EMPTY, DEP_ON_A, DEP_ON_A, DEP_ON_B, DEP_ON_B, DEP_ON_C, DEP_ON_C, DEP_ON_C, [C+G], DEP_ON_G] -DAG_5 = construct_array_of_mwf(TEN_MWF, DEPENDENCIES_DAG_5) - # Cyclic graphs, cannot be typologically sorted # ---------------------------------------------- @@ -197,392 +112,54 @@ def _non_duplicate_array_creation(arr): CYCLIC_0 = construct_array_of_mwf(FOUR_MWF, DEPENDENCIES_CYCLIC_0) # CYCLIC_1 -# A -----> B -# ⋀ | -# | | -# | ⋁ -# D <----- C -DEPENDENCIES_CYCLIC_1 = [DEP_ON_D, DEP_ON_A, DEP_ON_B, DEP_ON_C] -CYCLIC_1 = construct_array_of_mwf(FOUR_MWF, DEPENDENCIES_CYCLIC_1) - -# CYCLIC_2 # A -----> B ----> E # ⋀ | ⋀ | # | | \____| # | ⋁ # D <----- C -DEPENDENCIES_CYCLIC_2 = [DEP_ON_D, [A+E], DEP_ON_B, DEP_ON_C, DEP_ON_B] -CYCLIC_2 = construct_array_of_mwf(FIVE_MWF, DEPENDENCIES_CYCLIC_2) - -# CYCLIC_3 -# B -----> A -----> D -# ⋀ | ⋀ | -# | | | | -# | | | ⋁ -# C <----- ------- E -DEPENDENCIES_CYCLIC_3 = [[B+E], DEP_ON_C, DEP_ON_A, DEP_ON_A, DEP_ON_D] -CYCLIC_3 = construct_array_of_mwf(FIVE_MWF, DEPENDENCIES_CYCLIC_3) - -# CYCLIC_4 -# A -----> B -----> E -# | | -# | ⋁ -# | D -# | -> F -# ⋁ / -# C -----> H -# ⋀ \ 
J -# | \ ⋀ -# | \ | -# | ---> G -----> I -# | | -# |___________________| -DEPENDENCIES_CYCLIC_4 = [DEP_EMPTY, DEP_ON_A, [A+I], DEP_ON_B, DEP_ON_B, DEP_ON_C, DEP_ON_C, DEP_ON_C, DEP_ON_G, DEP_ON_G] -CYCLIC_4 = construct_array_of_mwf(TEN_MWF, DEPENDENCIES_CYCLIC_4) +DEPENDENCIES_CYCLIC_1 = [DEP_ON_D, [A+E], DEP_ON_B, DEP_ON_C, DEP_ON_B] +CYCLIC_1 = construct_array_of_mwf(FIVE_MWF, DEPENDENCIES_CYCLIC_1) ################################################################# # Tests ################################################################# - -class TestCheckPresenceOfKey: - @pytest.mark.parametrize( - "empty_list_of_dicts, key_to_check, expected_result", - [ - ([], None, True), - ([], "key", True), # kind of weird edge case, but not a biggie (TODO:) - ( - LIST_OF_DICTS_EMPTY_LIST_ATTR, - "list_empty_0", - True, - ) - ], - ) - def test_check_presence_of_key_empty_dicts( - self, empty_list_of_dicts, key_to_check, expected_result - ): - """ - Test for function checking that all dictionaries in a given list have the - specified key, no errors raised, with empty list or list of empty dicts. - """ - result = check_presence_of_key(empty_list_of_dicts, key_to_check) - assert result == expected_result - - @pytest.mark.parametrize( - "array_of_mwf, key_to_check, expected_result", - [ - ( - TEN_MWF, - "name", - True, - ), - ( - THREE_MWF, - "meta_workflow", - True, - ), - ( - [MWF_J, MWF_I, MWF_H], - "hi", - False, - ) - ], - ) - def test_check_presence_of_key( - self, list_of_dicts, array_of_mwf, key_to_check, expected_result - ): - """ - Test for function checking that all dictionaries in a given list have the - specified key, no errors raised, regular cases. - """ - dict_list = list_of_dicts(array_of_mwf) - result = check_presence_of_key(dict_list, key_to_check) - assert result == expected_result - result2 = check_presence_of_key(dict_list + LIST_OF_DICTS_EMPTY_LIST_ATTR, "list_empty_0") - assert result2 == False - - -@pytest.mark.parametrize( - "array_of_mwf, expected_result", - [ - ( - TEN_MWF, - EXISTING_MWF_NAMES - ), - ( - [MWF_B, MWF_E, MWF_I, MWF_A], - ["B", "E", "I", "A"] # B+E+I+A - ), - ( - [], - [] - ) - ], -) -def test_generate_ordered_step_name_list( - list_of_dicts, array_of_mwf, expected_result -): - """ - Test for function creating a list of values for a given key, - using a list of dictionaries. 
- """ - dict_list = list_of_dicts(array_of_mwf) - result = generate_ordered_step_name_list(dict_list, "name") - assert result == expected_result - - -#TODO: will be generalizing this function later -#TODO: use your new tester helper function for constructing array_of_mwf -class TestSetDependencyListValues: - @pytest.mark.parametrize( - "array_of_mwf, orig_dependencies, reset_dependencies", - [ - ( - # no changes made - [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + DEP_ON_A], - [C, A, A], - [C, A, A] - ), - ( - # get rid of dependency on nonexistent step - [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + DEP_ON_D], - [C, A, D], - [C, A, []] - ), - ( - # get rid of dependency on nonexistent steps - [MWF_A + DEP_ON_G, MWF_B + DEP_ON_A, MWF_C + DEP_ON_D], - [G, A, D], - [[], A, []] - ), - ( - # get rid of self-dependencies - [MWF_A + [B + A], MWF_B + DEP_ON_A], - [B + A, A], - [B, A] - ), - ( - # get rid of duplicate dependencies - [MWF_A + DEP_ON_C, MWF_B + DEP_ON_A, MWF_C + [A + C + A]], - [C, A, A + C + A], - [C, A, A] - ), - ( - # no dependencies = no change, just set dependencies to empty list - [MWF_A + DEP_EMPTY], - DEP_EMPTY, - DEP_EMPTY - ) - ], - ) - def test_set_dependency_list_values_of_existing_dependencies( - self, list_of_dicts, array_of_mwf, orig_dependencies, reset_dependencies - ): - """ - Test for function that gets rid of duplicates within object attributes that are lists, - or sets attributes to empty list if not present within the object. - Cases where the dependency lists are existent. - """ - orig_dict_list = list_of_dicts(array_of_mwf) - existing_step_names = generate_ordered_step_name_list(orig_dict_list, "name") - - reset_dict_list = set_dependency_list_values(orig_dict_list, "name", "dependencies", existing_step_names) - - for idx, dictionary in enumerate(orig_dict_list): - assert dictionary["dependencies"] == orig_dependencies[idx] - - for idx, dictionary in enumerate(reset_dict_list): - assert dictionary["dependencies"] == reset_dependencies[idx] - - - # TODO: dependencies originally not there --> create new dependencies list - # non-list dependencies - # case of [] no dicts at all +class TestTopologicalSortHandler: @pytest.mark.parametrize( - "array_of_mwf, idx_without_dependencies", + "array_of_mwf, input_graph_to_topological_sort", [ - (TEN_MWF, [*range(0, 10)]), - ([MWF_A + ["hi"], MWF_B + DEP_ON_A], [0]), - ([MWF_A + ["hi"], MWF_B], [0, 1]) + (DAG_0, {A_name: {}, B_name: {}, C_name: {B_name}}), + (DAG_1, {A_name: {B_name, C_name}, B_name: {}, C_name: {}, D_name: {A_name, B_name, C_name}}), + (CYCLIC_0, {A_name: {}, B_name: {D_name}, C_name: {B_name}, D_name: {C_name}}) ], ) - def test_set_dependency_list_values_of_non_existing_dependencies( - self, list_of_dicts, array_of_mwf, idx_without_dependencies - ): - """ - Test for function that gets rid of duplicates within object attributes that are lists, - or sets attributes to empty list if not present within the object. - Cases where the dependency lists are non-existent or not of type list, - so fxn should either set to empty list (non-existent dependencies) or - raise Key Error when dependencies is not of type list. 
- """ - orig_dict_list = list_of_dicts(array_of_mwf) - existing_step_names = generate_ordered_step_name_list(orig_dict_list, "name") - - reset_dict_list = set_dependency_list_values(orig_dict_list, "name", "dependencies", existing_step_names) - - for idx in idx_without_dependencies: - try: - dependencies_value = orig_dict_list[idx]["dependencies"] - assert isinstance(dependencies_value, list) == False - except KeyError: - pass # dicts at these indices originally didn't have dependencies attr - - # and assert that they were reset - assert reset_dict_list[idx]["dependencies"] == [] - - #TODO: add a test with a mix of the above two? or just assume it works (it does) - -@pytest.mark.parametrize( - "array_of_mwf, name_to_search, expected_step, expected_index", - [ - ([MWF_A, MWF_B, MWF_C], "test_mwf_uuid_1", MWF_B, 1), - ([MWF_A, MWF_B, MWF_C], "test_mwf_uuid_6", MWF_B, 1) # raises Exception - ], -) -def test_find_step_with_given_name( - list_of_dicts, array_of_mwf, name_to_search, expected_step, expected_index -): - try: - steps_with_dependencies = list_of_dicts(array_of_mwf) - index, step = find_step_with_given_name(steps_with_dependencies, "meta_workflow", name_to_search) - except Exception as exception_info: - assert f"Node named {name_to_search} is a nonexistent step" == str(exception_info) - else: - assert index == expected_index - assert step == meta_workflow_dict(expected_step) + def test_create_topo_sort_graph_input(self, array_of_mwf, input_graph_to_topological_sort): + # TODO: could make these next two lines a fxn because i reuse over and over + input_mwf_dict = create_input_meta_workflows_dict(array_of_mwf) + sorter = TopologicalSortHandler(input_mwf_dict) + assert sorter.graph == input_graph_to_topological_sort - -class TestTopologicalSortDFSHelper: @pytest.mark.parametrize( - "dag_array_of_mwf, starting_idx, expected_queue_by_index", + "array_of_mwf, possible_sorted_lists", [ - (DAG_0, 0, [0]), - (DAG_0, 1, [1]), - (DAG_0, 2, [1, 2]), - (DAG_1, 0, [1, 2, 0]), - (DAG_1, 3, [1, 2, 0, 3]), - (DAG_3, 0, [0]), - (DAG_3, 4, [0, 1, 2, 5, 4]), - (CYCLIC_0, 0, [0]) # won't detect cycles in disconnected graphs, but overall toposort will + (DAG_0, [[A_name, B_name, C_name]]), + (DAG_1, [[B_name, C_name, A_name, D_name], [C_name, B_name, A_name, D_name]]) ], ) - def test_topological_sort_helper_no_cycles( - self, list_of_dicts, non_duplicated_array, dag_array_of_mwf, starting_idx, expected_queue_by_index - ): - graph = list_of_dicts(dag_array_of_mwf) - starting_node = graph[starting_idx] - starting_queue = [] - - # TODO: make this a fixture? - length = len(graph) - visited_temporary = [False]*length - visited_permanent = [False]*length - - #TODO: also make this a fixture? 
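-        # (The visited_temporary / visited_permanent arrays above implement the
-        # classic two-mark DFS bookkeeping: revisiting a node that is still
-        # marked "temporary" signals a back edge, i.e. a cycle.)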
- expected_queue = [] - expected_visited_permanent = [False]*length - for i in expected_queue_by_index: - expected_queue.append(graph[i]) - expected_visited_permanent[i] = True - - #TODO: make global constants NAME and DEPENDENCIES keys - resulting_queue = topological_sort_dfs_helper(graph, starting_node, starting_idx, "name", "dependencies", visited_temporary, visited_permanent, starting_queue) - assert resulting_queue == expected_queue - assert visited_permanent == expected_visited_permanent - - # check that there are no duplicates in returned queue - non_dup_resulting_queue = non_duplicated_array(resulting_queue) - assert resulting_queue == non_dup_resulting_queue - + def test_sorted_graph_list(self, array_of_mwf, possible_sorted_lists): + input_mwf_dict = create_input_meta_workflows_dict(array_of_mwf) + sorter = TopologicalSortHandler(input_mwf_dict) + assert sorter.sorted_graph_list() in possible_sorted_lists @pytest.mark.parametrize( - "cyclic_graph_array_of_mwf, starting_idx, node_at_cycle_detection", + "array_of_mwf", [ - (CYCLIC_0, 1, "B"), - (CYCLIC_2, 0, "A"), # just illustrating the nature of DFS w CYCLIC_2 - (CYCLIC_2, 1, "B"), - (CYCLIC_2, 4, "B"), - (CYCLIC_3, 4, "A"), - (CYCLIC_4, 6, "G") # same here + (CYCLIC_0), (CYCLIC_1) ], ) - def test_topological_sort_helper_cycles( - self, list_of_dicts, cyclic_graph_array_of_mwf, starting_idx, node_at_cycle_detection - ): - graph = list_of_dicts(cyclic_graph_array_of_mwf) - starting_node = graph[starting_idx] - starting_queue = [] - - # TODO: make this a fixture? (same as prior test, also follow toposort tests) - length = len(graph) - visited_temporary = [False]*length - visited_permanent = [False]*length - - #TODO: make global constants NAME and DEPENDENCIES keys - with pytest.raises(Exception) as exception_info: - topological_sort_dfs_helper(graph, starting_node, starting_idx, "name", "dependencies", visited_temporary, visited_permanent, starting_queue) - assert f"Cycle in graph: node {node_at_cycle_detection}" in str(exception_info.value) - -# TODO: if you make topological sort a class, you can test that visited_permanent is all True -class TestTopologicalSort: - @pytest.mark.parametrize( - "dag_array_of_mwf, expected_queue_by_index", - [ - # TODO: illustrate with different starting indices, to show that - # there exist several valid orderings, based on DFS beginning node - # may make new DAGs w same dependencies but different ordering of the array - (DAG_0, [0, 1, 2]), - (DAG_1, [1, 2, 0, 3]), - (DAG_2, [4, 5, 0, 2, 3, 1]), - (DAG_3, [0, 1, 2, 6, 5, 4, 3]), - (DAG_4, [0, 1, 2, 3, 4, 5, 6, 7]), - (DAG_5, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - ], - ) - def test_topological_sort_no_cycles( - self, list_of_dicts, non_duplicated_array, dag_array_of_mwf, expected_queue_by_index - ): - graph = list_of_dicts(dag_array_of_mwf) - - # TODO: make this a fixture? (same as above tests) - length = len(graph) - - #TODO: also make this a fixture? 
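-        # (expected_queue is rebuilt from graph indices below so the assertion
-        # compares the actual node dictionaries, not just index positions.)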
- expected_queue = [] - for i in expected_queue_by_index: - expected_queue.append(graph[i]) - - #TODO: make global constants NAME and DEPENDENCIES keys - resulting_queue = topological_sort(graph, "name", "dependencies") - assert resulting_queue == expected_queue - - # assert that all nodes have indeed been visited - #TODO: add this when toposort has been made a class - # expected_visited_permanent = [True]*length - # assert visited_permanent == expected_visited_permanent - - # check that there are no duplicates in returned queue - non_dup_resulting_queue = non_duplicated_array(resulting_queue) - assert resulting_queue == non_dup_resulting_queue - - #TODO: again - maybe rearrange cyclic graph nodes to show it works in whatever order - @pytest.mark.parametrize( - "cyclic_graph_array_of_mwf, node_at_cycle_detection", - [ - (CYCLIC_0, "B"), - (CYCLIC_1, "A"), - (CYCLIC_2, "A"), - (CYCLIC_3, "A"), - (CYCLIC_4, "C") - ], - ) - def test_topological_sort_cycles( - self, list_of_dicts, cyclic_graph_array_of_mwf, node_at_cycle_detection - ): - graph = list_of_dicts(cyclic_graph_array_of_mwf) - - #TODO: make global constants NAME and DEPENDENCIES keys - with pytest.raises(Exception) as exception_info: - topological_sort(graph, "name", "dependencies") - assert f"Cycle in graph: node {node_at_cycle_detection}" in str(exception_info.value) \ No newline at end of file + def test_sorted_graph_list(self, array_of_mwf): + with pytest.raises(CycleError) as cycle_err_info: + input_mwf_dict = create_input_meta_workflows_dict(array_of_mwf) + sorter = TopologicalSortHandler(input_mwf_dict) + sorter.sorted_graph_list() + assert "nodes are in a cycle" in str(cycle_err_info.value) \ No newline at end of file diff --git a/test/test_validated_dictionary.py b/test/test_validated_dictionary.py index 58307ad..97a10bc 100644 --- a/test/test_validated_dictionary.py +++ b/test/test_validated_dictionary.py @@ -73,11 +73,11 @@ def test_validate_basic_attributes_no_errors(self, validated_dictionary_object, (EXTENSIVE_VALIDATED_DICT, ("attr_0", "attr_1", "not_present", "attr_2")) ] ) - def test_validate_basic_attributes_value_errors(self, validated_dictionary_object, attributes_to_check): + def test_validate_basic_attributes_attribute_errors(self, validated_dictionary_object, attributes_to_check): """ Test for function checking that specified attributes are part of a given ValidatedDictionary object, - ValueError raised. + AttributeError raised. 
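+        (Each parametrized case above includes "not_present", an attribute
+        that the EXTENSIVE_VALIDATED_DICT fixture does not define.)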
""" - with pytest.raises(ValueError) as value_err_info: + with pytest.raises(AttributeError) as attr_err_info: validated_dictionary_object._validate_basic_attributes(*attributes_to_check) - assert "Object validation error" in str(value_err_info.value) \ No newline at end of file + assert "Object validation error" in str(attr_err_info.value) \ No newline at end of file From c6a0e7fc8cd5e5752fe164b30180f3b80907b7eb Mon Sep 17 00:00:00 2001 From: vstevensf Date: Fri, 20 Jan 2023 08:31:43 -0500 Subject: [PATCH 20/38] Draft of MWF Handler, without creation of MWFR Handler --- magma/metawfl_handler.py | 134 ++++++++++++++++++---------------- magma/topological_sort.py | 2 - test/test_topological_sort.py | 2 +- 3 files changed, 74 insertions(+), 64 deletions(-) diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index 543079f..e98a39f 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -3,8 +3,10 @@ ################################################ # Libraries ################################################ +from copy import deepcopy + from magma.validated_dictionary import ValidatedDictionary -# from magma.topological_sort import generate_ordered_steps_list +from magma.topological_sort import TopologicalSortHandler ################################################ # MetaWorkflowStep @@ -54,71 +56,81 @@ def _validate_basic_attributes(self, *list_of_attributes): raise AttributeError("Object validation error, 'MetaWorkflowStep' object cannot have both of the following attributes: 'items_for_creation_property_trace' and 'items_for_creation_uuid'") -# ################################################ -# # MetaWorkflowHandler -# ################################################ -# class MetaWorkflowHandler(ValidatedDictionary): -# """ -# Class representing a MetaWorkflow Handler object, -# a list of MetaWorkflows with specified dependencies -# """ - -# UUID_ATTR = "uuid" -# META_WORKFLOWS_ATTR = "meta_workflows" -# META_WORKFLOW_NAME_ATTR = "name" -# META_WORKFLOW_DEPENDENCIES_ATTR = "dependencies" - -# def __init__(self, input_dict): -# """ -# Constructor method, initialize object and attributes. - -# :param input_dict: MetaWorkflow Handler object, defined by json file from portal -# :type input_dict: dict -# """ -# ### Basic attributes ### -# super.__init__(input_dict) +################################################ +# MetaWorkflowHandler +################################################ +class MetaWorkflowHandler(ValidatedDictionary): + """ + Class representing a MetaWorkflow Handler object, + a list of MetaWorkflows with specified dependencies + """ + + UUID_ATTR = "uuid" + META_WORKFLOWS_ATTR = "meta_workflows" + META_WORKFLOW_NAME_ATTR = "name" + META_WORKFLOW_DEPENDENCIES_ATTR = "dependencies" + + def __init__(self, input_dict): + """ + Constructor method, initialize object and attributes. 
+ + :param input_dict: MetaWorkflow Handler object, defined by json file from portal + :type input_dict: dict + """ + ### Basic attributes ### + super().__init__(input_dict) -# self._validate_basic_attributes(self.UUID_ATTR) + super()._validate_basic_attributes(self.UUID_ATTR) -# ### Calculated attributes ### -# # to check for non-existent meta_workflows attribute -# # if present, get rid of duplicates (by MetaWorkflow name) -# self._set_meta_workflows_list() + ### Calculated attributes ### + # set meta_workflows attribute + self._set_meta_workflows_dict() -# # order the meta_workflows list based on dependencies -# self.ordered_meta_workflows = self._create_ordered_meta_workflows_list() + # order the meta_workflows list based on dependencies TODO: use setattr instead? + self.ordered_meta_workflows = self._create_ordered_meta_workflows_list() # # using ordered metaworkflows list, create a list of objects using class MetaWorkflowStep # # this validates basic attributes needed for each metaworkflow step # self.ordered_meta_workflow_steps = self._create_meta_workflow_step_objects() -# def _set_meta_workflows_list(self): -# """ -# Checks for meta_workflows attribute, -# sets as empty list if not present, -# else gets rid of duplicates (by metaworkflow name) -# """ -# if not hasattr(self, self.META_WORKFLOWS_ATTR): -# # if not present, set attribute as empty list -# setattr(self, self.META_WORKFLOWS_ATTR, []) -# else: -# attrib = getattr(self, self.META_WORKFLOWS_ATTR) - -# # then get rid of duplicates, if present -# # non_dup_attrib = [] -# # for item in attrib: -# # if item not in non_dup_attrib: -# # non_dup_attrib.append(item) -# # setattr(self, self.META_WORKFLOWS_ATTR, non_dup_attrib) - -# def _create_ordered_meta_workflows_list(self): -# return generate_ordered_steps_list(self.meta_workflows, self.META_WORKFLOW_NAME_ATTR, self.META_WORKFLOW_DEPENDENCIES_ATTR) - -# def _create_meta_workflow_step_objects(self): -# meta_workflow_step_list = [] -# for meta_workflow in self.ordered_meta_workflows: -# meta_workflow_step_object = MetaWorkflowStep(meta_workflow) -# meta_workflow_step_list.append(meta_workflow_step_object) -# return meta_workflow_step_list - -# #TODO: check that there are no duplictes in ordered metaworkflows -- does this throw error or nah? TBD. + def _set_meta_workflows_dict(self): + """ + Checks for meta_workflows attribute. 
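+        Transforms the raw meta_workflows list into a mapping keyed by
+        MetaWorkflow name, e.g. (hypothetical values):
+        [{"name": "A", ...}, {"name": "B", ...}] -->
+        {"A": MetaWorkflowStep(...), "B": MetaWorkflowStep(...)}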
+ + If nonexistent, set as an empty dictionary + If present, copy that list temporarily and redefine as a dictionary + of the form {meta_workflow_name: meta_workflow_step,....} + getting rid of duplicates in the process (by MetaWorkflow name) + # TODO: this method doesn't allow for metaworkflows of the same name + # to be utilized in the same handler, even if they have distinct dependencies + # check if this is disastrous lol + + :return: None, if all MetaWorkflowSteps are created successfully + """ + if not hasattr(self, self.META_WORKFLOWS_ATTR): + # if not present, set attribute as empty dictionary + setattr(self, self.META_WORKFLOWS_ATTR, {}) + else: + orig_mwf_list_copy = deepcopy(getattr(self, self.META_WORKFLOWS_ATTR)) + + temp_mwf_step_dict = {} + + for mwf in orig_mwf_list_copy: + # create MetaWorkflowStep object for this metaworkflow + mwf_step = MetaWorkflowStep(mwf) + + # then add to the meta_workflows dictionary + # of the form {mwf["name"]: MetaWorkflowStep(mwf)} + temp_mwf_step_dict.setdefault(mwf["name"], mwf_step) + + # reset the "meta_workflows" attribute as an empty dictionary (rather than array) + setattr(self, self.META_WORKFLOWS_ATTR, temp_mwf_step_dict) + + def _create_ordered_meta_workflows_list(self): + # create "graph" that will be passed into the topological sorter + # graph = self._create_topo_sort_graph() + meta_workflows_dict = getattr(self, self.META_WORKFLOWS_ATTR) + sorter = TopologicalSortHandler(meta_workflows_dict) + + # # now topologically sort the steps + return sorter.sorted_graph_list() diff --git a/magma/topological_sort.py b/magma/topological_sort.py index 7cdb784..ae1a7f1 100644 --- a/magma/topological_sort.py +++ b/magma/topological_sort.py @@ -3,8 +3,6 @@ ################################################ # Libraries ################################################ -from copy import deepcopy - from dcicutils.misc_utils import TopologicalSorter ################################################ diff --git a/test/test_topological_sort.py b/test/test_topological_sort.py index ef68810..fd02fde 100644 --- a/test/test_topological_sort.py +++ b/test/test_topological_sort.py @@ -142,7 +142,7 @@ def test_create_topo_sort_graph_input(self, array_of_mwf, input_graph_to_topolog @pytest.mark.parametrize( "array_of_mwf, possible_sorted_lists", [ - (DAG_0, [[A_name, B_name, C_name]]), + (DAG_0, [[A_name, B_name, C_name], [B_name, A_name, C_name], [B_name, C_name, A_name]]), (DAG_1, [[B_name, C_name, A_name, D_name], [C_name, B_name, A_name, D_name]]) ], ) From 6cb41ac0be833c1b06960ec002447a214185abc3 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 6 Feb 2023 07:59:13 -0500 Subject: [PATCH 21/38] Further edits to basic handler classes includes addition of custom Exception classes --- magma/metawfl_handler.py | 93 ++++++-- magma/metawflrun_handler.py | 179 ++++++++++++++ test/test_metawfl_handler.py | 402 +++++++------------------------- test/test_metawflrun_handler.py | 312 +++++++++++++++++++++++++ 4 files changed, 659 insertions(+), 327 deletions(-) create mode 100644 magma/metawflrun_handler.py create mode 100644 test/test_metawflrun_handler.py diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index e98a39f..7565d3b 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -7,6 +7,20 @@ from magma.validated_dictionary import ValidatedDictionary from magma.topological_sort import TopologicalSortHandler +from dcicutils.misc_utils import CycleError + +################################################ +# Custom Exception classes 
+################################################ +class MetaWorkflowStepCycleError(CycleError): + """Custom exception for cycle error tracking.""" + pass + +class MetaWorkflowStepDuplicateError(ValueError): + pass + +class MetaWorkflowStepSelfDependencyError(ValueError): + pass ################################################ # MetaWorkflowStep @@ -19,6 +33,7 @@ class MetaWorkflowStep(ValidatedDictionary): META_WORKFLOW_ATTR = "meta_workflow" NAME_ATTR = "name" + DEPENDENCIES_ATTR = "dependencies" DUP_FLAG_ATTR = "duplication_flag" ITEMS_CREATION_PROP_TRACE = "items_for_creation_property_trace" ITEMS_CREATION_UUID = "items_for_creation_uuid" @@ -35,6 +50,8 @@ def __init__(self, input_dict): # Validate presence of basic attributes of this MetaWorkflow step self._validate_basic_attributes(self.META_WORKFLOW_ATTR, self.NAME_ATTR, self.DUP_FLAG_ATTR) + self._check_self_dependency() + def _validate_basic_attributes(self, *list_of_attributes): """ Validation of the JSON input for the MetaWorkflow step. @@ -44,6 +61,8 @@ def _validate_basic_attributes(self, *list_of_attributes): # str, must be unique TODO: name filling in ff try: # set None for [default] arg to not throw AttributeError + #TODO: move the differentiation with property trace to FF + # and just handle creation uuids here if not getattr(self, self.ITEMS_CREATION_UUID, None): getattr(self, self.ITEMS_CREATION_PROP_TRACE) except AttributeError as e: @@ -55,6 +74,13 @@ def _validate_basic_attributes(self, *list_of_attributes): if hasattr(self, self.ITEMS_CREATION_PROP_TRACE) and hasattr(self, self.ITEMS_CREATION_UUID): raise AttributeError("Object validation error, 'MetaWorkflowStep' object cannot have both of the following attributes: 'items_for_creation_property_trace' and 'items_for_creation_uuid'") + def _check_self_dependency(self): + if hasattr(self, self.DEPENDENCIES_ATTR): + dependencies = getattr(self, self.DEPENDENCIES_ATTR) + for dependency in dependencies: + if dependency == getattr(self, self.NAME_ATTR): + raise MetaWorkflowStepSelfDependencyError(f'"{dependency}" has a self dependency.') + ################################################ # MetaWorkflowHandler @@ -84,15 +110,15 @@ def __init__(self, input_dict): ### Calculated attributes ### # set meta_workflows attribute + # TODO: is this redefinition into a dictionary allowed? + # or should I just make a new attribute? I dunno how this would affect json in portal + # except maybe in patching self._set_meta_workflows_dict() - # order the meta_workflows list based on dependencies TODO: use setattr instead? + # order the meta_workflows list based on dependencies + # this ordered list is what's used to create the array of mwf runs in Run handler self.ordered_meta_workflows = self._create_ordered_meta_workflows_list() -# # using ordered metaworkflows list, create a list of objects using class MetaWorkflowStep -# # this validates basic attributes needed for each metaworkflow step -# self.ordered_meta_workflow_steps = self._create_meta_workflow_step_objects() - def _set_meta_workflows_dict(self): """ Checks for meta_workflows attribute. 
@@ -101,9 +127,6 @@ def _set_meta_workflows_dict(self): If present, copy that list temporarily and redefine as a dictionary of the form {meta_workflow_name: meta_workflow_step,....} getting rid of duplicates in the process (by MetaWorkflow name) - # TODO: this method doesn't allow for metaworkflows of the same name - # to be utilized in the same handler, even if they have distinct dependencies - # check if this is disastrous lol :return: None, if all MetaWorkflowSteps are created successfully """ @@ -121,16 +144,58 @@ def _set_meta_workflows_dict(self): # then add to the meta_workflows dictionary # of the form {mwf["name"]: MetaWorkflowStep(mwf)} - temp_mwf_step_dict.setdefault(mwf["name"], mwf_step) + if temp_mwf_step_dict.setdefault(mwf["name"], mwf_step) != mwf_step: + raise MetaWorkflowStepDuplicateError(f'"{mwf["name"]}" is a duplicate MetaWorkflow, all MetaWorkflow names must be unique.') # reset the "meta_workflows" attribute as an empty dictionary (rather than array) setattr(self, self.META_WORKFLOWS_ATTR, temp_mwf_step_dict) def _create_ordered_meta_workflows_list(self): - # create "graph" that will be passed into the topological sorter - # graph = self._create_topo_sort_graph() meta_workflows_dict = getattr(self, self.META_WORKFLOWS_ATTR) - sorter = TopologicalSortHandler(meta_workflows_dict) - # # now topologically sort the steps - return sorter.sorted_graph_list() + try: + # create "graph" that will be passed into the topological sorter + sorter = TopologicalSortHandler(meta_workflows_dict) + # now topologically sort the steps + return sorter.sorted_graph_list() + except CycleError: + raise MetaWorkflowStepCycleError() + + # def create_meta_workflow_run_handler_input_dict(self, associated_item_uuid): + # """ + # Completes attributes and other metadata for Metaworkflow Run Handler + + # TODO: should assoc item be a param? because it is a required mwfr handler attr for CGAP + + # :param associated_item_uuid: + # :type associated_item_uuid: str + # :return: input dictionary (JSON?? TODO:) to create MetaWorkflowRunHandler object + # :rtype: dict + # """ + + # #TODO: when should i do the check on the duplication flag? + + # #TODO: use getattr with constants rather than self references + + # input_dict = { + # #TODO: what's commented out will be taken care of in create_metawfr_handler.py + # # "project": self.project, + # # "institution": self.institution, + # #TODO: can you use a mwf handler from one proj/inst to + # # create a handler with a different proj/inst? + # "meta_workflow_handler": self.uuid, # assuming it's already converted from uuid to str? 
+    #         "associated_item": associated_item_uuid,
+    #         "final_status": "pending", #TODO: make this a constant
+    #         "meta_workflow_runs": []
+    #     }
+
+    #     # this goes stepwise, in order, and accessing mwf metadata through corresponding dict lookup by name
+    #     for meta_workflow_name in self.ordered_meta_workflows:
+    #         meta_workflow_run_dict = {}
+    #         meta_workflow_run_dict.setdefault("name", meta_workflow_name)
+    #         meta_workflow_run_dict.setdefault("items_for_creation", self.meta_workflows[meta_workflow_name].ITEMS_CREATION_UUID)
+    #         meta_workflow_run_dict.setdefault("dependencies", self.meta_workflows[meta_workflow_name].dependencies)
+    #         meta_workflow_run_dict.setdefault("status", "pending") #TODO: constant here
+    #         #TODO: meta_workflow_run uuid taken care of in ff creation of run handler
+
+    #         input_dict["meta_workflow_runs"].append(meta_workflow_run_dict)
diff --git a/magma/metawflrun_handler.py b/magma/metawflrun_handler.py
new file mode 100644
index 0000000..7954c6e
--- /dev/null
+++ b/magma/metawflrun_handler.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+
+################################################
+# Libraries
+################################################
+from magma.validated_dictionary import ValidatedDictionary
+
+################################################
+# MetaWorkflowRunStep
+################################################
+class MetaWorkflowRunStep(ValidatedDictionary):
+    """
+    Class to represent a MetaWorkflow Run object,
+    as a step within a MetaWorkflow Run Handler object.
+    Assumes this step is based on the ordered_meta_workflows list
+    from a MetaWorkflow Handler.
+    """
+
+    NAME_ATTR = "name" # name of the metaworkflow corresponding to this metaworkflow run
+    STATUS_ATTR = "status"
+    DEP_ATTR = "dependencies"
+    MWF_RUN_ATTR = "meta_workflow_run" #TODO: used within the handler itself
+    # ITEMS_CREATION_ATTR = "items_for_creation" #TODO: do this embedding in ff. BUT. make req?
+
+    def __init__(self, input_dict):
+        """
+        Constructor method, initialize object and attributes.
+
+        :param input_dict: a MetaWorkflow Run step and accompanying info within the handler, defined by json file
+        :type input_dict: dict
+        """
+        super().__init__(input_dict)
+
+        # automatically set the initial status to "pending", unless explicitly defined otherwise
+        if not hasattr(self, self.STATUS_ATTR):
+            setattr(self, self.STATUS_ATTR, "pending")
+
+        # Validate presence of basic attributes of this MetaWorkflow Run step
+        self._validate_basic_attributes(self.NAME_ATTR, self.DEP_ATTR)
+
+################################################
+# MetaWorkflowRunHandler
+################################################
+#TODO: what about associated item on the MWF handler itself, not just items for creation?
+class MetaWorkflowRunHandler(ValidatedDictionary):
+    """
+    Class representing a MetaWorkflowRun Handler object,
+    a list of MetaWorkflowRuns with specified dependencies,
+    and their statuses
+    """
+
+    UUID_ATTR = "uuid"
+    ASSOCIATED_METAWORKFLOW_HANDLER_ATTR = "meta_workflow_handler"
+    META_WORKFLOW_RUNS_ATTR = "meta_workflow_runs"
+    FINAL_STATUS_ATTR = "final_status"
+
+    def __init__(self, input_dict):
+        """
+        Constructor method, initialize object and attributes. 
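+        A minimal input sketch (required keys per the constants above;
+        hypothetical values):
+        {"uuid": "...", "meta_workflow_handler": "...", "meta_workflow_runs": [...]}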
+
+        :param input_dict: MetaWorkflow Run Handler object, defined by json file from portal
+        :type input_dict: dict
+        """
+
+        ### Basic attributes ###
+
+        super().__init__(input_dict)
+
+        self._validate_basic_attributes(self.UUID_ATTR, self.ASSOCIATED_METAWORKFLOW_HANDLER_ATTR, self.META_WORKFLOW_RUNS_ATTR)
+
+        # initialize the final_status attribute to "pending" upon creation
+        setattr(self, self.FINAL_STATUS_ATTR, "pending")
+
+        ### Calculated attributes ###
+
+        # by nature of how a MetaWorkflowRun Handler is created from the MetaWorkflow Handler,
+        # the array "meta_workflow_runs" will already be in some valid topologically sorted order
+        # (based on the topologically sorted list "meta_workflows" in the regular handler)
+        # here, though, we create a dictionary of the form {mwf_name: MetaWorkflowRunStep_object, ...}
+        # for faster lookup and updating of steps
+        self.meta_workflow_run_step_dict = self._create_meta_workflow_run_step_objects()
+
+
+    def _create_meta_workflow_run_step_objects(self):
+        """
+        Create a MetaWorkflowRunStep object for each dict in the
+        meta_workflow_runs array, keyed by step name.
+
+        :return: dictionary of the form {step_name: MetaWorkflowRunStep object}
+        :rtype: dict
+        """
+        meta_workflow_run_step_dict = {}
+        for meta_workflow_run in self.meta_workflow_runs:
+            meta_workflow_run_step_object = MetaWorkflowRunStep(meta_workflow_run)
+            step_name = meta_workflow_run["name"]
+            meta_workflow_run_step_dict[step_name] = meta_workflow_run_step_object
+        return meta_workflow_run_step_dict
+
+    # to update the final_status attribute of the handler
+    def update_final_status(self):
+        """
+        Check the status of all MetaWorkflowRunStep objects.
+        Initial final status = pending
+        If a step is running and none others have failed or stopped, final_status = running
+        If all steps are completed, final_status = completed
+        If a step has failed, final_status = failed
+        If a step has been stopped, final_status = stopped
+
+        :return: final_status
+        :rtype: str
+        """
+        # options for mwf runs: pending, running, completed, failed, stopped
+        # TODO: additional final_status possibilities from mwf run schema --> inactive, quality metric failed (how to handle these??) 
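+        # Worked example (mirrors the handler fixtures in the tests): with step
+        # statuses {B: completed, C: running, A: pending, D: pending} the final
+        # status becomes "running"; a single "failed" or "stopped" step breaks
+        # out of the loop early; only all-"completed" steps yield "completed".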
+ # TODO: use setattr method consistently + + all_steps_completed = True + + for meta_workflow_run_step in self.meta_workflow_run_step_dict.values(): + if meta_workflow_run_step.status != "completed": + all_steps_completed = False + if meta_workflow_run_step.status == "running": + setattr(self, self.FINAL_STATUS_ATTR, "running") + elif meta_workflow_run_step.status == "failed": + setattr(self, self.FINAL_STATUS_ATTR, "failed") + break + elif meta_workflow_run_step.status == "stopped": + setattr(self, self.FINAL_STATUS_ATTR, "stopped") + break + + # if all the steps were successfully completed + if all_steps_completed: + setattr(self, self.FINAL_STATUS_ATTR, "completed") + + # the following allows for resetting a MetaWorkflow Run Step + # this can happen only when the duplication flag is set to True + def reset_meta_workflow_run_step(self, meta_workflow_run_name): + """ + Resets status and meta_workflow_run attributes of a MetaWorkflowRunStep, given its name + + :param meta_workflow_run_name: name attribute of a MetaWorkflowRunStep + :type meta_workflow_run_name: str + """ + try: + step_obj = self.meta_workflow_run_step_dict[meta_workflow_run_name] + # Reset the status of the MetaWorkflow Run + setattr(step_obj, step_obj.STATUS_ATTR, "pending") + # Remove and reset the attribute for the LinkTo to the corresponding MetaWorkflow Run + setattr(step_obj, step_obj.MWF_RUN_ATTR, None) + except KeyError as key_err: + raise KeyError("{0} is not a valid MetaWorkflowRun Step name.\n" + .format(key_err.args[0])) + + # this is a more generalized version of the above + # this is for redefining any attribute of a MetaWorkflow Run Step + def update_meta_workflow_run_step(self, meta_workflow_run_name, attribute, value): + try: + step_obj = self.meta_workflow_run_step_dict[meta_workflow_run_name] + # Reset the given attribute + setattr(step_obj, attribute, value) + except KeyError as key_err: + raise KeyError("{0} is not a valid MetaWorkflowRun Step name.\n" + .format(key_err.args[0])) + + def pending_steps(self): + """ + returns a list of pending steps (by name) + if no more pending, return empty list + """ + pending_steps_list = [] + + for meta_workflow_run_step in self.meta_workflow_runs: + step_name = meta_workflow_run_step["name"] + #TODO: make pending a global var + if self.meta_workflow_run_step_dict[step_name].status == "pending": + pending_steps_list.append(step_name) + + return pending_steps_list + + def running_steps(self): + running_steps_list = [] + for meta_workflow_run in self.meta_workflow_runs: + associated_meta_workflow_name = meta_workflow_run["name"] + if self.meta_workflow_run_step_dict[associated_meta_workflow_name].status == "running": + running_steps_list.append(associated_meta_workflow_name) + + return running_steps_list \ No newline at end of file diff --git a/test/test_metawfl_handler.py b/test/test_metawfl_handler.py index 79f175a..4345e03 100644 --- a/test/test_metawfl_handler.py +++ b/test/test_metawfl_handler.py @@ -6,7 +6,9 @@ import pytest from copy import deepcopy -from magma.metawfl_handler import MetaWorkflowStep #, MetaWorkflowHandler +from magma.metawfl_handler import * + +#TODO: throw error on self dependency ################################################################# # Vars @@ -49,14 +51,14 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No # meta_workflows arrays for MetaWorkflow Handler # handler without uuid -- fails validation of basic attributes -HANDLER_WITHOUT_UUID = { +HANDLER_WITHOUT_UUID_DICT = { "name": 
MWF_HANDLER_NAME, "project": PROJECT, "institution": INSTITUTION } # handler without metaworkflows array -- passes validation, should set empty metaworkflows array -HANDLER_WITHOUT_MWF_ARRAY = { +HANDLER_WITHOUT_MWF_ARRAY_DICT = { "name": MWF_HANDLER_NAME, "project": PROJECT, "institution": INSTITUTION, @@ -67,6 +69,7 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No # A B -----> C MWF_A_DAG_0 = mwf_with_added_attrs(MWF_A, None, TESTER_UUID, [], True) MWF_B_DAG_0 = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, [], True) +MWF_B_DAG_0_W_DEP = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_A, True) MWF_C_DAG_0 = mwf_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B, True) DAG_0_MWF_ARRAY = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0] # purposely in this order to test toposort HANDLER_DAG_0 = { @@ -84,7 +87,15 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No "uuid": MWF_HANDLER_UUID, "meta_workflows": DAG_0_MWF_ARRAY_W_DUPLICATES } -REORDERED_MWF_ARRAY_DAG_0 = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0] +DAG_0_MWF_ARRAY_W_DUPLICATES_BY_MWF_NAME = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0, MWF_B_DAG_0_W_DEP] +HANDLER_DAG_0_W_DUPLICATES_BY_MWF_NAME = { + "name": MWF_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_HANDLER_UUID, + "meta_workflows": DAG_0_MWF_ARRAY_W_DUPLICATES_BY_MWF_NAME +} +REORDERED_MWFS_DAG_0 = [["A", "B", "C"], ["B", "A", "C"], ["B", "C", "A"]] # DAG_1 # B -----> D @@ -104,16 +115,7 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No "uuid": MWF_HANDLER_UUID, "meta_workflows": DAG_1_MWF_ARRAY } -MWF_B_DAG_1_W_SELF_DEP = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_B, True) -DAG_1_MWF_ARRAY_W_SELF_DEP = [MWF_A_DAG_1, MWF_B_DAG_1_W_SELF_DEP, MWF_C_DAG_1, MWF_D_DAG_1] -HANDLER_DAG_1_W_SELF_DEP = { - "name": MWF_HANDLER_NAME, - "project": PROJECT, - "institution": INSTITUTION, - "uuid": MWF_HANDLER_UUID, - "meta_workflows": DAG_1_MWF_ARRAY_W_SELF_DEP -} -REORDERED_MWF_ARRAY_DAG_1 = [MWF_B_DAG_1, MWF_C_DAG_1, MWF_A_DAG_1, MWF_D_DAG_1] +REORDERED_MWFS_DAG_1 = [["B", "C", "A", "D"], ["C", "B", "A", "D"]] # CYCLIC_0 # A B__ @@ -195,312 +197,86 @@ def test_attribute_validation_attribute_errors(self, mwf_step_dict, items_for_cr MetaWorkflowStep(completed_dict) assert "Object validation error" in str(attr_err_info.value) + @pytest.mark.parametrize( + "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag", + [ + (MWF_A, None, TESTER_UUID, DEP_ON_A, True) + ] + ) + def test_check_self_dep(self, mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag): + """ + """ + with pytest.raises(MetaWorkflowStepSelfDependencyError) as self_dep_err_err_info: + completed_dict = mwf_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag) + MetaWorkflowStep(completed_dict) + assert mwf_step_dict["name"] in str(self_dep_err_err_info.value) -# class TestMetaWorkflowHandler: -# def test_attribute_validation_mwf_handler(self): -# with pytest.raises(ValueError) as val_err_info: -# meta_workflow_handler = MetaWorkflowHandler(HANDLER_WITHOUT_UUID) -# assert "validation error" in str(val_err_info) - -# @pytest.mark.parametrize( -# "mwf_handler_dict, length_of_mwf_list", -# [ -# (HANDLER_WITHOUT_MWF_ARRAY, 0), # sets empty list if attr not present -# (HANDLER_DAG_0, 3), -# (HANDLER_DAG_0_W_DUPLICATES, 3) # 
gets rid of duplicates -# ] -# ) -# def test_set_meta_workflows_list(self, mwf_handler_dict, length_of_mwf_list): -# meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) -# assert len(getattr(meta_workflow_handler, "meta_workflows")) == length_of_mwf_list - -# @pytest.mark.parametrize( -# "mwf_handler_dict, reordered_mwf_list", -# [ -# (HANDLER_WITHOUT_MWF_ARRAY, []), -# (HANDLER_DAG_0, REORDERED_MWF_ARRAY_DAG_0), -# (HANDLER_DAG_0_W_DUPLICATES, REORDERED_MWF_ARRAY_DAG_0), -# (HANDLER_DAG_1, REORDERED_MWF_ARRAY_DAG_1), -# (HANDLER_DAG_1_W_SELF_DEP, REORDERED_MWF_ARRAY_DAG_1) -# ] -# ) -# def test_create_ordered_meta_workflows_list(self, mwf_handler_dict, reordered_mwf_list): -# meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) -# assert getattr(meta_workflow_handler, "ordered_meta_workflows") == reordered_mwf_list - -# @pytest.mark.parametrize( -# "mwf_handler_dict", -# [ -# (HANDLER_CYCLIC_0), -# (HANDLER_CYCLIC_1) -# ] -# ) -# def test_cycles(self, mwf_handler_dict): -# with pytest.raises(Exception) as exc_info: -# meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) -# assert "Cycle in graph: node" in str(exc_info) - -# @pytest.mark.parametrize( -# "mwf_handler_dict", -# [ -# (HANDLER_WITHOUT_MWF_ARRAY), -# (HANDLER_DAG_0), -# (HANDLER_DAG_0_W_DUPLICATES), -# (HANDLER_DAG_1), -# (HANDLER_DAG_1_W_SELF_DEP) -# ] -# ) -# def test_create_ordered_meta_workflow_steps_list(self, mwf_handler_dict): -# meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) -# ordered_meta_workflow_steps = getattr(meta_workflow_handler, "ordered_meta_workflow_steps") -# for step in ordered_meta_workflow_steps: -# assert isinstance(step, MetaWorkflowStep) - - - - - - - -# # dummy class for creating simple objects -# class ClassTester: -# """ -# Class for creation of simple objects, based on an input dictionary -# """ - -# def __init__(self, input_dict): -# """ -# Constructor method, initialize object and attributes. - -# :param input_dict: dictionary defining the basic attributes of object to be created -# :type input_dict: dict -# """ -# for key in input_dict: -# setattr(self, key, input_dict[key]) - -# # TODO: is there a way to functionalize this? 
-# # input dicts to create ClassTester objects -# INPUT_DICT_SINGLE_SIMPLE_ATTR = {"test_0": 0} -# INPUT_DICT_SINGLE_SIMPLE_ATTR_1 = {"test_1": 0} -# INPUT_DICT_SINGLE_SIMPLE_ATTR_2 = {"test_2": 0} -# INPUT_DICT_SEVERAL_SIMPLE_ATTRS = {"test_0": 0, "test_1": 1, "test_2": 2} -# INPUT_DICT_SINGLE_EMPTY_LIST_ATTR = {"list_empty_0": []} -# INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS = { -# "list_empty_0": [], -# "list_empty_1": [], -# "list_empty_2": [], -# } -# INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR = {"list_simple_0": [1, 2, 3]} -# INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS = { -# "list_simple_0": [1, 2, 3], -# "list_simple_1": ["a", "b", "c"], -# } -# INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP = {"list_simple_0": [1, 2, 3, 4, 3]} -# INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = { -# "list_simple_0": [1, "a", 3, 3], -# "list_simple_1": ["a", "b", "c"], -# "list_simple_2": ["c", 1, "c"], -# } - -# LIST_OF_EMPTY_DICTS = [INPUT_DICT_SINGLE_EMPTY_LIST_ATTR, INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS] -# LIST_OF_SIMPLE_ATTR_DICTS = [ -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# INPUT_DICT_SINGLE_SIMPLE_ATTR_1, -# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, -# ] -# LIST_OF_SIMPLE_ATTR_DICTS_REORDERED = [ -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, -# INPUT_DICT_SINGLE_SIMPLE_ATTR_1, -# ] -# LIST_OF_SIMPLE_ATTR_DICTS_W_DUP = [ -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# INPUT_DICT_SINGLE_SIMPLE_ATTR_1, -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# ] -# LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_2 = [ -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# INPUT_DICT_SINGLE_SIMPLE_ATTR_1, -# INPUT_DICT_SINGLE_SIMPLE_ATTR, -# ] -# LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_3 = [ -# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, -# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, -# INPUT_DICT_SINGLE_SIMPLE_ATTR_2, -# ] - -# INPUT_DICT_SINGLE_LIST_OF_DICTS = { -# "list_of_dicts": LIST_OF_SIMPLE_ATTR_DICTS -# } -# INPUT_DICT_SEVERAL_LISTS_OF_DICTS = { -# "list_of_dicts_0": LIST_OF_SIMPLE_ATTR_DICTS, -# "list_of_dicts_1": LIST_OF_SIMPLE_ATTR_DICTS_REORDERED, -# } -# INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP = { -# "list_of_dicts": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP -# } -# INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP = { -# "list_of_dicts_0": LIST_OF_SIMPLE_ATTR_DICTS, -# "list_of_dicts_1": LIST_OF_SIMPLE_ATTR_DICTS_REORDERED, -# "list_of_dicts_2": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_2, -# "list_of_dicts_3": LIST_OF_SIMPLE_ATTR_DICTS_W_DUP_3 -# } - - -# # ClassTester objects -# CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_ATTR) -# CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_ATTRS) -# CLASSTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_EMPTY_LIST_ATTR) -# CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_EMPTY_LIST_ATTRS) -# CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR = ClassTester(INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR) -# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS = ClassTester(INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS) -# CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP = ClassTester( -# INPUT_DICT_SINGLE_SIMPLE_LIST_ATTR_W_DUP -# ) -# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP = ClassTester( -# INPUT_DICT_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP -# ) -# CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS = ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS) -# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS = ClassTester(INPUT_DICT_SEVERAL_LISTS_OF_DICTS) -# CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP 
= ClassTester(INPUT_DICT_SINGLE_LIST_OF_DICTS_W_DUP) -# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP = ClassTester( -# INPUT_DICT_SEVERAL_LISTS_OF_DICTS_W_DUP -# ) - -# class TestSetUniqueListAttributes: -# @pytest.mark.parametrize( -# "input_object, attributes_to_set", -# [ -# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, None), -# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, []), -# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"]), -# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1"]), -# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_1", "test_2"]), -# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"]), -# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"]), -# ], -# ) -# def test_set_unique_list_attributes_of_existing_nonlist_attributes( -# self, input_object, attributes_to_set -# ): -# """ -# Test for function that gets rid of duplicates within object attributes that are lists, -# or sets attributes to empty list if not present within the object. -# Cases where the attributes to set are existent and are NOT lists, no action done. -# """ -# original_object = deepcopy(input_object) -# result = set_unique_list_attributes(input_object, attributes_to_set) -# assert result is None -# assert vars(input_object) == vars(original_object) # no attributes changed -# #TODO: double check the above "vars" functionality - -# @pytest.mark.parametrize( -# "input_object, attributes_to_set, orig_lengths, reset_lengths", -# [ -# (CLASSTESTER_OBJ_SINGLE_EMPTY_LIST_ATTR, ["list_empty_0"], [0], [0]), -# ( -# CLASSTESTER_OBJ_SEVERAL_EMPTY_LIST_ATTRS, -# ["list_empty_0", "list_empty_1", "list_empty_2"], -# [0, 0, 0], -# [0, 0, 0], -# ), -# (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR, ["list_simple_0"], [3], [3]), -# ( -# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS, -# ["list_simple_0", "list_simple_1"], -# [3, 3], -# [3, 3], -# ), -# (CLASSTESTER_OBJ_SINGLE_SIMPLE_LIST_ATTR_W_DUP, ["list_simple_0"], [5], [4]), -# ( -# CLASSTESTER_OBJ_SEVERAL_SIMPLE_LIST_ATTRS_W_DUP, -# ["list_simple_0", "list_simple_2", "list_simple_1"], -# [4, 3, 3], -# [3, 2, 3], -# ), -# (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS, ["list_of_dicts"], [3], [3]), -# ( -# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS, -# ["list_of_dicts_1", "list_of_dicts_0"], -# [3, 3], -# [3, 3], -# ), -# (CLASSTESTER_OBJ_SINGLE_LIST_OF_DICTS_W_DUP, ["list_of_dicts"], [6], [3]), -# ( -# CLASSTESTER_OBJ_SEVERAL_LISTS_OF_DICTS_W_DUP, -# [ -# "list_of_dicts_1", -# "list_of_dicts_0", -# "list_of_dicts_2", -# "list_of_dicts_3", -# ], -# [3, 3, 5, 3], -# [3, 3, 2, 1], -# ), -# ], -# ) -# def test_set_unique_list_attributes_of_existing_list_attributes( -# self, input_object, attributes_to_set, orig_lengths, reset_lengths -# ): -# """ -# Test for function that gets rid of duplicates within object attributes that are lists, -# or sets attributes to empty list if not present within the object. -# Cases where the attributes to set are existent and are lists. 
-# """ -# # import pdb; pdb.set_trace() -# # check original length of attributes_to_set -# for idx, attribute in enumerate(attributes_to_set): -# assert len(getattr(input_object, attribute)) == orig_lengths[idx] - -# result = set_unique_list_attributes(input_object, attributes_to_set) -# # check length of "reset" attributes_to_set -# for idx, attribute in enumerate(attributes_to_set): -# assert len(getattr(input_object, attribute)) == reset_lengths[idx] +class TestMetaWorkflowHandler: + @pytest.mark.parametrize( + "mwf_handler_dict", + [(HANDLER_WITHOUT_UUID_DICT), (HANDLER_WITHOUT_MWF_ARRAY_DICT)] + ) + def test_attribute_validation_mwf_handler(self, mwf_handler_dict): + try: + handler_obj = MetaWorkflowHandler(mwf_handler_dict) + except AttributeError as attr_err_info: + assert "Object validation error" in str(attr_err_info) + else: + assert hasattr(handler_obj, "uuid") == True -# assert result is None + -# @pytest.mark.parametrize( -# "input_object, attributes_to_set, num_added_attributes", -# [ -# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_0"], 0), -# (CLASSTESTER_OBJ_SINGLE_SIMPLE_ATTR, ["test_1"], 1), -# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_2", "test_3"], 1), -# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_5", "test_0", "test_4"], 2), -# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2"], 0), -# (CLASSTESTER_OBJ_SEVERAL_SIMPLE_ATTRS, ["test_0", "test_2", "test_1"], 0), -# ], -# ) -# def test_set_unique_list_attributes_of_nonexistent_attributes( -# self, input_object, attributes_to_set, num_added_attributes -# ): -# """ -# Test for function that gets rid of duplicates within object attributes that are lists, -# or sets attributes to empty list if not present within the object. -# Cases where the attributes to set are nonexistent, so they are added with the value []. 
-# """ -# # TODO: this changes the objects permanently since I'm setting attrs -# # but I don't think this will affect further testing (specifically, fourth example) + @pytest.mark.parametrize( + "mwf_handler_dict, length_of_mwf_dict", + [ + (HANDLER_WITHOUT_MWF_ARRAY_DICT, 0), # sets empty dict if attr not present + (HANDLER_DAG_0, 3), + ] + ) + def test_set_meta_workflows_dict(self, mwf_handler_dict, length_of_mwf_dict): + meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) + assert len(getattr(meta_workflow_handler, "meta_workflows")) == length_of_mwf_dict -# original_attributes_set = set(dir(input_object)) -# num_original_attributes = len(original_attributes_set) + meta_workflow_steps_dict = getattr(meta_workflow_handler, "meta_workflows") + assert isinstance(meta_workflow_steps_dict, dict) + for step in meta_workflow_steps_dict.values(): + assert isinstance(step, MetaWorkflowStep) -# result = set_unique_list_attributes(input_object, attributes_to_set) -# assert result is None + @pytest.mark.parametrize( + "mwf_handler_dict", + [ + (HANDLER_DAG_0_W_DUPLICATES), # complete duplicates + (HANDLER_DAG_0_W_DUPLICATES_BY_MWF_NAME) # duplicates by mwf name + ] + ) + def test_set_meta_workflows_dict_w_error(self, mwf_handler_dict): + with pytest.raises(MetaWorkflowStepDuplicateError) as dup_err_info: + MetaWorkflowHandler(mwf_handler_dict) + assert '"B" is a duplicate MetaWorkflow' in str(dup_err_info) -# reset_attributes_set = set(dir(input_object)) -# num_reset_attributes = len(reset_attributes_set) -# assert num_added_attributes == (num_reset_attributes - num_original_attributes) + @pytest.mark.parametrize( + "mwf_handler_dict, possible_reordered_mwf_lists", + [ + (HANDLER_WITHOUT_MWF_ARRAY_DICT, [[]]), + (HANDLER_DAG_0, REORDERED_MWFS_DAG_0), + (HANDLER_DAG_1, REORDERED_MWFS_DAG_1) + ] + ) + def test_create_ordered_meta_workflows_list(self, mwf_handler_dict, possible_reordered_mwf_lists): + meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) + assert getattr(meta_workflow_handler, "ordered_meta_workflows") in possible_reordered_mwf_lists -# added_attributes = reset_attributes_set.difference(original_attributes_set) -# for attribute in added_attributes: -# assert attribute in attributes_to_set -# assert getattr(input_object, attribute) == [] -# # TODO: add a test for mixed cases? (nonexistent + lists + empties, etc.) 
\ No newline at end of file + @pytest.mark.parametrize( + "mwf_handler_dict", + [ + (HANDLER_CYCLIC_0), + (HANDLER_CYCLIC_1) + ] + ) + def test_cycles(self, mwf_handler_dict): + with pytest.raises(MetaWorkflowStepCycleError) as cycle_err_info: + MetaWorkflowHandler(mwf_handler_dict) + assert "nodes are in a cycle" in str(cycle_err_info) \ No newline at end of file diff --git a/test/test_metawflrun_handler.py b/test/test_metawflrun_handler.py new file mode 100644 index 0000000..a2c24c0 --- /dev/null +++ b/test/test_metawflrun_handler.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python3 + +################################################################# +# Libraries +################################################################# +import pytest +from copy import deepcopy + +from magma.metawflrun_handler import MetaWorkflowRunStep, MetaWorkflowRunHandler + +################################################################# +# Vars +################################################################# + +#TODO: make globals for attributes that you're checking in the tests + +MWF_RUN_HANDLER_NAME = "test_mwf_run_handler" +PROJECT = "test_project" +INSTITUTION = "test_institution" +MWF_RUN_HANDLER_UUID = "test_mwf_run_handler_uuid" + +TESTER_UUID = "test_item_uuid" + +# statuses +PENDING = "pending" +RUNNING = "running" +COMPLETED = "completed" +FAILED = "failed" +STOPPED = "stopped" + +# basic meta_workflow steps (dicts) used in meta_workflows array +#TODO: for validation of basic attributes, what if the value of an attribute is None? +# e.g. name or meta_workflow in metaworkflowRunstep? (because my helper function +# only checks that you can get the actual attribute, but getattr works still +# if the value is None) +MWFR_A = {"name": "A"} +MWFR_B = {"name": "B"} +MWFR_C = {"name": "C"} +MWFR_D = {"name": "D"} + +MWF_NAMES_LIST = ["B", "C", "A", "D"] + +DEP_ON_A = ["A"] +DEP_ON_B = ["B"] +DEP_ON_C = ["C"] +DEP_ON_D = ["D"] + +def mwf_run_with_added_attrs(metaworkflow_dict, dependencies=None, items_for_creation=None, status=None): + dict_copy = deepcopy(metaworkflow_dict) + if dependencies is not None: + dict_copy["dependencies"] = dependencies + if items_for_creation is not None: + dict_copy["items_for_creation"] = items_for_creation + if status is not None: + dict_copy["status"] = status + return dict_copy + +def mwfr_handler_dict_generator(meta_workflow_runs_array): + return { + "name": MWF_RUN_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_RUN_HANDLER_UUID, + "meta_workflow_handler": TESTER_UUID, + "meta_workflow_runs": meta_workflow_runs_array + } + + +# handler without uuid -- fails validation of basic attributes +HANDLER_WITHOUT_UUID_DICT = { + "name": MWF_RUN_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "meta_workflow_handler": TESTER_UUID, + "meta_workflow_runs": [] +} + +# handler without associated MetaWorkflow Handler uuid -- fails validation of basic attributes +HANDLER_WITHOUT_ASSOC_MWFH_DICT = { + "name": MWF_RUN_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_RUN_HANDLER_UUID, + "meta_workflow_runs": [] +} + +# handler without meta_workflow_runs array -- fails validation of basic attributes +HANDLER_WITHOUT_META_WORKFLOW_RUNS_ARRAY = { + "name": MWF_RUN_HANDLER_NAME, + "project": PROJECT, + "institution": INSTITUTION, + "uuid": MWF_RUN_HANDLER_UUID, + "meta_workflow_handler": TESTER_UUID +} + +# B -----> D +# | ⋀ ⋀ +# | / | +# ⋁ / | +# A <----- C +MWFR_A_PENDING = mwf_run_with_added_attrs(MWFR_A, 
DEP_ON_B + DEP_ON_C, TESTER_UUID, PENDING) +MWFR_B_PENDING = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, PENDING) +MWFR_C_PENDING = mwf_run_with_added_attrs(MWFR_C, [], TESTER_UUID, PENDING) +MWFR_D_PENDING = mwf_run_with_added_attrs(MWFR_D, DEP_ON_A + DEP_ON_B + DEP_ON_C, TESTER_UUID, PENDING) + +MWFR_A_RUNNING = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, RUNNING) +MWFR_B_RUNNING = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, RUNNING) +MWFR_C_RUNNING = mwf_run_with_added_attrs(MWFR_C, [], TESTER_UUID, RUNNING) +MWFR_D_RUNNING = mwf_run_with_added_attrs(MWFR_D, DEP_ON_A + DEP_ON_B + DEP_ON_C, TESTER_UUID, RUNNING) + +MWFR_A_FAILED = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, FAILED) + +MWFR_A_STOPPED = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, STOPPED) + +MWFR_A_COMPLETED = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, COMPLETED) +MWFR_B_COMPLETED = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, COMPLETED) +MWFR_C_COMPLETED = mwf_run_with_added_attrs(MWFR_C, [], TESTER_UUID, COMPLETED) +MWFR_D_COMPLETED = mwf_run_with_added_attrs(MWFR_D, DEP_ON_A + DEP_ON_B + DEP_ON_C, TESTER_UUID, COMPLETED) + + +PENDING_ARRAY = [MWFR_B_PENDING, MWFR_C_PENDING, MWFR_A_PENDING, MWFR_D_PENDING] +HANDLER_PENDING = mwfr_handler_dict_generator(PENDING_ARRAY) + +RUNNING_MWFR_ARRAY = [MWFR_B_RUNNING, MWFR_C_RUNNING, MWFR_A_PENDING, MWFR_D_PENDING] +RUNNING_MWFR_ARRAY_2 = [MWFR_B_COMPLETED, MWFR_C_RUNNING, MWFR_A_PENDING, MWFR_D_PENDING] +# this wouldn't happen with THIS dag in particular, +# but could in other cases (made for the sake of the final_status test for the handler) +RUNNING_MWFR_ARRAY_3 = [MWFR_B_COMPLETED, MWFR_C_PENDING, MWFR_A_RUNNING, MWFR_D_PENDING] +HANDLER_STEPS_RUNNING = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY) +HANDLER_STEPS_RUNNING_2 = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY_2) +HANDLER_STEPS_RUNNING_3 = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY_3) + +HALFWAY_DONE_N_FAIL_ARRAY = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_FAILED, MWFR_D_PENDING] +HALFWAY_DONE_N_FAIL_ARRAY_2 = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_FAILED, MWFR_D_RUNNING] +HANDLER_FAILED = mwfr_handler_dict_generator(HALFWAY_DONE_N_FAIL_ARRAY) +HANDLER_FAILED_2 = mwfr_handler_dict_generator(HALFWAY_DONE_N_FAIL_ARRAY_2) + +HALFWAY_DONE_N_STOPPED_ARRAY = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_STOPPED, MWFR_D_PENDING] +HANDLER_STOPPED = mwfr_handler_dict_generator(HALFWAY_DONE_N_STOPPED_ARRAY) + +COMPLETED_ARRAY = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_COMPLETED, MWFR_D_COMPLETED] +HANDLER_COMPLETED = mwfr_handler_dict_generator(COMPLETED_ARRAY) + +################################################################# +# Tests +################################################################# +class TestMetaWorkflowRunStep: + @pytest.mark.parametrize( + "mwf_run_step_dict, dependencies, items_for_creation, num_attributes", + [ + (MWFR_A, [], [TESTER_UUID], 4), # successfully creates + (MWFR_A, [], None, 3) # TODO: for now, doesn't fail if no items for creation + ] + ) + def test_attribute_validation(self, mwf_run_step_dict, dependencies, items_for_creation, num_attributes): + """ + Tests creation of appropriate MetaWorkflowRunStep objects, + no errors raised. 
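+        (num_attributes counts name, dependencies, and the auto-set "pending"
+        status, plus items_for_creation when it is provided.)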
+        """
+        completed_dict = mwf_run_with_added_attrs(mwf_run_step_dict, dependencies, items_for_creation)
+        meta_workflow_run_step_object = MetaWorkflowRunStep(completed_dict)
+        assert num_attributes == len(meta_workflow_run_step_object.__dict__)
+        assert meta_workflow_run_step_object.status == PENDING
+
+        required_attributes = ["name", "dependencies"]#, "items_for_creation"]
+        for attr in required_attributes:
+            assert hasattr(meta_workflow_run_step_object, attr) == True
+
+    @pytest.mark.parametrize(
+        "mwf_run_step_dict, dependencies, items_for_creation",
+        [
+            ({}, [], [TESTER_UUID]), # fails because no name
+            (MWFR_A, None, [TESTER_UUID]), # fails because no dependencies
+        ]
+    )
+    def test_attribute_validation_attribute_errors(self, mwf_run_step_dict, dependencies, items_for_creation):
+        """
+        Tests that an AttributeError is raised when creating MetaWorkflowRunStep
+        objects that are missing required attributes.
+        """
+        with pytest.raises(AttributeError) as attr_err_info:
+            completed_dict = mwf_run_with_added_attrs(mwf_run_step_dict, dependencies, items_for_creation)
+            MetaWorkflowRunStep(completed_dict)
+
+
+class TestMetaWorkflowRunHandler:
+    def test_attribute_validation(self):
+        """
+        Tests creation of appropriate MetaWorkflowRun Handler objects,
+        no errors raised.
+        # TODO: for now, doesn't fail if no associated_item -- could make this check in ff
+        """
+        meta_workflow_run_handler = MetaWorkflowRunHandler(HANDLER_PENDING)
+        assert getattr(meta_workflow_run_handler, "final_status") == PENDING
+        required_attributes = ["uuid", "meta_workflow_handler"]
+        for attr in required_attributes:
+            assert hasattr(meta_workflow_run_handler, attr) == True
+
+    @pytest.mark.parametrize(
+        "input_dict",
+        [
+            (HANDLER_WITHOUT_UUID_DICT), # fails because no uuid
+            (HANDLER_WITHOUT_ASSOC_MWFH_DICT), # fails because no associated metaworkflow handler
+            (HANDLER_WITHOUT_META_WORKFLOW_RUNS_ARRAY) # fails because no meta_workflow_runs array
+        ]
+    )
+    def test_attribute_validation_attribute_errors(self, input_dict):
+        """
+        Tests that an AttributeError is raised when required attributes are
+        missing from the MetaWorkflowRunHandler input. 
+ """ + with pytest.raises(AttributeError) as attr_err_info: + MetaWorkflowRunHandler(input_dict) + assert "Object validation error" in str(attr_err_info.value) + + def test_create_meta_workflow_run_step_objects(self): + meta_workflow_run_handler = MetaWorkflowRunHandler(HANDLER_PENDING) + meta_workflow_run_step_dict = getattr(meta_workflow_run_handler, "meta_workflow_run_step_dict") + assert len(meta_workflow_run_step_dict) == 4 + for mwf_name, mwf_run_step in meta_workflow_run_step_dict.items(): + assert mwf_name in MWF_NAMES_LIST + assert isinstance(mwf_run_step, MetaWorkflowRunStep) + + @pytest.mark.parametrize( + "input_dict, updated_final_status", + [ + (HANDLER_PENDING, PENDING), + (HANDLER_STEPS_RUNNING, RUNNING), + (HANDLER_STEPS_RUNNING_2, RUNNING), + (HANDLER_STEPS_RUNNING_3, RUNNING), + (HANDLER_FAILED, FAILED), + (HANDLER_FAILED_2, FAILED), + (HANDLER_STOPPED, STOPPED), + (HANDLER_COMPLETED, COMPLETED) + ] + ) + def test_update_final_status(self, input_dict, updated_final_status): + meta_workflow_run_handler = MetaWorkflowRunHandler(input_dict) + assert meta_workflow_run_handler.final_status == PENDING + meta_workflow_run_handler.update_final_status() + assert meta_workflow_run_handler.final_status == updated_final_status + + @pytest.mark.parametrize( + "input_dict, mwfr_step_name_to_reset", + [ + (HANDLER_COMPLETED, "A"), + (HANDLER_COMPLETED, "non_existent_mwf_run_step") + ] + ) + def test_reset_meta_workflow_run_step(self, input_dict, mwfr_step_name_to_reset): + try: + handler_obj = MetaWorkflowRunHandler(input_dict) + prior_step_status = handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_reset].status + handler_obj.reset_meta_workflow_run_step(mwfr_step_name_to_reset) + updated_step_status = handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_reset].status + assert prior_step_status != updated_step_status + assert updated_step_status == PENDING + updated_step_run = handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_reset].meta_workflow_run + assert updated_step_run is None + except KeyError as key_err_info: + assert mwfr_step_name_to_reset in str(key_err_info) + + @pytest.mark.parametrize( + "input_dict, mwfr_step_name_to_update, attribute, value", + [ + (HANDLER_COMPLETED, "A", "status", FAILED), + (HANDLER_COMPLETED, "non_existent_mwf_run_step", None, None) + ] + ) + def test_update_meta_workflow_run_step(self, input_dict, mwfr_step_name_to_update, attribute, value): + try: + handler_obj = MetaWorkflowRunHandler(input_dict) + attr_value_before_change = getattr(handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_update], attribute) + handler_obj.update_meta_workflow_run_step(mwfr_step_name_to_update, attribute, value) + attr_value_after_change = getattr(handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_update], attribute) + assert attr_value_before_change != attr_value_after_change + assert attr_value_after_change == value + except KeyError as key_err_info: + assert mwfr_step_name_to_update in str(key_err_info) + + @pytest.mark.parametrize( + "input_dict, steps_to_run", + [ + (HANDLER_PENDING, MWF_NAMES_LIST), + (HANDLER_STEPS_RUNNING, ["A", "D"]), + (HANDLER_STEPS_RUNNING_2, ["A", "D"]), + (HANDLER_FAILED, ["D"]), + (HANDLER_FAILED_2, []), + (HANDLER_COMPLETED, []) + ] + ) + def test_pending_steps(self, input_dict, steps_to_run): + handler_obj = MetaWorkflowRunHandler(input_dict) + result = handler_obj.pending_steps() + assert result == steps_to_run + + @pytest.mark.parametrize( + "input_dict, steps_currently_running", + [ + 
(HANDLER_PENDING, []), + (HANDLER_STEPS_RUNNING, ["B", "C"]), + (HANDLER_STEPS_RUNNING_2, ["C"]), + (HANDLER_FAILED, []), + (HANDLER_FAILED_2, ["D"]), + (HANDLER_COMPLETED, []) + ] + ) + def test_running_steps(self, input_dict, steps_currently_running): + handler_obj = MetaWorkflowRunHandler(input_dict) + result = handler_obj.running_steps() + assert result == steps_currently_running \ No newline at end of file From 6321fbfa9a1579638c00602098aae81a7661d950 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Thu, 20 Apr 2023 22:36:23 -0400 Subject: [PATCH 22/38] Check in --- magma/metawfl_handler.py | 42 +----- magma/metawflrun_handler.py | 3 + magma/update_handler.py | 82 ++++++++++++ magma_ff/checkstatus.py | 130 +++++++++++++++++- magma_ff/create_metawflrun_handler.py | 186 ++++++++++++++++++++++++++ magma_ff/metawfl_handler.py | 21 +-- magma_ff/metawflrun_handler.py | 27 ++++ magma_ff/wfrutils.py | 75 +++++++++++ 8 files changed, 510 insertions(+), 56 deletions(-) create mode 100644 magma/update_handler.py create mode 100644 magma_ff/create_metawflrun_handler.py create mode 100644 magma_ff/metawflrun_handler.py diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index 7565d3b..4572b0c 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 +# TODO: use getattr with constants rather than self references +# TODO: parsing necessary to get rid of extra attributes? + ################################################ # Libraries ################################################ @@ -160,42 +163,3 @@ def _create_ordered_meta_workflows_list(self): return sorter.sorted_graph_list() except CycleError: raise MetaWorkflowStepCycleError() - - # def create_meta_workflow_run_handler_input_dict(self, associated_item_uuid): - # """ - # Completes attributes and other metadata for Metaworkflow Run Handler - - # TODO: should assoc item be a param? because it is a required mwfr handler attr for CGAP - - # :param associated_item_uuid: - # :type associated_item_uuid: str - # :return: input dictionary (JSON?? TODO:) to create MetaWorkflowRunHandler object - # :rtype: dict - # """ - - # #TODO: when should i do the check on the duplication flag? - - # #TODO: use getattr with constants rather than self references - - # input_dict = { - # #TODO: what's commented out will be taken care of in create_metawfr_handler.py - # # "project": self.project, - # # "institution": self.institution, - # #TODO: can you use a mwf handler from one proj/inst to - # # create a handler with a different proj/inst? - # "meta_workflow_handler": self.uuid, # assuming it's already converted from uuid to str? 
- # "associated_item": associated_item_uuid, - # "final_status": "pending", #TODO: make this a constant - # "meta_workflow_runs": [] - # } - - # # this goes stepwise, in order, and accessing mwf metadata through corresponding dict lookup by name - # for meta_workflow_name in self.ordered_meta_workflows: - # meta_workflow_run_dict = {} - # meta_workflow_run_dict.setdefault("name", meta_workflow_name) - # meta_workflow_run_dict.setdefault("items_for_creation", self.meta_workflows[meta_workflow_name].ITEMS_CREATION_UUID) - # meta_workflow_run_dict.setdefault("dependencies", self.meta_workflows[meta_workflow_name].dependencies) - # meta_workflow_run_dict.setdefault("status", "pending") #TODO: constant here - # #TODO: meta_workflow_run uuid taken care of in ff creation of run handler - - # input_dict["meta_workflow_runs"].append(meta_workflow_run_dict) diff --git a/magma/metawflrun_handler.py b/magma/metawflrun_handler.py index 7954c6e..a63c8ef 100644 --- a/magma/metawflrun_handler.py +++ b/magma/metawflrun_handler.py @@ -124,6 +124,9 @@ def update_final_status(self): if all_steps_completed: setattr(self, self.FINAL_STATUS_ATTR, "completed") + #TODO: update pytests here + return self.FINAL_STATUS_ATTR + # the following allows for resetting a MetaWorkflow Run Step # this can happen only when the duplication flag is set to True def reset_meta_workflow_run_step(self, meta_workflow_run_name): diff --git a/magma/update_handler.py b/magma/update_handler.py new file mode 100644 index 0000000..38161f3 --- /dev/null +++ b/magma/update_handler.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +################################################ +# Libraries +################################################ +import sys, os + +################################################ +# UpdateHandler +################################################ +class UpdateHandler(object): + """ + Class to handle MetaWorkflowRunHandler and MetaWorkflowRun object updates. + """ + + def __init__(self, run_handler_obj): + """ + Constructor method. + Initialize object and attributes. + + :param run_handler_obj: MetaWorkflowRunHandler magma object, representing a MetaWorkflowRunHandler JSON from CGAP portal + :type run_handler_obj: object + """ + # Basic attributes + self.run_handler_obj = run_handler_obj + + def reset_steps(self, step_names): + """ + Reset MetaWorkflowRun object in step_names list. + + :param step_names: List of names for MetaWorkflowRun steps to be reset + :type step_names: list[str] + :return: Updated meta_workflow_runs and handler final_status information + :rtype: dict + """ + for name in step_names: + self.wflrun_obj.reset_step(name) + + # used later to PATCH onto the portal + return {'final_status': self.run_handler_obj.update_final_status(), + 'workflow_runs': self.wflrun_obj.runs_to_json()} + + # def import_steps(self, wflrun_obj, steps_name, import_input=True): + # """Update current MetaWorkflowRun[obj] information. + # Import and use information from specified wflrun_obj. 
+    #        Update WorkflowRun[obj] up to steps specified by steps_name
+
+    #     :param wflrun_obj: MetaWorkflowRun[obj] to import information from
+    #     :type wflrun_obj: object
+    #     :param steps_name: List of names for steps to import
+    #     :type steps_name: list(str)
+    #     :return: MetaWorkflowRun[json]
+    #     :rtype: dict
+    #     """
+    #     ## Import input
+    #     if import_input:
+    #         self.wflrun_obj.input = wflrun_obj.input
+    #     ## Import WorkflowRun objects
+    #     for name in steps_name:
+    #         queue = [] # queue of steps to import
+    #         # name step and its dependencies
+    #         # Get workflow-runs corresponding to name step
+    #         for shard_name, run_obj in self.wflrun_obj.runs.items():
+    #             if name == shard_name.split(':')[0]:
+    #                 queue.append(run_obj)
+    #         # Iterate queue, get dependencies and import workflow-runs
+    #         while queue:
+    #             run_obj = queue.pop(0)
+    #             shard_name = run_obj.shard_name
+    #             dependencies = run_obj.dependencies
+    #             try:
+    #                 self.wflrun_obj.runs[shard_name] = wflrun_obj.runs[shard_name]
+    #             except KeyError as e:
+    #                 # raise ValueError('JSON content error, missing information for workflow-run "{0}"\n'
+    #                 #                  .format(e.args[0]))
+    #                 continue
+    #             for dependency in dependencies:
+    #                 queue.append(self.wflrun_obj.runs[dependency])
+    #     # Update final_status
+    #     self.wflrun_obj.update_status()
+
+    #     return self.wflrun_obj.to_json()
diff --git a/magma_ff/checkstatus.py b/magma_ff/checkstatus.py
index 0ec6b70..e8fc160 100644
--- a/magma_ff/checkstatus.py
+++ b/magma_ff/checkstatus.py
@@ -13,7 +13,7 @@
 
 # magma
 from magma.checkstatus import AbstractCheckStatus
-from magma_ff.wfrutils import FFWfrUtils
+from magma_ff.wfrutils import FFWfrUtils, FFMetaWfrUtils
 
 ################################################
 # CheckStatusFF
@@ -34,7 +34,7 @@ def __init__(self, wflrun_obj, env=None):
 
         # Portal-related attributes
         self._env = env
 
-        # Cache for FFWfrUtils object
+        # For FFMetaWfrUtils object
         self._ff = None
     #end def
@@ -86,3 +86,129 @@ def ff(self):
         return self._ff
 
 #end class
+
+
+################################################
+# CheckStatusRunHandlerFF
+################################################
+#TODO: not using an abstract class
+class CheckStatusRunHandlerFF(object):
+    """
+    Customized CheckStatus class for MetaWorkflow Run Handler for the CGAP portal.
+    """
+
+    def __init__(self, mwfr_handler_obj, env=None):
+        """
+        Initialize object and attributes.
+
+        :param mwfr_handler_obj: MetaWorkflowRunHandler[obj] representing a MetaWorkflowRunHandler[json]
+        :type mwfr_handler_obj: object
+        :param env: Name of the environment to use (e.g. fourfront-cgap)
+        :type env: str
+        """
+        # Basic attributes
+        self.mwfr_handler_obj = mwfr_handler_obj
+
+        # Used for searching CGAP portal-related attributes
+        self._env = env
+        # For FFMetaWfrUtils object, to search CGAP portal-related attributes
+        self._ff = None
+
+    # @property
+    # def status_map(self):
+    #     """Mapping from get_status output to magma status.
+    #     """
+    #     return {
+    #         'started': 'running',
+    #         'complete': 'completed',
+    #         'error': 'failed'
+    #     }
+
+    # Scratch notes on the eventual status_map for run handlers:
+    # return {
+    #     'pending': 'pending',
+    #     'running': 'running',
+    #     'completed': 'completed',
+    #     'failed' : 'failed'
+    # }
+
+    # Possible MetaWorkflowRun statuses (portal schema): "pending", "running",
+    # "completed", "failed", "inactive", "stopped", "quality metric failed"
+    # Possible Handler final statuses: "pending", "running", "completed",
+    # "failed", "stopped"
+
+    def check_running_mwfr_steps(self):
+        """
+        Check the currently running MetaWorkflowRun steps and update
+        statuses accordingly.
+ """ + # Iterate through list of running MetaWorkflow Run steps (array of objects) + for mwfr_step in self.mwfr_handler_obj.running_steps(): + + # Check current status from MWF run name + status_ = self.get_status(run_obj.jobid) + status = self.status_map[status_] + + # Update run status no matter what + self.wflrun_obj.update_attribute(run_obj.shard_name, 'status', status) + + # Get run uuid + run_uuid = self.get_uuid(run_obj.jobid) + + # Update run uuid regardless of the status + if run_uuid: # some failed runs don't have run uuid + self.wflrun_obj.update_attribute(run_obj.shard_name, 'workflow_run', run_uuid) + + if status == 'completed': + + # Get formatted output + output = self.get_output(run_obj.jobid) + + # Update output + if output: + self.wflrun_obj.update_attribute(run_obj.shard_name, 'output', output) + + elif status == 'running': + yield None # yield None so that it doesn't terminate iteration + continue + else: # failed + # handle error status - anything to do before yielding the updated json + self.handle_error(run_obj) + #end if + + # Return the json to patch workflow_runs for both completed and failed + # and keep going so that it can continue updating status for other runs + yield {'final_status': self.wflrun_obj.update_status(), + 'workflow_runs': self.wflrun_obj.runs_to_json()} + + for patch_dict in super().check_running(): + if patch_dict: + failed_jobs = self.wflrun_obj.update_failed_jobs() + if len(failed_jobs) > 0: + patch_dict['failed_jobs'] = failed_jobs + cost = self.wflrun_obj.update_cost() + if cost is not None and cost > 0: + patch_dict['cost'] = cost + yield patch_dict + + def get_status(self, jobid): + """ + Returns the status of the given MetaWorkflow Run, from CGAP portal + """ + return self.ff.wfr_run_status(jobid) + + @property + def ff(self): + """Internal property used for get_status from CGAP portal for given MetaWorkflow Run + """ + if not self._ff: + self._ff = FFMetaWfrUtils(self._env) + return self._ff \ No newline at end of file diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py new file mode 100644 index 0000000..0ac817e --- /dev/null +++ b/magma_ff/create_metawflrun_handler.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 + +################################################ +# Libraries +################################################ +import datetime +import json +import uuid + +from dcicutils import ff_utils + +# magma +from magma_ff.metawfl_handler import MetaWorkflowHandler +from magma_ff.metawflrun_handler import MetaWorkflowRunHandler +from magma_ff.utils import make_embed_request + +################################################ +# Constants +################################################ +# UUID = "uuid" + +################################################ +# Custom Exception class(es) +################################################ +class MetaWorkflowRunHandlerCreationError(Exception): + pass + +################################################ +# MetaWorkflow Run Handler from Item +################################################ +class MetaWorkflowRunHandlerFromItem: + """ + Base class to hold common methods required to create and POST a + MetaWorkflowRun Handler, and PATCH the Item used to create it (the "associated item"). + """ + # Schema constants #TODO: make these constants throughout all files? and where to put that file? + # or a file with different constant classes? 
+ PROJECT = "project" + INSTITUTION = "institution" + UUID = "uuid" + TITLE = "title" + ASSOCIATED_META_WORKFLOW_HANDLER = "meta_workflow_handler" + ASSOCIATED_ITEM = "associated_item" + FINAL_STATUS = "final_status" + META_WORKFLOW_RUNS = "meta_workflow_runs" + + # specific to a mwf run step #TODO: called later on in this class, right? + META_WORKFLOW_RUN = "meta_workflow_run" + NAME = "name" + MWFR_STATUS = "status" + DEPENDENCIES = "dependencies" + ITEMS_FOR_CREATION = "items_for_creation" + ERROR = "error" + + PENDING = "pending" + + # Embedding API Fields -- used for embedding request on associated item of the run handler + FIELDS_TO_GET = [ + PROJECT, + INSTITUTION, + UUID, + ] + + def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, auth_key): + """ + Initialize the MWF Run Handler object, set all attributes. + + :param associated_item_identifier: Item identifier (UUID, @id, or accession) + on which this MetaWorkflow Run Handler is being created + :type associated_item_identifier: str + :param meta_workflow_handler_identifier: Associated MetaWorkflow Handler identifier + (UUID, @id, or accession) + :type meta_workflow_handler_identifier: str + :param auth_key: Portal authorization key + :type auth_key: dict + :raises MetaWorkflowRunHandlerCreationError: If required item (associated_item) cannot + be found on environment of authorization key + """ + self.auth_key = auth_key + # this calls for the specified metadata on the associated_item of this MWF Run Handler to be created + #TODO: use normal get request (ffutils get metadata) + # embedding pulls outta postgres, which is slower than elasticsearch + # use embedding for the property tracing and duplication flag checks + #TODO: make this change for the mwfr data structure too? + self.associated_item = make_embed_request( + associated_item_identifier, + self.FIELDS_TO_GET, + self.auth_key, + single_item=True, + ) + if not self.associated_item: + raise MetaWorkflowRunHandlerCreationError( + "No Item found for given identifier: %s" % associated_item_identifier + ) + + # check that the specified identifier for the associated MWF Handler does indeed exist on portal + #TODO: a check to make sure it is indeed of mwf handler type? does this function exist on ff_utils? + self.meta_workflow_handler = self.get_item_properties(meta_workflow_handler_identifier) + if not self.meta_workflow_handler: + raise MetaWorkflowRunHandlerCreationError( + "No MetaWorkflow Handler found for given identifier: %s" + % meta_workflow_handler_identifier + ) + + # now fill in the rest of the attributes of this MWF Run Handler + self.project = self.associated_item.get(self.PROJECT) # project is same as associated item + self.institution = self.associated_item.get(self.INSTITUTION) # institution is same as associated item + self.associated_item_id = self.associated_item.get(self.UUID) # get uuid of associated item + self.meta_workflow_handler_id = self.meta_workflow_handler.get(self.UUID) # get uuid of the template mwf handler + self.meta_workflow_run_handler_uuid = str(uuid.uuid4()) #TODO: put exception to catch duplicates? 
i think the portal handles this
+
+        # self.existing_meta_workflow_runs = self.associated_item.get(
+        #     self.META_WORKFLOW_RUNS, []
+        # ) #TODO: this is to check for duplicating metaworkflows
+        # come back to this
+
+        # and now create the actual MetaWorkflow Run Handler
+        # this returns the dict itself, not just an ID
+        self.meta_workflow_run_handler = self.create_meta_workflow_run_handler()
+
+    def get_item_properties(self, item_identifier):
+        """
+        Retrieve item from given environment without raising
+        Exception if not found, rather, returns None.
+
+        :param item_identifier: Item identifier (UUID, @id, or accession) on the portal
+        :type item_identifier: str
+        :return: Raw view of item if found
+        :rtype: dict or None
+        """
+        # TODO: same as create_metawfr.py --> make a generalized function?
+        try:
+            result = ff_utils.get_metadata(
+                item_identifier, key=self.auth_key, add_on="frame=raw"
+            )
+        except Exception:
+            result = None
+        return result
+
+    def create_meta_workflow_run_handler(self):
+        """
+        Create MetaWorkflowRun Handler, which will later be POSTed to the CGAP portal.
+
+        :return: MetaWorkflowRun Handler dictionary (for the portal JSON object)
+        :rtype: dict
+        """
+
+        #TODO: check Doug's prior comments on title
+        meta_workflow_handler_title = self.meta_workflow_handler.get(self.TITLE)
+        creation_date = datetime.date.today().isoformat()
+        title = "MetaWorkflowRun Handler %s created %s" % (
+            meta_workflow_handler_title,
+            creation_date
+        )
+
+        meta_workflow_run_handler = {
+            self.PROJECT: self.project,
+            self.INSTITUTION: self.institution,
+            self.UUID: self.meta_workflow_run_handler_uuid,
+            self.TITLE: title,
+            self.ASSOCIATED_META_WORKFLOW_HANDLER: self.meta_workflow_handler_id,
+            self.ASSOCIATED_ITEM: self.associated_item_id,
+            self.FINAL_STATUS: self.PENDING,
+            self.META_WORKFLOW_RUNS: []
+        }
+
+        # now call helper function to populate and create the MetaWorkflow Runs
+        # TODO: handle duplication flag??
+        # TODO: should duplication only happen when the status of the original
+        # mwfr is not successful?
+        self.create_meta_workflow_runs_array(meta_workflow_run_handler)
+
+        # return the completed MWFR Handler dictionary, following the CGAP schema
+        return meta_workflow_run_handler
+
+    def create_meta_workflow_runs_array(self, meta_workflow_run_handler_dict):
+        #TODO: item for creation prop trace
+        #TODO: handle duplication flag
+        # TODO: case where mwf run already exists? and dup flag = F? reset the run?
+        pass
+
+    # TODO: for POST and PATCH, will there be changes to schemas other than handlers
+    # in order to accommodate this? like maybe within the mixins schemas file
+    # which can then be easily integrated within other schemas in the future?
+    # because the mwfr handler will now be living on whatever item, rather than
+    # a sample or a sample processing
\ No newline at end of file
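For reference, the dictionary assembled by create_meta_workflow_run_handler above
comes out shaped roughly like the following; the field names come from the schema
constants in this file, and every value below is a placeholder, not real data:

    # Illustrative output of create_meta_workflow_run_handler
    example_run_handler = {
        "project": "/projects/example-project/",
        "institution": "/institutions/example-institution/",
        "uuid": "newly-generated-run-handler-uuid",
        "title": "MetaWorkflowRun Handler example_title created 2023-04-20",
        "meta_workflow_handler": "template-handler-uuid",
        "associated_item": "associated-item-uuid",
        "final_status": "pending",
        "meta_workflow_runs": [],  # populated by create_meta_workflow_runs_array
    }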
+ """ + # Use cache + if job_id in self._metadata: + return self._metadata[job_id] + # Search by job id + query='/search/?type=WorkflowRun&awsem_job_id=%s' % job_id + try: + search_res = ff_utils.search_metadata(query, key=self.ff_key) + except Exception as e: + raise FdnConnectionException(e) + if search_res: + self._metadata[job_id] = search_res[0] + return self._metadata[job_id] + else: + # find it from dynamoDB + job_info = Job.info(job_id) + if not job_info: + return None + wfr_uuid = job_info.get('WorkflowRun uuid', '') + if not wfr_uuid: + return None + self._metadata[job_id] = ff_utils.get_metadata(wfr_uuid, key=self.ff_key) + return self._metadata[job_id] + + # def wfr_run_uuid(self, job_id): + # """This is the function to be used by Magma. + # """ + # wfr_meta = self.wfr_metadata(job_id) + # if not wfr_meta: + # return None + # return wfr_meta['uuid'] + + # def wfr_run_status(self, job_id): + # """This is the function to be used by Magma. + # Return the status of the run associated with specified job_id. + # If run associated with job_id is not found, we consider it failed. + # """ + # wfr_meta = self.wfr_metadata(job_id) + # if not wfr_meta: + # return 'error' + # else: + # return wfr_meta['run_status'] + + + + @property + def ff_key(self): + """Get access key for the portal. + """ + # Use cache + if not self._ff_key: + # Use tibanna key for now + self._ff_key = s3Utils(env=self.env).get_access_keys('access_key_tibanna') + return self._ff_key \ No newline at end of file From f4a47ba6f8c46d899d7f98052ad1472d01b1d07e Mon Sep 17 00:00:00 2001 From: vstevensf Date: Fri, 21 Apr 2023 08:08:24 -0400 Subject: [PATCH 23/38] Main changes to create mwfr handler function --- magma/metawfl_handler.py | 2 +- magma/metawflrun_handler.py | 2 +- magma/update_handler.py | 28 ++++---- magma_ff/checkstatus.py | 24 +++++-- magma_ff/create_metawflrun_handler.py | 99 ++++++++++++++++++--------- magma_ff/wfrutils.py | 59 ++++++++-------- 6 files changed, 131 insertions(+), 83 deletions(-) diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index 4572b0c..ba5d765 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # TODO: use getattr with constants rather than self references -# TODO: parsing necessary to get rid of extra attributes? +# TODO: parsing necessary to get rid of extra attributes? i dont think so ################################################ # Libraries diff --git a/magma/metawflrun_handler.py b/magma/metawflrun_handler.py index a63c8ef..0aa9149 100644 --- a/magma/metawflrun_handler.py +++ b/magma/metawflrun_handler.py @@ -41,7 +41,6 @@ def __init__(self, input_dict): ################################################ # MetaWorkflowRunHandler ################################################ -#TODO: what about associated item on the MWF handler itself, not just items for creation? 
class MetaWorkflowRunHandler(ValidatedDictionary):
     """
     Class representing a MetaWorkflowRun Handler object,
@@ -82,6 +81,7 @@ def __init__(self, input_dict):
 
 
     def _create_meta_workflow_run_step_objects(self):
+        # creates dict: {name_1: step_obj_1, name_2: step_obj_2, ...}
         meta_workflow_run_step_dict = {}
         for meta_workflow_run in self.meta_workflow_runs:
             meta_workflow_run_step_object = MetaWorkflowRunStep(meta_workflow_run)
diff --git a/magma/update_handler.py b/magma/update_handler.py
index 38161f3..d994970 100644
--- a/magma/update_handler.py
+++ b/magma/update_handler.py
@@ -1,16 +1,11 @@
 #!/usr/bin/env python3
 
-################################################
-# Libraries
-################################################
-import sys, os
-
 ################################################
 # UpdateHandler
 ################################################
 class UpdateHandler(object):
     """
-    Class to handle MetaWorkflowRunHandler and MetaWorkflowRun object updates.
+    Class to handle MetaWorkflowRunHandler and MetaWorkflowRun Step object updates.
     """
 
     def __init__(self, run_handler_obj):
@@ -24,21 +19,24 @@ def __init__(self, run_handler_obj):
         # Basic attributes
         self.run_handler_obj = run_handler_obj
 
-    def reset_steps(self, step_names):
+    def reset_specified_steps(self, metaworkflow_run_step_names):
         """
-        Reset MetaWorkflowRun Step objects in the step_names list.
+        Reset the MetaWorkflowRun Step object(s) in the given list, and update
+        the final_status of the MetaWorkflowRunHandler accordingly.
 
-        :param step_names: List of names for MetaWorkflowRun steps to be reset
-        :type step_names: list[str]
+        :param metaworkflow_run_step_names: List of MetaWorkflowRun steps to be reset, by name
+        :type metaworkflow_run_step_names: list[str]
         :return: Updated meta_workflow_runs and handler final_status information
         :rtype: dict
         """
-        for name in step_names:
-            self.run_handler_obj.reset_meta_workflow_run_step(name)
+        # Resets each MetaWorkflowRun step in the list (status and meta_workflow_run LinkTo)
+        for metaworkflow_run_name in metaworkflow_run_step_names:
+            self.run_handler_obj.reset_meta_workflow_run_step(metaworkflow_run_name)
 
-        # used later to PATCH onto the portal
-        # TODO: serialize the updated steps for the PATCH body
-        return {'final_status': self.run_handler_obj.update_final_status(),
-                'meta_workflow_runs': self.run_handler_obj.meta_workflow_runs}
+        # Return dictionary of attributes to be PATCHed for Run Handler on the CGAP portal
+        # TODO: put this in ff? since it is CGAP portal specific?
+
+        # return {'final_status': self.run_handler_obj.update_final_status(),
+        #         'meta_workflow_runs': self.run_handler_obj.meta_workflow_runs} TODO: is this right....
 
     # def import_steps(self, wflrun_obj, steps_name, import_input=True):
    #     """Update current MetaWorkflowRun[obj] information.
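The commented-out return above leaves the PATCH payload undecided. One way the
handler's steps could be serialized for the portal PATCH is sketched below; no
runs_to_json-style helper exists yet in this series, so this function and its
filtering of underscore-prefixed attributes are assumptions, not the chosen design:

    # Hedged sketch: serialize a run handler's steps for a portal PATCH body,
    # assuming each MetaWorkflowRunStep keeps its public attributes in __dict__
    def run_handler_to_patch_body(run_handler_obj):
        meta_workflow_runs = []
        for step in run_handler_obj.meta_workflow_run_step_dict.values():
            step_json = {
                key: value for key, value in vars(step).items()
                if not key.startswith("_") and value is not None
            }
            meta_workflow_runs.append(step_json)
        return {
            "final_status": run_handler_obj.update_final_status(),
            "meta_workflow_runs": meta_workflow_runs,
        }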
diff --git a/magma_ff/checkstatus.py b/magma_ff/checkstatus.py index e8fc160..c9c807d 100644 --- a/magma_ff/checkstatus.py +++ b/magma_ff/checkstatus.py @@ -9,7 +9,6 @@ ################################################ # Libraries ################################################ -import sys, os # magma from magma.checkstatus import AbstractCheckStatus @@ -34,7 +33,7 @@ def __init__(self, wflrun_obj, env=None): # Portal-related attributes self._env = env - # For FFMetaWfrUtils object + # For FFWfrUtils object self._ff = None #end def @@ -65,17 +64,17 @@ def check_running(self): def get_uuid(self, jobid): """ """ - return self.ff.wfr_run_uuid(jobid) + return self._ff.wfr_run_uuid(jobid) def get_status(self, jobid): """ """ - return self.ff.wfr_run_status(jobid) + return self._ff.wfr_run_status(jobid) def get_output(self, jobid): """ """ - return self.ff.get_minimal_processed_output(jobid) + return self._ff.get_minimal_processed_output(jobid) @property def ff(self): @@ -114,6 +113,18 @@ def __init__(self, mwfr_handler_obj, env=None): # For FFMetaWfrUtils object, to search CGAP portal-related attributes self._ff = None + @property + def status_map(self): + """Mapping from get_status output to magma status. + Set to property so that inherited classes can overwrite it. + """ + return { + 'pending': 'pending', + 'running': 'running', + 'completed': 'completed', + 'failed' : 'failed' + } + # @property # def status_map(self): # """Mapping from get_status output to magma status. @@ -207,7 +218,8 @@ def get_status(self, jobid): @property def ff(self): - """Internal property used for get_status from CGAP portal for given MetaWorkflow Run + """ + Internal property used for get_status from CGAP portal for given MetaWorkflow Run """ if not self._ff: self._ff = FFMetaWfrUtils(self._env) diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py index 0ac817e..0603817 100644 --- a/magma_ff/create_metawflrun_handler.py +++ b/magma_ff/create_metawflrun_handler.py @@ -54,13 +54,6 @@ class MetaWorkflowRunHandlerFromItem: PENDING = "pending" - # Embedding API Fields -- used for embedding request on associated item of the run handler - FIELDS_TO_GET = [ - PROJECT, - INSTITUTION, - UUID, - ] - def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, auth_key): """ Initialize the MWF Run Handler object, set all attributes. @@ -82,42 +75,64 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, # embedding pulls outta postgres, which is slower than elasticsearch # use embedding for the property tracing and duplication flag checks #TODO: make this change for the mwfr data structure too? - self.associated_item = make_embed_request( - associated_item_identifier, - self.FIELDS_TO_GET, - self.auth_key, - single_item=True, - ) - if not self.associated_item: + + self.associated_item_json = self.get_item_properties(associated_item_identifier) + if not self.associated_item_json: # TODO: restructure so this creation error is in method get_item_properties raise MetaWorkflowRunHandlerCreationError( - "No Item found for given identifier: %s" % associated_item_identifier + "No Item found for given 'associated item' identifier: %s" % associated_item_identifier ) # check that the specified identifier for the associated MWF Handler does indeed exist on portal #TODO: a check to make sure it is indeed of mwf handler type? does this function exist on ff_utils? 
- self.meta_workflow_handler = self.get_item_properties(meta_workflow_handler_identifier) - if not self.meta_workflow_handler: + self.meta_workflow_handler_json = self.get_item_properties(meta_workflow_handler_identifier) + if not self.meta_workflow_handler_json: raise MetaWorkflowRunHandlerCreationError( "No MetaWorkflow Handler found for given identifier: %s" % meta_workflow_handler_identifier ) # now fill in the rest of the attributes of this MWF Run Handler - self.project = self.associated_item.get(self.PROJECT) # project is same as associated item - self.institution = self.associated_item.get(self.INSTITUTION) # institution is same as associated item - self.associated_item_id = self.associated_item.get(self.UUID) # get uuid of associated item - self.meta_workflow_handler_id = self.meta_workflow_handler.get(self.UUID) # get uuid of the template mwf handler + self.project = self.associated_item_json.get(self.PROJECT) # project is same as associated item + self.institution = self.associated_item_json.get(self.INSTITUTION) # institution is same as associated item + self.associated_item_id = self.associated_item_json.get(self.UUID) # get uuid of associated item + self.meta_workflow_handler_id = self.meta_workflow_handler_json.get(self.UUID) # get uuid of the template mwf handler self.meta_workflow_run_handler_uuid = str(uuid.uuid4()) #TODO: put exception to catch duplicates? i think the portal handles this - # self.existing_meta_workflow_runs = self.associated_item.get( - # self.META_WORKFLOW_RUNS, [] - # ) #TODO: this is to check for duplicating metaworkflows - # come back to this + #TODO: this is to check for duplicating metaworkflows + existing_meta_workflow_runs_linktos = self.associated_item_json.get(self.META_WORKFLOW_RUNS, []) + # above returns [] if no existing mwfr, else returns list of linktos + + # this is a dict of linkTos and corresponding aliases {linkTo: [aliases]} + # self.existing_meta_workflow_runs = self.extract_mwfr_names(existing_meta_workflow_runs_linktos) + self.existing_meta_workflows_on_assoc_item = self.extract_mwf_linktos(existing_meta_workflow_runs_linktos) # and now create the actual MetaWorkflow Run Handler # this returns the dict itself, not just an ID + # this attribute is later used to run the thang self.meta_workflow_run_handler = self.create_meta_workflow_run_handler() + # def extract_mwfr_names(self, existing_linktos_list): + # linkto_alias_dict = {} + # for linkto in existing_linktos_list: + # #TODO: does embed request work with @ids and uuids + # #TODO: match user submitted names to existing aliases...or.... + # # because there is no existing "name" attr on mwfr schema at the moment + # # also is it common for an item to have several aliases + # aliases = make_embed_request(linkto, ["aliases"], self.auth_key, single_item=True) + # if not aliases: + # aliases = [] + # linkto_alias_dict[linkto] = aliases + # return linkto_alias_dict + + def extract_mwf_linktos(self, existing_meta_workflow_runs_linktos): + existing_mwfs = [] + for mwfr_id in existing_meta_workflow_runs_linktos: + corresponding_mwf = make_embed_request(mwfr_id, ["meta_workflow"], self.auth_key, single_item=True) + if not corresponding_mwf: + continue #TODO: error check tho?? 
+ existing_mwfs.append(corresponding_mwf) + return existing_mwfs + def get_item_properties(self, item_identifier): """ Retrieve item from given environment without raising @@ -146,7 +161,7 @@ def create_meta_workflow_run_handler(self): """ #TODO: check Doug's prior comments on title - meta_workflow_handler_title = self.meta_workflow_handler.get(self.TITLE) + meta_workflow_handler_title = self.meta_workflow_handler_json.get(self.TITLE) creation_date = datetime.date.today().isoformat() title = "MetaWorkflowRun Handler %s created %s" % ( meta_workflow_handler_title, @@ -160,23 +175,45 @@ def create_meta_workflow_run_handler(self): self.TITLE: title, self.ASSOCIATED_META_WORKFLOW_HANDLER: self.meta_workflow_handler_id, self.ASSOCIATED_ITEM: self.associated_item_id, - self.FINAL_STATUS: self.PENDING, - self.META_WORKFLOW_RUNS: [] + self.FINAL_STATUS: self.PENDING } # now call helper function to populate and create the MetaWorkflow Runs # TODO: handle duplication flag?? # TODO: should duplication only happen when the status of the original # mwfr is not successful? - self.create_meta_workflow_runs_array(meta_workflow_run_handler) + meta_workflow_runs_array = self.create_meta_workflow_runs_array() + + meta_workflow_run_handler[self.META_WORKFLOW_RUNS] = meta_workflow_runs_array + #TODO: check for whether this is empty or nah? # return the completed MWFR Handler dictionary, following the CGAP schema + #TODO: or the object itself?? return meta_workflow_run_handler - def create_meta_workflow_runs_array(self, meta_workflow_run_handler_dict): + def create_meta_workflow_runs_array(self): + # create MetaWorkflowHandler object + associated_meta_workflow_handler_object = MetaWorkflowHandler(self.meta_workflow_handler_json) + + # then extract the ordered list of metaworkflows + #TODO: constants list, and error catching with this call + ordered_meta_workflows = getattr(associated_meta_workflow_handler_object, "ordered_meta_workflows") + + for meta_workflow_step_obj in ordered_meta_workflows: + + # mwf attrs: meta_workflow, name, items_for_creation (proptrace/uuid), dependencies, duplication_flag + # mwfr attrs: meta_workflow_run, name, status, dependencies, items_for_creation, error + # attrs that stay the same and are passed in: name, dependencies + # run attrs that are automatically set already: status (pending) + + + + # and there is where you can check the duplication flag thing + # and also items for creation prop trace? + #TODO: item for creation prop trace #TODO: handle duplication flag - # TODO: case where mwf run already exists? and dup flag = F? reset the run? + # TODO: case where mwf run already exists? and dup flag = F? reset the run? or just redefine? yikes pass # TODO: for POST and PATCH, will there be changes to schemas other than handlers diff --git a/magma_ff/wfrutils.py b/magma_ff/wfrutils.py index 6b9163a..0d51c14 100644 --- a/magma_ff/wfrutils.py +++ b/magma_ff/wfrutils.py @@ -145,32 +145,32 @@ def __init__(self, env): # Cache for access key self._ff_key = None - def wfr_metadata(self, job_id): - """Get portal run metadata from job_id. - Return None if a run associated with job id cannot be found. 
- """ - # Use cache - if job_id in self._metadata: - return self._metadata[job_id] - # Search by job id - query='/search/?type=WorkflowRun&awsem_job_id=%s' % job_id - try: - search_res = ff_utils.search_metadata(query, key=self.ff_key) - except Exception as e: - raise FdnConnectionException(e) - if search_res: - self._metadata[job_id] = search_res[0] - return self._metadata[job_id] - else: - # find it from dynamoDB - job_info = Job.info(job_id) - if not job_info: - return None - wfr_uuid = job_info.get('WorkflowRun uuid', '') - if not wfr_uuid: - return None - self._metadata[job_id] = ff_utils.get_metadata(wfr_uuid, key=self.ff_key) - return self._metadata[job_id] + # def wfr_metadata(self, job_id): + # """Get portal run metadata from job_id. + # Return None if a run associated with job id cannot be found. + # """ + # # Use cache + # if job_id in self._metadata: + # return self._metadata[job_id] + # # Search by job id + # query='/search/?type=WorkflowRun&awsem_job_id=%s' % job_id + # try: + # search_res = ff_utils.search_metadata(query, key=self.ff_key) + # except Exception as e: + # raise FdnConnectionException(e) + # if search_res: + # self._metadata[job_id] = search_res[0] + # return self._metadata[job_id] + # else: + # # find it from dynamoDB + # job_info = Job.info(job_id) + # if not job_info: + # return None + # wfr_uuid = job_info.get('WorkflowRun uuid', '') + # if not wfr_uuid: + # return None + # self._metadata[job_id] = ff_utils.get_metadata(wfr_uuid, key=self.ff_key) + # return self._metadata[job_id] # def wfr_run_uuid(self, job_id): # """This is the function to be used by Magma. @@ -195,10 +195,11 @@ def wfr_metadata(self, job_id): @property def ff_key(self): - """Get access key for the portal. """ - # Use cache + Get access key for the portal. + """ if not self._ff_key: - # Use tibanna key for now + # Use tibanna key for now -- TODO: is this correct? 
don't really understand why + # https://github.com/4dn-dcic/utils/blob/master/dcicutils/s3_utils.py#L276 self._ff_key = s3Utils(env=self.env).get_access_keys('access_key_tibanna') return self._ff_key \ No newline at end of file From ae249bbe396ce96c69ceb2ef1eea86d87a19be8e Mon Sep 17 00:00:00 2001 From: dbmi Date: Mon, 24 Apr 2023 11:00:22 -0400 Subject: [PATCH 24/38] More updates the mwfr handler creation --- magma_ff/create_metawflrun_handler.py | 40 +++++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py index 0603817..a68c129 100644 --- a/magma_ff/create_metawflrun_handler.py +++ b/magma_ff/create_metawflrun_handler.py @@ -51,6 +51,10 @@ class MetaWorkflowRunHandlerFromItem: DEPENDENCIES = "dependencies" ITEMS_FOR_CREATION = "items_for_creation" ERROR = "error" + + # mwf step + ITEMS_FOR_CREATION_UUID = "items_for_creation_uuid" + ITEMS_FOR_CREATION_PROP_TRACE = "items_for_creation_property_trace" PENDING = "pending" @@ -104,6 +108,7 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, # this is a dict of linkTos and corresponding aliases {linkTo: [aliases]} # self.existing_meta_workflow_runs = self.extract_mwfr_names(existing_meta_workflow_runs_linktos) + # this is a dict of MWF linkTos (UUIDs TODO:) and corresponding MWFR linkTos {mwf uuid: mwfr uuid} self.existing_meta_workflows_on_assoc_item = self.extract_mwf_linktos(existing_meta_workflow_runs_linktos) # and now create the actual MetaWorkflow Run Handler @@ -125,12 +130,12 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, # return linkto_alias_dict def extract_mwf_linktos(self, existing_meta_workflow_runs_linktos): - existing_mwfs = [] + existing_mwfs = {} for mwfr_id in existing_meta_workflow_runs_linktos: - corresponding_mwf = make_embed_request(mwfr_id, ["meta_workflow"], self.auth_key, single_item=True) - if not corresponding_mwf: + mwf_id = make_embed_request(mwfr_id, ["meta_workflow"], self.auth_key, single_item=True) + if not mwf_id: continue #TODO: error check tho?? 
- existing_mwfs.append(corresponding_mwf) + existing_mwfs[mwf_id] = mwfr_id return existing_mwfs def get_item_properties(self, item_identifier): @@ -199,22 +204,39 @@ def create_meta_workflow_runs_array(self): #TODO: constants list, and error catching with this call ordered_meta_workflows = getattr(associated_meta_workflow_handler_object, "ordered_meta_workflows") + ordered_meta_workflow_runs = [] # will eventually be the completed pending MWFRs array, in order for meta_workflow_step_obj in ordered_meta_workflows: - + meta_workflow_run_step_obj = {} # will become the populated MWFR step object # mwf attrs: meta_workflow, name, items_for_creation (proptrace/uuid), dependencies, duplication_flag # mwfr attrs: meta_workflow_run, name, status, dependencies, items_for_creation, error # attrs that stay the same and are passed in: name, dependencies + meta_workflow_run_step_obj[self.NAME] = meta_workflow_step_obj[self.NAME] + meta_workflow_run_step_obj[self.DEPENDENCIES] = meta_workflow_step_obj[self.DEPENDENCIES] # run attrs that are automatically set already: status (pending) + # now check duplication flag (rename -- make new if exists) + # if there is no existing mwfr for this mwf, don't even worry about it (make new one) + + # when False --> do not duplicate an existing mwfr for this mwf + # TODO: if False but mwfr exists + # use existing one regardless of status + + + # when True --> duplicate existing mwfr for this mwf (TODO: does this include the status??) + # --> run another w same mwf as template -- new uuid and new status (so overall new item) + # overall make a new one regardless of anything + + # now check if items for creation is prop trace(s) or uuid(s) + # make embed request as necessary + if not getattr(meta_workflow_step_obj, ) + + + # and there is where you can check the duplication flag thing # and also items for creation prop trace? - #TODO: item for creation prop trace - #TODO: handle duplication flag - # TODO: case where mwf run already exists? and dup flag = F? reset the run? or just redefine? yikes - pass # TODO: for POST and PATCH, will there be changes to schemas other than handlers # in order to accomodate this? 
like maybe within the mixins schemas file From 58add8160dc9518532a28c9fa58109834510eed0 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Wed, 3 May 2023 13:30:39 -0400 Subject: [PATCH 25/38] Almost final draft of create MWFR handler functionality --- magma_ff/create_metawflrun_handler.py | 239 +++++++++++++++----------- 1 file changed, 137 insertions(+), 102 deletions(-) diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py index a68c129..2e262f6 100644 --- a/magma_ff/create_metawflrun_handler.py +++ b/magma_ff/create_metawflrun_handler.py @@ -13,11 +13,23 @@ from magma_ff.metawfl_handler import MetaWorkflowHandler from magma_ff.metawflrun_handler import MetaWorkflowRunHandler from magma_ff.utils import make_embed_request +from magma_ff.create_metawfr import create_meta_workflow_run, MetaWorkflowRunCreationError ################################################ # Constants ################################################ # UUID = "uuid" +#TODO: make a file of these + +MWFR_TO_HANDLER_STEP_STATUS_DICT = { + "pending": "pending", + "running": "running", + "completed": "completed", + "failed": "failed", + "inactive": "pending", + "stopped": "stopped", + "quality metric failed": "failed" +} ################################################ # Custom Exception class(es) @@ -33,8 +45,7 @@ class MetaWorkflowRunHandlerFromItem: Base class to hold common methods required to create and POST a MetaWorkflowRun Handler, and PATCH the Item used to create it (the "associated item"). """ - # Schema constants #TODO: make these constants throughout all files? and where to put that file? - # or a file with different constant classes? + # Schema constants PROJECT = "project" INSTITUTION = "institution" UUID = "uuid" @@ -44,19 +55,45 @@ class MetaWorkflowRunHandlerFromItem: FINAL_STATUS = "final_status" META_WORKFLOW_RUNS = "meta_workflow_runs" - # specific to a mwf run step #TODO: called later on in this class, right? + # specific to a mwf run step #TODO: called later on in this class, right? right. META_WORKFLOW_RUN = "meta_workflow_run" NAME = "name" MWFR_STATUS = "status" DEPENDENCIES = "dependencies" ITEMS_FOR_CREATION = "items_for_creation" ERROR = "error" + DUP_FLAG = "duplication_flag" - # mwf step + # mwf step (from template mwf handler) + MWF_UUID = "meta_workflow" ITEMS_FOR_CREATION_UUID = "items_for_creation_uuid" ITEMS_FOR_CREATION_PROP_TRACE = "items_for_creation_property_trace" PENDING = "pending" + FAILED = "failed" + + # for embed requests + #TODO: use from constants file plz + ASSOC_ITEM_FIELDS = [ + "project", + "institution", + "uuid", + "meta_workflow_runs.uuid", + "meta_workflow_runs.meta_workflow", #TODO: this is sometimes an @id?? + "meta_workflow_runs.final_status" + ] + + # MWFH_FIELDS = [ + # "uuid", + # "meta_workflows", + # "meta_workflows.items_for_creation_property_trace", #TODO: same as above?? + # "meta_workflows.items_for_creation_uuid" + # ] + + + # TODO: is this correct?? also, will we end up patching on assoc item?? 
+ # TODO: if so, create a schema mixin (seems unnecessary, for now) + self.META_WORKFLOW_RUN_HANDLER_ENDPOINT = "meta-workflow-run-handlers" def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, auth_key): """ @@ -66,7 +103,7 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, on which this MetaWorkflow Run Handler is being created :type associated_item_identifier: str :param meta_workflow_handler_identifier: Associated MetaWorkflow Handler identifier - (UUID, @id, or accession) + (UUID, @id, or accession) -- TODO: does embed request work with an accession :type meta_workflow_handler_identifier: str :param auth_key: Portal authorization key :type auth_key: dict @@ -74,88 +111,64 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, be found on environment of authorization key """ self.auth_key = auth_key - # this calls for the specified metadata on the associated_item of this MWF Run Handler to be created - #TODO: use normal get request (ffutils get metadata) - # embedding pulls outta postgres, which is slower than elasticsearch - # use embedding for the property tracing and duplication flag checks - #TODO: make this change for the mwfr data structure too? - - self.associated_item_json = self.get_item_properties(associated_item_identifier) - if not self.associated_item_json: # TODO: restructure so this creation error is in method get_item_properties + + self.associated_item_attributes = make_embed_request( + associated_item_identifier, + self.ASSOC_ITEM_FIELDS, + self.auth_key, + single_item=True + ) + if not self.associated_item_attributes: raise MetaWorkflowRunHandlerCreationError( "No Item found for given 'associated item' identifier: %s" % associated_item_identifier ) # check that the specified identifier for the associated MWF Handler does indeed exist on portal - #TODO: a check to make sure it is indeed of mwf handler type? does this function exist on ff_utils? - self.meta_workflow_handler_json = self.get_item_properties(meta_workflow_handler_identifier) + # TODO: a check to make sure it is indeed of mwf handler type? does this function exist on ff_utils? + # same for above associated item request + #TODO: is this even necessary?? is it too complicated of a call to + # just check it exists? what about just a get request? 
+ # self.meta_workflow_handler_json = make_embed_request( + # meta_workflow_handler_identifier, + # self.MWFH_FIELDS, + # self.auth_key, + # single_item=True + # ) + self.meta_workflow_handler_json = ff_utils.get_metadata( + meta_workflow_handler_identifier, + key=self.auth_key, + add_on="frame=raw" + ) if not self.meta_workflow_handler_json: raise MetaWorkflowRunHandlerCreationError( "No MetaWorkflow Handler found for given identifier: %s" % meta_workflow_handler_identifier - ) + ) # now fill in the rest of the attributes of this MWF Run Handler - self.project = self.associated_item_json.get(self.PROJECT) # project is same as associated item - self.institution = self.associated_item_json.get(self.INSTITUTION) # institution is same as associated item - self.associated_item_id = self.associated_item_json.get(self.UUID) # get uuid of associated item + self.project = self.associated_item_attributes.get(self.PROJECT) # project is same as associated item + self.institution = self.associated_item_attributes.get(self.INSTITUTION) # institution is same as associated item + self.associated_item_id = self.associated_item_attributes.get(self.UUID) # get uuid of associated item self.meta_workflow_handler_id = self.meta_workflow_handler_json.get(self.UUID) # get uuid of the template mwf handler self.meta_workflow_run_handler_uuid = str(uuid.uuid4()) #TODO: put exception to catch duplicates? i think the portal handles this #TODO: this is to check for duplicating metaworkflows - existing_meta_workflow_runs_linktos = self.associated_item_json.get(self.META_WORKFLOW_RUNS, []) + existing_meta_workflow_runs_on_assoc_item = self.associated_item_attributes.get(self.META_WORKFLOW_RUNS, []) # above returns [] if no existing mwfr, else returns list of linktos + existing_mwfs = {} + existing_mwfrs = {} + for mwfr in existing_meta_workflow_runs_on_assoc_item: + existing_mwfs[mwfr["meta_workflow"]] = mwfr["uuid"] + existing_mwfrs[mwfr["uuid"]] = mwfr["final_status"] - # this is a dict of linkTos and corresponding aliases {linkTo: [aliases]} - # self.existing_meta_workflow_runs = self.extract_mwfr_names(existing_meta_workflow_runs_linktos) - # this is a dict of MWF linkTos (UUIDs TODO:) and corresponding MWFR linkTos {mwf uuid: mwfr uuid} - self.existing_meta_workflows_on_assoc_item = self.extract_mwf_linktos(existing_meta_workflow_runs_linktos) + self.existing_meta_workflows_on_assoc_item = existing_mwfs + self.statuses_of_existing_mwfrs = existing_mwfrs # and now create the actual MetaWorkflow Run Handler # this returns the dict itself, not just an ID # this attribute is later used to run the thang self.meta_workflow_run_handler = self.create_meta_workflow_run_handler() - # def extract_mwfr_names(self, existing_linktos_list): - # linkto_alias_dict = {} - # for linkto in existing_linktos_list: - # #TODO: does embed request work with @ids and uuids - # #TODO: match user submitted names to existing aliases...or.... 
- # # because there is no existing "name" attr on mwfr schema at the moment - # # also is it common for an item to have several aliases - # aliases = make_embed_request(linkto, ["aliases"], self.auth_key, single_item=True) - # if not aliases: - # aliases = [] - # linkto_alias_dict[linkto] = aliases - # return linkto_alias_dict - - def extract_mwf_linktos(self, existing_meta_workflow_runs_linktos): - existing_mwfs = {} - for mwfr_id in existing_meta_workflow_runs_linktos: - mwf_id = make_embed_request(mwfr_id, ["meta_workflow"], self.auth_key, single_item=True) - if not mwf_id: - continue #TODO: error check tho?? - existing_mwfs[mwf_id] = mwfr_id - return existing_mwfs - - def get_item_properties(self, item_identifier): - """ - Retrieve item from given environment without raising - Exception if not found, rather, returns None. - - :param item_identifier: Item identifier (UUID, @id, or accession) on the portal - :type item_identifier: str - :return: Raw view of item if found - :rtype: dict or None - """ - # TODO: same as create_metawfr.py --> make a generalized function? - try: - result = ff_utils.get_metadata( - item_identifier, key=self.auth_key, add_on="frame=raw" - ) - except Exception: - result = None - return result def create_meta_workflow_run_handler(self): """ @@ -184,62 +197,84 @@ def create_meta_workflow_run_handler(self): } # now call helper function to populate and create the MetaWorkflow Runs - # TODO: handle duplication flag?? - # TODO: should duplication only happen when the status of the original - # mwfr is not successful? meta_workflow_runs_array = self.create_meta_workflow_runs_array() meta_workflow_run_handler[self.META_WORKFLOW_RUNS] = meta_workflow_runs_array #TODO: check for whether this is empty or nah? - # return the completed MWFR Handler dictionary, following the CGAP schema - #TODO: or the object itself?? 
+ # return the completed MWFR Handler dictionary, which follows the CGAP schema return meta_workflow_run_handler def create_meta_workflow_runs_array(self): # create MetaWorkflowHandler object associated_meta_workflow_handler_object = MetaWorkflowHandler(self.meta_workflow_handler_json) + # this'll make sure all necessary attrs are present in the following run handler creation # then extract the ordered list of metaworkflows - #TODO: constants list, and error catching with this call + #TODO: add ordered_meta_workflows to constants file + # and error catching with this call ordered_meta_workflows = getattr(associated_meta_workflow_handler_object, "ordered_meta_workflows") ordered_meta_workflow_runs = [] # will eventually be the completed pending MWFRs array, in order for meta_workflow_step_obj in ordered_meta_workflows: meta_workflow_run_step_obj = {} # will become the populated MWFR step object - # mwf attrs: meta_workflow, name, items_for_creation (proptrace/uuid), dependencies, duplication_flag - # mwfr attrs: meta_workflow_run, name, status, dependencies, items_for_creation, error + + # mwfr attrs: meta_workflow_run # attrs that stay the same and are passed in: name, dependencies meta_workflow_run_step_obj[self.NAME] = meta_workflow_step_obj[self.NAME] meta_workflow_run_step_obj[self.DEPENDENCIES] = meta_workflow_step_obj[self.DEPENDENCIES] - # run attrs that are automatically set already: status (pending) - - # now check duplication flag (rename -- make new if exists) - # if there is no existing mwfr for this mwf, don't even worry about it (make new one) - # when False --> do not duplicate an existing mwfr for this mwf - # TODO: if False but mwfr exists - # use existing one regardless of status - - - # when True --> duplicate existing mwfr for this mwf (TODO: does this include the status??) - # --> run another w same mwf as template -- new uuid and new status (so overall new item) - # overall make a new one regardless of anything - - # now check if items for creation is prop trace(s) or uuid(s) - # make embed request as necessary - if not getattr(meta_workflow_step_obj, ) - - - - - - # and there is where you can check the duplication flag thing - # and also items for creation prop trace? - - - # TODO: for POST and PATCH, will there be changes to schemas other than handlers - # in order to accomodate this? like maybe within the mixins schemas file - # which can then be easily integrated within other schemas in the future? - # because the mwfr handler will now be living on whatever item, rather than - # a sample or a sample processing \ No newline at end of file + # handle items_for_creation attribute + if self.ITEMS_FOR_CREATION_UUID in meta_workflow_step_obj.keys(): + meta_workflow_run_step_obj[self.ITEMS_FOR_CREATION] = meta_workflow_step_obj[self.ITEMS_FOR_CREATION_UUID] + else: # make embed requests as necessary + items_for_creation_uuids = [] + for item_prop_trace in meta_workflow_step_obj[self.ITEMS_FOR_CREATION_PROP_TRACE]: + item_uuid = make_embed_request( + self.associated_item_id, + [item_prop_trace], + self.auth_key, + single_item=True + ) + items_for_creation_uuids.append(item_uuid) + meta_workflow_run_step_obj[self.ITEMS_FOR_CREATION] = items_for_creation_uuids + + # now handle duplication flag (TODO: todo at the end --> rename -- make new if exists) + try: + meta_workflow_linkto = generated_mwfr_obj[self.UUID] + # if False and a mwfr for that mwf template exists, use existing one regardless of status + # i.e. 
do not duplicate the existing mwfr and linkTo the existing one + # TODO: copy over the status, right? + if (meta_workflow_step_obj[self.DUP_FLAG] == False) \ + and (meta_workflow_linkto in self.existing_meta_workflows_on_assoc_item.keys()): + meta_workflow_run_step_obj[self.META_WORKFLOW_RUN] = self.existing_meta_workflows_on_assoc_item[meta_workflow_linkto] # the linkTo + curr_mwfr_uuid = meta_workflow_run_step_obj[self.META_WORKFLOW_RUN] + meta_workflow_run_step_obj[self.MWFR_STATUS] = MWFR_TO_HANDLER_STEP_STATUS_DICT[self.statuses_of_existing_mwfrs[curr_mwfr_uuid]] # copy over its status + else: # if True, make a new MWFR for the MWF template regardless of if one exists + # or it could be False, but if there's no existing mwfr for this mwf, make new one + generated_mwfr_obj = create_meta_workflow_run(self.associated_item_id, meta_workflow_step_obj[self.MWF_UUID], self.auth_key) + meta_workflow_run_step_obj[self.META_WORKFLOW_RUN] = meta_workflow_linkto # the linkTo + meta_workflow_run_step_obj[self.MWFR_STATUS] = self.PENDING + except MetaWorkflowRunCreationError as err: + # here the error attribute is handled, if applicable + #TODO: not saving full traceback here + # also TODO: catching and not reraising the error. is this correct? + meta_workflow_run_step_obj[self.MWFR_STATUS] = self.FAILED + meta_workflow_run_step_obj[self.ERROR] = err + + ordered_meta_workflow_runs.append(meta_workflow_run_step_obj) + + return ordered_meta_workflow_runs + + + def post_meta_workflow_run_handler(self): + try: + ff_utils.post_metadata( + self.meta_workflow_run_handler, + self.META_WORKFLOW_RUN_HANDLER_ENDPOINT, + key=self.auth_key, + ) + except Exception as error_msg: + raise MetaWorkflowRunHandlerCreationError( + "MetaWorkflowRunHandler not POSTed: \n%s" % str(error_msg) + ) \ No newline at end of file From e411ceb8dc3e652f82c526a1cb867b2acb1e0b17 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Thu, 4 May 2023 23:40:25 -0400 Subject: [PATCH 26/38] Got rid of duplication flag, for now --- magma_ff/create_metawflrun_handler.py | 40 ++------------------------- magma_ff/metawflrun_handler.py | 4 +-- 2 files changed, 5 insertions(+), 39 deletions(-) diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py index 2e262f6..c5f3d17 100644 --- a/magma_ff/create_metawflrun_handler.py +++ b/magma_ff/create_metawflrun_handler.py @@ -55,7 +55,7 @@ class MetaWorkflowRunHandlerFromItem: FINAL_STATUS = "final_status" META_WORKFLOW_RUNS = "meta_workflow_runs" - # specific to a mwf run step #TODO: called later on in this class, right? right. + # specific to a mwf run step META_WORKFLOW_RUN = "meta_workflow_run" NAME = "name" MWFR_STATUS = "status" @@ -137,7 +137,7 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, self.meta_workflow_handler_json = ff_utils.get_metadata( meta_workflow_handler_identifier, key=self.auth_key, - add_on="frame=raw" + add_on="frame=raw" #TODO: or request object view ) if not self.meta_workflow_handler_json: raise MetaWorkflowRunHandlerCreationError( @@ -152,18 +152,6 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, self.meta_workflow_handler_id = self.meta_workflow_handler_json.get(self.UUID) # get uuid of the template mwf handler self.meta_workflow_run_handler_uuid = str(uuid.uuid4()) #TODO: put exception to catch duplicates? 
i think the portal handles this - #TODO: this is to check for duplicating metaworkflows - existing_meta_workflow_runs_on_assoc_item = self.associated_item_attributes.get(self.META_WORKFLOW_RUNS, []) - # above returns [] if no existing mwfr, else returns list of linktos - existing_mwfs = {} - existing_mwfrs = {} - for mwfr in existing_meta_workflow_runs_on_assoc_item: - existing_mwfs[mwfr["meta_workflow"]] = mwfr["uuid"] - existing_mwfrs[mwfr["uuid"]] = mwfr["final_status"] - - self.existing_meta_workflows_on_assoc_item = existing_mwfs - self.statuses_of_existing_mwfrs = existing_mwfrs - # and now create the actual MetaWorkflow Run Handler # this returns the dict itself, not just an ID # this attribute is later used to run the thang @@ -235,32 +223,10 @@ def create_meta_workflow_runs_array(self): [item_prop_trace], self.auth_key, single_item=True - ) + ) #TODO: add check items_for_creation_uuids.append(item_uuid) meta_workflow_run_step_obj[self.ITEMS_FOR_CREATION] = items_for_creation_uuids - # now handle duplication flag (TODO: todo at the end --> rename -- make new if exists) - try: - meta_workflow_linkto = generated_mwfr_obj[self.UUID] - # if False and a mwfr for that mwf template exists, use existing one regardless of status - # i.e. do not duplicate the existing mwfr and linkTo the existing one - # TODO: copy over the status, right? - if (meta_workflow_step_obj[self.DUP_FLAG] == False) \ - and (meta_workflow_linkto in self.existing_meta_workflows_on_assoc_item.keys()): - meta_workflow_run_step_obj[self.META_WORKFLOW_RUN] = self.existing_meta_workflows_on_assoc_item[meta_workflow_linkto] # the linkTo - curr_mwfr_uuid = meta_workflow_run_step_obj[self.META_WORKFLOW_RUN] - meta_workflow_run_step_obj[self.MWFR_STATUS] = MWFR_TO_HANDLER_STEP_STATUS_DICT[self.statuses_of_existing_mwfrs[curr_mwfr_uuid]] # copy over its status - else: # if True, make a new MWFR for the MWF template regardless of if one exists - # or it could be False, but if there's no existing mwfr for this mwf, make new one - generated_mwfr_obj = create_meta_workflow_run(self.associated_item_id, meta_workflow_step_obj[self.MWF_UUID], self.auth_key) - meta_workflow_run_step_obj[self.META_WORKFLOW_RUN] = meta_workflow_linkto # the linkTo - meta_workflow_run_step_obj[self.MWFR_STATUS] = self.PENDING - except MetaWorkflowRunCreationError as err: - # here the error attribute is handled, if applicable - #TODO: not saving full traceback here - # also TODO: catching and not reraising the error. is this correct? - meta_workflow_run_step_obj[self.MWFR_STATUS] = self.FAILED - meta_workflow_run_step_obj[self.ERROR] = err ordered_meta_workflow_runs.append(meta_workflow_run_step_obj) diff --git a/magma_ff/metawflrun_handler.py b/magma_ff/metawflrun_handler.py index be7426b..dbfe4da 100644 --- a/magma_ff/metawflrun_handler.py +++ b/magma_ff/metawflrun_handler.py @@ -13,7 +13,7 @@ ################################################ # MetaWorkflow Handler, Fourfront ################################################ -class MetaWorkflowHandler(MetaWorkflowRunHandlerFromMagma): +class MetaWorkflowRunHandler(MetaWorkflowRunHandlerFromMagma): def __init__(self, input_dict): """ @@ -24,4 +24,4 @@ def __init__(self, input_dict): """ super().__init__(input_dict) - #TODO: update cost ---- embed jobid ... \ No newline at end of file + #TODO: update cost \ No newline at end of file From 2db5bff0265353c1f533475d929d51694ff10366 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Fri, 5 May 2023 03:27:50 -0400 Subject: [PATCH 27/38] Basic running of mwfr handler. 
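
Intended usage is roughly the following sketch (the uuid and key values are
placeholders; the function and its signature are the ones added in this patch):

    from magma_ff.run_metawflrun_handler import run_metawflrun_handler

    # Create MetaWorkflowRuns for all pending steps whose dependencies
    # have completed, then PATCH the handler's final_status and
    # meta_workflow_runs array back to the portal.
    run_metawflrun_handler(
        "some-run-handler-uuid",  # placeholder uuid
        auth_key,                 # portal access key dict
        verbose=True,
    )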
--- magma/metawfl_handler.py | 6 +- magma/metawflrun_handler.py | 54 +++++++++++++-- magma_ff/create_metawflrun_handler.py | 4 +- magma_ff/run_metawflrun_handler.py | 94 +++++++++++++++++++++++++++ magma_ff/utils.py | 19 +++--- 5 files changed, 156 insertions(+), 21 deletions(-) create mode 100644 magma_ff/run_metawflrun_handler.py diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index ba5d765..7241606 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -112,10 +112,8 @@ def __init__(self, input_dict): super()._validate_basic_attributes(self.UUID_ATTR) ### Calculated attributes ### - # set meta_workflows attribute - # TODO: is this redefinition into a dictionary allowed? - # or should I just make a new attribute? I dunno how this would affect json in portal - # except maybe in patching + # set meta_workflows attribute -- the following used to create ordered step list + # and creates step objects self._set_meta_workflows_dict() # order the meta_workflows list based on dependencies diff --git a/magma/metawflrun_handler.py b/magma/metawflrun_handler.py index 0aa9149..9cfc56a 100644 --- a/magma/metawflrun_handler.py +++ b/magma/metawflrun_handler.py @@ -20,7 +20,8 @@ class MetaWorkflowRunStep(ValidatedDictionary): STATUS_ATTR = "status" DEP_ATTR = "dependencies" MWF_RUN_ATTR = "meta_workflow_run" #TODO: used within the handler itself - # ITEMS_CREATION_ATTR = "items_for_creation" #TODO: do this embedding in ff. BUT. make req? + # ITEMS_CREATION_ATTR = "items_for_creation" #TODO: do this embedding in ff. BUT. make req in schema? + # this above TODO: is very important (unless checked elsewhere) def __init__(self, input_dict): """ @@ -127,6 +128,11 @@ def update_final_status(self): #TODO: update pytests here return self.FINAL_STATUS_ATTR + #TODO: add this to pytests + def retrieve_meta_workflow_run_step_by_name(self, meta_workflow_run_name): + step_obj = self.meta_workflow_run_step_dict[meta_workflow_run_name] + return step_obj + # the following allows for resetting a MetaWorkflow Run Step # this can happen only when the duplication flag is set to True def reset_meta_workflow_run_step(self, meta_workflow_run_name): @@ -137,7 +143,7 @@ def reset_meta_workflow_run_step(self, meta_workflow_run_name): :type meta_workflow_run_name: str """ try: - step_obj = self.meta_workflow_run_step_dict[meta_workflow_run_name] + step_obj = self.retrieve_meta_workflow_run_step_by_name(meta_workflow_run_name) # Reset the status of the MetaWorkflow Run setattr(step_obj, step_obj.STATUS_ATTR, "pending") # Remove and reset the attribute for the LinkTo to the corresponding MetaWorkflow Run @@ -150,13 +156,23 @@ def reset_meta_workflow_run_step(self, meta_workflow_run_name): # this is for redefining any attribute of a MetaWorkflow Run Step def update_meta_workflow_run_step(self, meta_workflow_run_name, attribute, value): try: - step_obj = self.meta_workflow_run_step_dict[meta_workflow_run_name] + step_obj = self.retrieve_meta_workflow_run_step_by_name(meta_workflow_run_name) # Reset the given attribute setattr(step_obj, attribute, value) except KeyError as key_err: raise KeyError("{0} is not a valid MetaWorkflowRun Step name.\n" .format(key_err.args[0])) + # TODO: also have to add this to pytests -- nonexistent attr? 
check w other fxn too + def get_step_attr(self, meta_workflow_run_name, attribute_to_fetch): + try: + step_obj = self.retrieve_meta_workflow_run_step_by_name(meta_workflow_run_name) + # Return the status + return getattr(step_obj, attribute_to_fetch, None) + except KeyError as key_err: + raise KeyError("{0} is not a valid MetaWorkflowRun Step name.\n" + .format(key_err.args[0])) + def pending_steps(self): """ returns a list of pending steps (by name) @@ -167,16 +183,42 @@ def pending_steps(self): for meta_workflow_run_step in self.meta_workflow_runs: step_name = meta_workflow_run_step["name"] #TODO: make pending a global var - if self.meta_workflow_run_step_dict[step_name].status == "pending": + if self.get_step_attr(step_name, "status") == "pending": pending_steps_list.append(step_name) return pending_steps_list def running_steps(self): + """ + returns a list of running steps (by name) + if no more running, return empty list + """ running_steps_list = [] for meta_workflow_run in self.meta_workflow_runs: associated_meta_workflow_name = meta_workflow_run["name"] - if self.meta_workflow_run_step_dict[associated_meta_workflow_name].status == "running": + if self.get_step_attr(associated_meta_workflow_name, "status") == "running": running_steps_list.append(associated_meta_workflow_name) - return running_steps_list \ No newline at end of file + return running_steps_list + + # TODO: move to ff because portal specific + # and test out + def update_meta_workflows_array(self): + """ + updates run_uuid, status, error attrs + for mwfr dicts for patching mwfr steps array + """ + for meta_workflow_run_dict in self.meta_workflow_runs: + associated_meta_workflow_name = meta_workflow_run_dict["name"] + meta_workflow_run_uuid = self.get_step_attr(associated_meta_workflow_name, "run_uuid") + status = self.get_step_attr(associated_meta_workflow_name, "status") + error = self.get_step_attr(associated_meta_workflow_name, "error") + + if meta_workflow_run_uuid: + meta_workflow_run_dict["run_uuid"] = meta_workflow_run_uuid + if status: + meta_workflow_run_dict["status"] = status + if error: + meta_workflow_run_dict["error"] = error + + return self.meta_workflow_runs \ No newline at end of file diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py index c5f3d17..6ef3c06 100644 --- a/magma_ff/create_metawflrun_handler.py +++ b/magma_ff/create_metawflrun_handler.py @@ -13,7 +13,6 @@ from magma_ff.metawfl_handler import MetaWorkflowHandler from magma_ff.metawflrun_handler import MetaWorkflowRunHandler from magma_ff.utils import make_embed_request -from magma_ff.create_metawfr import create_meta_workflow_run, MetaWorkflowRunCreationError ################################################ # Constants @@ -188,7 +187,8 @@ def create_meta_workflow_run_handler(self): meta_workflow_runs_array = self.create_meta_workflow_runs_array() meta_workflow_run_handler[self.META_WORKFLOW_RUNS] = meta_workflow_runs_array - #TODO: check for whether this is empty or nah? + #TODO: check for whether this is empty or nah? 
no, not for now --
+ # putting the burden of this error on the user

 # return the completed MWFR Handler dictionary, which follows the CGAP schema
 return meta_workflow_run_handler
diff --git a/magma_ff/run_metawflrun_handler.py b/magma_ff/run_metawflrun_handler.py
new file mode 100644
index 0000000..aa93996
--- /dev/null
+++ b/magma_ff/run_metawflrun_handler.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+
+################################################
+# Libraries
+################################################
+from dcicutils import ff_utils
+
+# magma
+# from magma_ff.metawfl_handler import MetaWorkflowHandler
+
+from magma_ff.metawflrun_handler import MetaWorkflowRunHandler
+from magma_ff.utils import make_embed_request, check_status
+from magma_ff.create_metawfr import create_meta_workflow_run, MetaWorkflowRunCreationError
+# from magma_ff.run_metawfr import run_metawfr
+
+################################################
+# MetaWorkflowRunStep Generator Class
+################################################
+class MetaWorkflowRunStepGenerator:
+ def __init__(self, mwfr_handler_input_dict, auth_key):
+ self.auth_key = auth_key
+ self.mwfr_handler_obj = MetaWorkflowRunHandler(mwfr_handler_input_dict)
+
+ def run_step_generator(self):
+ """Steps through all pending MetaWorkflowRun steps;
+ if all of a step's dependencies are complete, creates its MetaWorkflowRun and marks it running"""
+ # going through all steps that are ready to run (pending)
+ for pending_mwfr_step_name in self.mwfr_handler_obj.pending_steps():
+ curr_pending_step_obj = self.mwfr_handler_obj.retrieve_meta_workflow_run_step_by_name(pending_mwfr_step_name)
+
+ # check that all dependencies are completed before running current step
+ curr_dependencies = getattr(curr_pending_step_obj, "dependencies", [])
+ deps_completed = True
+ for dependency_name in curr_dependencies:
+ dependency_step_status = self.mwfr_handler_obj.get_step_attr(dependency_name, "status")
+ if dependency_step_status != "completed":
+ deps_completed = False
+ break # one incomplete dependency is enough to rule this step out
+
+ # if all dependencies have run to completion
+ if deps_completed:
+ # create the MetaWorkflowRun on the portal
+ # TODO: iterate through all items for creation, and use a handler
+ # method (or a new method on the step class) instead of getattr,
+ # with error catching
+ try:
+ meta_workflow_run_portal_obj = create_meta_workflow_run(
+ getattr(curr_pending_step_obj, "items_for_creation"),
+ getattr(curr_pending_step_obj, "meta_workflow"),
+ self.auth_key
+ ) # TODO: a run_uuid attribute still has to be added to the schema --
+ # possibly an array, to match items_for_creation
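+ # For orientation, a minimal sketch of the portal response this code
+ # relies on (hypothetical field values; only "uuid" is read below):
+ # meta_workflow_run_portal_obj = {
+ # "uuid": "mwfr-uuid-0",
+ # "meta_workflow": "mwf-uuid-0",
+ # "final_status": "pending",
+ # }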
+
+ # update the meta_workflow_run/run_uuid linkTo
+ setattr(curr_pending_step_obj, "run_uuid", meta_workflow_run_portal_obj["uuid"])
+ # update the status to running
+ setattr(curr_pending_step_obj, "status", "running")
+ except MetaWorkflowRunCreationError as err:
+ # update error attr
+ setattr(curr_pending_step_obj, "error", err)
+ # update status to failed
+ setattr(curr_pending_step_obj, "status", "failed")
+
+
+
+ # update final status & mwfr array of overall handler and yield for PATCHING
+ yield {'final_status': self.mwfr_handler_obj.update_final_status(),
+ 'meta_workflow_runs': self.mwfr_handler_obj.update_meta_workflows_array()}
+
+
+################################################
+# Running Function:
+# Calls MWFR creation/run fxns and patches handler
+################################################
+def run_metawflrun_handler(
+ metawfr_handler_uuid,
+ auth_key,
+ verbose=False,
+ # sfn="tibanna_zebra", # TODO: just copying -- keeps the option open
+ # env="fourfront-cgap",
+ # maxcount=None, # TODO: remnant of run_metawfr -- impose no limit on MWFR steps per handler?
+ valid_final_status=None
+):
+ fields_to_embed = ["*", "meta_workflow_runs.*"] # TODO: double check this with integrated testing
+ mwfr_handler_input_dict = make_embed_request(
+ metawfr_handler_uuid, fields_to_embed, auth_key, single_item=True
+ ) # TODO: add an error check here
+ perform_action = check_status(mwfr_handler_input_dict, valid_final_status)
+ if perform_action:
+ # this will create a handler object, which has the status-checking methods
+ mwfr_step_generator = MetaWorkflowRunStepGenerator(mwfr_handler_input_dict, auth_key).run_step_generator()
+
+ for patch_dict in mwfr_step_generator:
+ response_from_patch = ff_utils.patch_metadata(patch_dict, metawfr_handler_uuid, key=auth_key)
+ if verbose:
+ print(response_from_patch)
\ No newline at end of file
diff --git a/magma_ff/utils.py b/magma_ff/utils.py
index 3e6470a..cdd8593 100644
--- a/magma_ff/utils.py
+++ b/magma_ff/utils.py
@@ -81,25 +81,26 @@ def chunk_ids(ids):
 result.append(ids[idx : idx + chunk_size])
 return result

+#TODO: add to tests for the handler too, and add constants
+def check_status(portal_json, valid_final_status=None):
+ """
+ Check if MetaWorkflowRun.status or MetaWorkflowRunHandler.status is valid.

-def check_status(meta_workflow_run, valid_final_status=None):
- """Check if MetaWorkflowRun.status is valid.
-
- If given valid final status, check MetaWorkflowRun.final_status
+ If given a valid status, check MetaWorkflowRun(Handler).final_status
 as well.
- :param meta_workflow_run: MetaWorkflowRun[json] - :type meta_workflow_run: dict + :param portal_json: MetaWorkflowRun(Handler)[json] + :type portal_json: dict :param valid_status: Final status considered valid :type valid_status: list - :return: Whether MetaWorkflowRun final_status is valid + :return: Whether MetaWorkflowRun (Handler) status & final_status are valid :rtype: bool """ - item_status = meta_workflow_run.get("status", "deleted") + item_status = portal_json.get("status", "deleted") if item_status not in ["obsolete", "deleted"]: result = True if valid_final_status: - final_status = meta_workflow_run.get("final_status") + final_status = portal_json.get("final_status") if final_status not in valid_final_status: result = False else: From c1d2a0b5d9f9bd41856d7eec9462eb56b885d56a Mon Sep 17 00:00:00 2001 From: vstevensf Date: Fri, 5 May 2023 10:57:14 -0400 Subject: [PATCH 28/38] Draft of status checking and updates of run handler --- magma_ff/checkstatus.py | 130 +++++++++----------------- magma_ff/run_metawflrun_handler.py | 6 +- magma_ff/status_metawflrun_handler.py | 40 ++++++++ magma_ff/wfrutils.py | 107 +++++++++------------ 4 files changed, 130 insertions(+), 153 deletions(-) create mode 100644 magma_ff/status_metawflrun_handler.py diff --git a/magma_ff/checkstatus.py b/magma_ff/checkstatus.py index c9c807d..d7fb341 100644 --- a/magma_ff/checkstatus.py +++ b/magma_ff/checkstatus.py @@ -13,6 +13,7 @@ # magma from magma.checkstatus import AbstractCheckStatus from magma_ff.wfrutils import FFWfrUtils, FFMetaWfrUtils +from magma_ff.metawflrun_handler import MetaWorkflowRunHandler ################################################ # CheckStatusFF @@ -90,137 +91,92 @@ def ff(self): ################################################ # CheckStatusRunHandlerFF ################################################ -#TODO: not using an abstract class +#TODO: not using an abstract class -- will check on this later class CheckStatusRunHandlerFF(object): """ Customized CheckStatus class for MetaWorkflow Run Handler for the CGAP portal. """ - def __init__(self, mwfr_handler_obj, env=None): + def __init__(self, mwfr_handler_input_dict, env=None): """ Initialize object and attributes. - :param mwfr_handler_obj: MetaWorkflowRunHandler[obj] representing a MetaWorkflowRunHandler[json] - :type mwfr_handler_obj: object + :param mwfr_handler_input_dict: MetaWorkflowRunHandler input dict + :type mwfr_handler_input_dict: dict :param env: Name of the environment to use (e.g. fourfront-cgap) :type env: str """ # Basic attributes - self.mwfr_handler_obj = mwfr_handler_obj + #TODO: may do this outside of this class for consistency + self.mwfr_handler_obj = MetaWorkflowRunHandler(mwfr_handler_input_dict) # Used for searching CGAP portal-related attributes self._env = env - # For FFMetaWfrUtils object, to search CGAP portal-related attributes - self._ff = None + + # For FFMetaWfrUtils object + self._ff = FFMetaWfrUtils(self._env) @property def status_map(self): - """Mapping from get_status output to magma status. - Set to property so that inherited classes can overwrite it. + """ + Mapping from MWFR portal final_status output to magma final_status. 
""" return { - 'pending': 'pending', - 'running': 'running', - 'completed': 'completed', - 'failed' : 'failed' + "pending": "pending", + "running": "running", + "completed": "completed", + "failed": "failed", + "inactive": "pending", + "stopped": "stopped", + "quality metric failed": "failed" } - # @property - # def status_map(self): - # """Mapping from get_status output to magma status. - # """ - # return { - # 'started': 'running', - # 'complete': 'completed', - # 'error': 'failed' - # } - - # return { -# 'pending': 'pending', -# 'running': 'running', -# 'completed': 'completed', -# 'failed' : 'failed' -# } - -# "pending", -# "running", -# "completed", -# "failed", -# // "inactive", -# "stopped", -# // "quality metric failed" - -# Handler"pending", -# "running", -# "completed", -# "failed", -# "stopped" def check_running_mwfr_steps(self): """ Check the currently running MetaWorkflowRun steps and update statuses accordingly. + Returns a generator. clever. """ # Iterate through list of running MetaWorkflow Run steps (array of objects) - for mwfr_step in self.mwfr_handler_obj.running_steps(): + for running_mwfr_step_name in self.mwfr_handler_obj.running_steps(): + + # Get run uuid + run_uuid = self.mwfr_handler_obj.get_step_attr(running_mwfr_step_name, uuid) # Check current status from MWF run name - status_ = self.get_status(run_obj.jobid) - status = self.status_map[status_] + status = self.status_map[self.get_mwfr_status(run_uuid)] # Update run status no matter what - self.wflrun_obj.update_attribute(run_obj.shard_name, 'status', status) - - # Get run uuid - run_uuid = self.get_uuid(run_obj.jobid) + self.mwfr_handler_obj.update_meta_workflow_run_step(running_mwfr_step_name, "status", status) # Update run uuid regardless of the status - if run_uuid: # some failed runs don't have run uuid - self.wflrun_obj.update_attribute(run_obj.shard_name, 'workflow_run', run_uuid) - - if status == 'completed': + # if run_uuid: # some failed runs don't have run uuid + # self.wflrun_obj.update_attribute(run_obj.shard_name, 'workflow_run', run_uuid) + # TODO: what's good w a mwfr that failed and may not have uuid?? - # Get formatted output - output = self.get_output(run_obj.jobid) - # Update output - if output: - self.wflrun_obj.update_attribute(run_obj.shard_name, 'output', output) - - elif status == 'running': + if status == 'running': yield None # yield None so that it doesn't terminate iteration continue - else: # failed - # handle error status - anything to do before yielding the updated json - self.handle_error(run_obj) - #end if + # TODO: what about when failed? add to error attr (ik originally for just creation error but still) - # Return the json to patch workflow_runs for both completed and failed - # and keep going so that it can continue updating status for other runs - yield {'final_status': self.wflrun_obj.update_status(), - 'workflow_runs': self.wflrun_obj.runs_to_json()} + # TODO: add part cost check/calculation here? 

- for patch_dict in super().check_running():
- if patch_dict:
- failed_jobs = self.wflrun_obj.update_failed_jobs()
- if len(failed_jobs) > 0:
- patch_dict['failed_jobs'] = failed_jobs
- cost = self.wflrun_obj.update_cost()
- if cost is not None and cost > 0:
- patch_dict['cost'] = cost
- yield patch_dict
+ # Return the json to PATCH meta_workflow_runs and final_status in handler
+ yield {'final_status': self.mwfr_handler_obj.update_final_status(),
+ 'meta_workflow_runs': self.mwfr_handler_obj.update_meta_workflows_array()}

- def get_status(self, jobid):
+
+ def get_mwfr_status(self, mwfr_uuid):
 """
- Returns the status of the given MetaWorkflow Run, from CGAP portal
+ Using the portal, gets the final_status of the given MWFR
 """
- return self.ff.wfr_run_status(jobid)
+ return self._ff.get_meta_wfr_current_status(mwfr_uuid)

- @property
- def ff(self):
+ def get_mwfr_cost(self, mwfr_uuid):
 """
- Internal property used for get_status from CGAP portal for given MetaWorkflow Run
+ Using the portal, gets the cost of the given MWFR
 """
- if not self._ff:
- self._ff = FFMetaWfrUtils(self._env)
- return self._ff
\ No newline at end of file
+ return self._ff.get_meta_wfr_cost(mwfr_uuid)
\ No newline at end of file
diff --git a/magma_ff/run_metawflrun_handler.py b/magma_ff/run_metawflrun_handler.py
index aa93996..db2adfe 100644
--- a/magma_ff/run_metawflrun_handler.py
+++ b/magma_ff/run_metawflrun_handler.py
@@ -61,9 +61,9 @@ def run_step_generator(self):

- # update final status & mwfr array of overall handler and yield for PATCHING
- yield {'final_status': self.mwfr_handler_obj.update_final_status(),
- 'meta_workflow_runs': self.mwfr_handler_obj.update_meta_workflows_array()}
+ # update final status & mwfr array of overall handler and yield for PATCHING
+ yield {'final_status': self.mwfr_handler_obj.update_final_status(),
+ 'meta_workflow_runs': self.mwfr_handler_obj.update_meta_workflows_array()}


diff --git a/magma_ff/status_metawflrun_handler.py b/magma_ff/status_metawflrun_handler.py
new file mode 100644
index 0000000..39a075a
--- /dev/null
+++ b/magma_ff/status_metawflrun_handler.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+################################################
+# Libraries
+################################################
+from dcicutils import ff_utils
+
+from magma_ff.checkstatus import CheckStatusRunHandlerFF
+from magma_ff.utils import check_status
+
+################################################
+# Status Function:
+# Checks & patches status of MWFR in run handler
+################################################
+def status_metawfr_handler(
+ metawfr_handler_uuid,
+ auth_key,
+ env="fourfront-cgap",
+ verbose=False,
+ valid_status=None
+):
+ # TODO: confirm the add_on values used here
+ run_handler_json = ff_utils.get_metadata(
+ metawfr_handler_uuid, add_on="frame=raw&datastore=database", key=auth_key
+ )
+ perform_action = check_status(run_handler_json, valid_status)
+ if perform_action:
+ patch_dict = None
+ handler_status_check_obj = CheckStatusRunHandlerFF(run_handler_json, env)

+ # get the list of all updates and isolate the most recent update
+ status_updates = list(handler_status_check_obj.check_running_mwfr_steps())
+ if status_updates:
+ patch_dict = status_updates[-1]
+
+ if patch_dict:
+ response_from_patch = ff_utils.patch_metadata(patch_dict, metawfr_handler_uuid, key=auth_key)
+ if verbose:
+ print(response_from_patch)
\ No newline at end of file
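
A usage sketch for the new status checker (placeholder values; the function
name and signature are the ones added above):

    from magma_ff.status_metawflrun_handler import status_metawfr_handler

    # Check all currently running MetaWorkflowRun steps of the handler and
    # PATCH the updated step statuses plus the recalculated final_status.
    status_metawfr_handler(
        "some-run-handler-uuid",  # placeholder uuid
        auth_key,                 # portal access key dict
        env="fourfront-cgap",
        verbose=True,
    )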
diff --git a/magma_ff/wfrutils.py b/magma_ff/wfrutils.py
index 0d51c14..09cfc54 100644
--- a/magma_ff/wfrutils.py
+++ b/magma_ff/wfrutils.py
@@ -123,15 +123,10 @@ def filter_wfr_output_minimal_processed(wfr_output):

 #end class

-class FdnConnectionException(Exception):
- pass
-
-#end class
-
-
 ################################################
 # FFMetaWfrUtils
 ################################################
+#TODO: make pytests
 class FFMetaWfrUtils(object):
 def __init__(self, env):
 """
@@ -141,65 +136,51 @@ def __init__(self, env):
 self.env = env

 # Cache for metadata
+ # can save several MWFRs' metadata dicts at a time
 self._metadata = dict()

- # Cache for access key
- self._ff_key = None
-
- # def wfr_metadata(self, job_id):
- # """Get portal run metadata from job_id.
- # Return None if a run associated with job id cannot be found.
- # """
- # # Use cache
- # if job_id in self._metadata:
- # return self._metadata[job_id]
- # # Search by job id
- # query='/search/?type=WorkflowRun&awsem_job_id=%s' % job_id
- # try:
- # search_res = ff_utils.search_metadata(query, key=self.ff_key)
- # except Exception as e:
- # raise FdnConnectionException(e)
- # if search_res:
- # self._metadata[job_id] = search_res[0]
- # return self._metadata[job_id]
- # else:
- # # find it from dynamoDB
- # job_info = Job.info(job_id)
- # if not job_info:
- # return None
- # wfr_uuid = job_info.get('WorkflowRun uuid', '')
- # if not wfr_uuid:
- # return None
- # self._metadata[job_id] = ff_utils.get_metadata(wfr_uuid, key=self.ff_key)
- # return self._metadata[job_id]
-
- # def wfr_run_uuid(self, job_id):
- # """This is the function to be used by Magma.
- # """
- # wfr_meta = self.wfr_metadata(job_id)
- # if not wfr_meta:
- # return None
- # return wfr_meta['uuid']
-
- # def wfr_run_status(self, job_id):
- # """This is the function to be used by Magma.
- # Return the status of the run associated with specified job_id.
- # If run associated with job_id is not found, we consider it failed.
- # """
- # wfr_meta = self.wfr_metadata(job_id)
- # if not wfr_meta:
- # return 'error'
- # else:
- # return wfr_meta['run_status']
-
+ def get_meta_wfr_current_status(self, mwfr_uuid):
+ """
+ Return the status of the MWFR associated with the specified uuid.
+ If no run associated with the uuid is found, return None.
+ """
+ mwfr_meta = self._mwfr_metadata(mwfr_uuid)
+ if not mwfr_meta: # if an empty list is the result
+ return None
+ else:
+ return mwfr_meta['final_status']

- @property
- def ff_key(self):
+ def get_meta_wfr_cost(self, mwfr_uuid):
 """
- Get access key for the portal.
+ Return the cost of the MWFR associated with the specified uuid.
+ If no run associated with the uuid is found, return None.
 """
- if not self._ff_key:
- # Use tibanna key for now -- TODO: is this correct? don't really understand why
- # https://github.com/4dn-dcic/utils/blob/master/dcicutils/s3_utils.py#L276
- self._ff_key = s3Utils(env=self.env).get_access_keys('access_key_tibanna')
- return self._ff_key
\ No newline at end of file
+ mwfr_meta = self._mwfr_metadata(mwfr_uuid)
+ if not mwfr_meta: # if an empty list is the result
+ return None
+ else:
+ return mwfr_meta['cost']
+
+ def _mwfr_metadata(self, mwfr_uuid):
+ """Get portal MetaWorkflowRun metadata from its uuid.
+ Return [] if a run associated with the uuid isn't found.
+ """ + # Use cache + if mwfr_uuid in self._metadata: + return self._metadata[mwfr_uuid] + + # Search by uuid + query='/search/?type=MetaWorkflowRun&frame=object&uuid=%s' % mwfr_uuid + try: + search_result_list = ff_utils.search_metadata(query, key=self.ff_key) + except Exception as e: + raise FdnConnectionException(e) + + self._metadata[mwfr_uuid] = search_result_list[0] + return self._metadata[mwfr_uuid] + + +class FdnConnectionException(Exception): + pass + +#end class \ No newline at end of file From 3c54a4c398d320e0c478b89226c7d12ed61f9e65 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 8 May 2023 00:27:29 -0400 Subject: [PATCH 29/38] Added docstrings to toposort files --- magma/magma_constants.py | 11 +++ magma/topological_sort.py | 28 ++++++-- magma/update_handler.py | 80 --------------------- test/test_topological_sort.py | 131 ++++++++++++++++++++++------------ 4 files changed, 121 insertions(+), 129 deletions(-) create mode 100644 magma/magma_constants.py delete mode 100644 magma/update_handler.py diff --git a/magma/magma_constants.py b/magma/magma_constants.py new file mode 100644 index 0000000..4352d35 --- /dev/null +++ b/magma/magma_constants.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +################################################################# +# Vars +################################################################# + +META_WORKFLOW = "meta_workflow" +NAME = "name" +DEPENDENCIES = "dependencies" +ITEMS_FOR_CREATION_UUID = "items_for_creation_uuid" +DUP_FLAG = "duplication_flag" diff --git a/magma/topological_sort.py b/magma/topological_sort.py index ae1a7f1..f9af993 100644 --- a/magma/topological_sort.py +++ b/magma/topological_sort.py @@ -4,18 +4,18 @@ # Libraries ################################################ from dcicutils.misc_utils import TopologicalSorter +from magma.magma_constants import DEPENDENCIES ################################################ # Functions ################################################ class TopologicalSortHandler(object): - META_WORKFLOW_DEPENDENCIES_ATTR = "dependencies" - def __init__(self, meta_workflows_dict): """ Constructor method, initialize object and attributes. - Calls method to create graph input for TopologicalSorter from dcicutils + Calls method to create graph input (dict) for TopologicalSorter class, + then sorts this graph, or raises CycleError if sort not possible. :param meta_workflows_dict: input dictionary of meta_workflows from MetaWorkflowHandler :type meta_workflows_dict: dict @@ -27,10 +27,20 @@ def __init__(self, meta_workflows_dict): self.sorter = TopologicalSorter(self.graph) def _create_topo_sort_graph_input(self, meta_workflows_dict): + """ + Using the meta_workflows_dict defined in the MetaWorkflow Handler, + convert to appropriate form to input into a TopologicalSorter. 
+ + :param meta_workflows_dict: input dictionary of meta_workflows from MetaWorkflowHandler + :type meta_workflows_dict: dict + :return: graph input dict for TopologicalSorter + :rtype: dict + """ + # the graph dict should be of the form {mwf_name: set(dependencies),...} graph = {} - # the dict is of the form {mwf_name: MetaWorkflowStep object,...} + # the meta_workflows_dict is of the form {mwf_name: MetaWorkflowStep object,...} for mwf_step_name, mwf_step_obj in meta_workflows_dict.items(): - dependencies = getattr(mwf_step_obj, self.META_WORKFLOW_DEPENDENCIES_ATTR) + dependencies = getattr(mwf_step_obj, DEPENDENCIES) # if there are dependencies for this step, add to the input graph if dependencies: graph[mwf_step_name] = set(dependencies) @@ -39,5 +49,13 @@ def _create_topo_sort_graph_input(self, meta_workflows_dict): return graph def sorted_graph_list(self): + """ + Using the TopologicalSorter object, sorts input graph + and returns list of meta_workflow names in a valid + topological ordering. + + :return: list of meta_workflow names, ordered + :rtype: list[str] + """ sorted_meta_workflows_list = list(self.sorter.static_order()) return sorted_meta_workflows_list \ No newline at end of file diff --git a/magma/update_handler.py b/magma/update_handler.py deleted file mode 100644 index d994970..0000000 --- a/magma/update_handler.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 - -################################################ -# UpdateHandler -################################################ -class UpdateHandler(object): - """ - Class to handle MetaWorkflowRunHandler and MetaWorkflowRun Step object updates. - """ - - def __init__(self, run_handler_obj): - """ - Constructor method. - Initialize object and attributes. - - :param run_handler_obj: MetaWorkflowRunHandler magma object, representing a MetaWorkflowRunHandler JSON from CGAP portal - :type run_handler_obj: object - """ - # Basic attributes - self.run_handler_obj = run_handler_obj - - def reset_specified_steps(self, metaworkflow_run_step_names): - """ - Reset MetaWorkflowRun Step object(s) in step_names list, and update final_status of MetaWorkflowRunHandler. - - :param metaworkflow_run_step_names: List of MetaWorkflowRun steps to be reset, by name - :type metaworkflow_run_step_names: list[str] - :return: Updated meta_workflow_runs and handler final_status information - :rtype: dict - """ - # Resets each MetaWorkflowRun step in the list (status and meta_workflow_run LinkTo) - for metaworkflow_run_name in metaworkflow_run_step_names: - self.run_handler_obj.reset_meta_workflow_run_step(metaworkflow_run_name) - - # Return dictionary of attributes to be PATCHed for Run Handler on the CGAP portal - # TODO: put this in ff? since it is CGAP portal specific? - - # return {'final_status': self.run_handler_obj.update_final_status(), - # 'workflow_runs': self.wflrun_obj.runs_to_json()} TODO: is this right.... - - # def import_steps(self, wflrun_obj, steps_name, import_input=True): - # """Update current MetaWorkflowRun[obj] information. - # Import and use information from specified wflrun_obj. 
- # Update WorkflowRun[obj] up to steps specified by steps_name - - # :param wflrun_obj: MetaWorkflowRun[obj] to import information from - # :type wflrun_obj: object - # :param steps_name: List of names for steps to import - # :type steps_name: list(str) - # :return: MetaWorkflowRun[json] - # :rtype: dict - # """ - # ## Import input - # if import_input: - # self.wflrun_obj.input = wflrun_obj.input - # ## Import WorkflowRun objects - # for name in steps_name: - # queue = [] # queue of steps to import - # # name step and its dependencies - # # Get workflow-runs corresponding to name step - # for shard_name, run_obj in self.wflrun_obj.runs.items(): - # if name == shard_name.split(':')[0]: - # queue.append(run_obj) - # # Iterate queue, get dependencies and import workflow-runs - # while queue: - # run_obj = queue.pop(0) - # shard_name = run_obj.shard_name - # dependencies = run_obj.dependencies - # try: - # self.wflrun_obj.runs[shard_name] = wflrun_obj.runs[shard_name] - # except KeyError as e: - # # raise ValueError('JSON content error, missing information for workflow-run "{0}"\n' - # # .format(e.args[0])) - # continue - # for dependency in dependencies: - # queue.append(self.wflrun_obj.runs[dependency]) - # # Update final_status - # self.wflrun_obj.update_status() - - # return self.wflrun_obj.to_json() diff --git a/test/test_topological_sort.py b/test/test_topological_sort.py index fd02fde..d8a7dbf 100644 --- a/test/test_topological_sort.py +++ b/test/test_topological_sort.py @@ -7,6 +7,7 @@ from magma.metawfl_handler import MetaWorkflowStep from magma.topological_sort import TopologicalSortHandler +from magma.magma_constants import * from dcicutils.misc_utils import CycleError ################################################################# @@ -20,7 +21,8 @@ E_name = "E" # of the form [mwf_uuid, mwf_name] -# used for factory (defined below) to generate lists of dicts (steps with dependencies array) +# used for functions defined below to generate lists of dicts +# (steps with dependencies array) MWF_A = ["test_mwf_uuid_0", A_name] MWF_B = ["test_mwf_uuid_1", B_name] MWF_C = ["test_mwf_uuid_2", C_name] @@ -45,47 +47,80 @@ FIVE_MWF = [MWF_A, MWF_B, MWF_C, MWF_D, MWF_E] -#TODO: add docstring of what this does -- for constructing testing graphs -def construct_array_of_mwf(mwf_metadata_list, dependencies_list): - length = len(mwf_metadata_list) - array_of_mwf = [] +def construct_array_of_meta_workflows(meta_workflow_metadata_list, dependencies_list): + """ + Function to constructs a list of lists for MetaWorkflow steps. + Used to generate dictionaries of MetaWorkflow steps in the + below function meta_workflow_dict. + + :param meta_workflow_metadata_list: list of the form [meta_workflow_linkTo, meta_workflow_name] + :type meta_workflow_metadata_list: list + :param dependencies_list: list of dependencies. Index-matched to meta_workflow_metadata_list + :type dependencies_list: list + :return: list of aggregated meta_workflows with their metadata needed for creation, + of the form [meta_workflow_linkTo_1, meta_workflow_name_1, [dependencies_1],...] 
+ :rtype: list + """ + length = len(meta_workflow_metadata_list) + array_of_meta_workflows = [] for idx in range(length): - array_of_mwf.append(mwf_metadata_list[idx] + dependencies_list[idx]) - return array_of_mwf + array_of_meta_workflows.append(meta_workflow_metadata_list[idx] + dependencies_list[idx]) + return array_of_meta_workflows # a meta_workflow_dict generator of sorts -def meta_workflow_dict(simple_mwf_metadata_list): - mwf_dict = { - "meta_workflow": simple_mwf_metadata_list[0], - "name": simple_mwf_metadata_list[1] +def meta_workflow_dict(simple_meta_workflow_metadata_list): + """ + Constructs dictionary of MetaWorkflow Step metadata, given a list + of the metadata. + Attributes used here are based on MetaWorkflow Handler schema in CGAP portal. + + :param simple_meta_workflow_metadata_list: list of the form + [meta_workflow_linkTo, meta_workflow_name, [meta_workflow_dependencies]] + :type simple_meta_workflow_metadata_list: list + :return: dictionary representing a MetaWorkflow Step + :rtype: dict + """ + meta_workflow_dict = { + META_WORKFLOW: simple_meta_workflow_metadata_list[0], + NAME: simple_meta_workflow_metadata_list[1] } - if len(simple_mwf_metadata_list) == 3: - mwf_dict["dependencies"] = simple_mwf_metadata_list[2] + if len(simple_meta_workflow_metadata_list) == 3: + meta_workflow_dict[DEPENDENCIES] = simple_meta_workflow_metadata_list[2] # just to be able to create MetaWorkflowStep objects without error - mwf_dict["items_for_creation_uuid"] = "foo" - mwf_dict["duplication_flag"] = False - return mwf_dict - -def create_input_meta_workflows_dict(array_of_mwf): + meta_workflow_dict[ITEMS_FOR_CREATION_UUID] = "foo" + meta_workflow_dict[DUP_FLAG] = False + return meta_workflow_dict + +def create_input_meta_workflows_dict(array_of_meta_workflows): + """ + Returns simulation of meta_workflows dictionary of the form + {meta_workflow_name_1: MetaWorkflowStep object 1, ...} + (defined in a MetaWorkflow Handler) + + :param array_of_meta_workflows: list of the form + [[meta_workflow_linkTo_1, meta_workflow_name_1, [meta_workflow_1_dependencies]], ...] 
:type array_of_meta_workflows: list
+ :return: dictionary of MetaWorkflow name-MetaWorkflowStep object key-value pairs
+ :rtype: dict
+ """
 input_meta_workflows_dict = {}
- for mwf in array_of_mwf:
- mwf_dictionary = meta_workflow_dict(mwf)
- mwf_name = mwf_dictionary["name"]
- input_meta_workflows_dict[mwf_name] = MetaWorkflowStep(mwf_dictionary)
+ for meta_workflow_list in array_of_meta_workflows:
+ meta_workflow_dictionary = meta_workflow_dict(meta_workflow_list)
+ meta_workflow_name = meta_workflow_dictionary[NAME]
+ input_meta_workflows_dict[meta_workflow_name] = MetaWorkflowStep(meta_workflow_dictionary)
 return input_meta_workflows_dict

-#TODO: dawg idk how to draw these
 # DAGs (directed acyclic graphs, can be topologically sorted)
-# TODO: briefly explain how the dependency arrays work
-# and are used for construction of steps with dependencies mwf array
+# Dependency arrays are index-matched to a list of MetaWorkflow metadata
+# See functions above for further detail
 # -----------------------------------------------------------
 # DAG_0
 # A B -----> C
 DEPENDENCIES_DAG_0 = [DEP_EMPTY, DEP_EMPTY, DEP_ON_B]
-DAG_0 = construct_array_of_mwf(THREE_MWF, DEPENDENCIES_DAG_0)
+DAG_0 = construct_array_of_meta_workflows(THREE_MWF, DEPENDENCIES_DAG_0)

 # DAG_1
 # B -----> D
 # | ⋀ ⋀
 # | / |
 # ⋁ / |
 # A <----- C
-
-#TODO: do something about this nesting of different variables -- consider helper fxn?
-#might make it even more confusing to do that though
 DEPENDENCIES_DAG_1 = [[B+C], DEP_EMPTY, DEP_EMPTY, [A+B+C]]
-DAG_1 = construct_array_of_mwf(FOUR_MWF, DEPENDENCIES_DAG_1)
+DAG_1 = construct_array_of_meta_workflows(FOUR_MWF, DEPENDENCIES_DAG_1)


 # Cyclic graphs, cannot be topologically sorted
 # -----------------------------------------------------------
 # CYCLIC_0
 # A B__
 # ⋀ \_____
 # | ⋁
 # D <----- C
 DEPENDENCIES_CYCLIC_0 = [DEP_EMPTY, DEP_ON_D, DEP_ON_B, DEP_ON_C]
-CYCLIC_0 = construct_array_of_mwf(FOUR_MWF, DEPENDENCIES_CYCLIC_0)
+CYCLIC_0 = construct_array_of_meta_workflows(FOUR_MWF, DEPENDENCIES_CYCLIC_0)

 # CYCLIC_1
 # A -----> B ----> E
 # ⋀ | ⋀ |
 # | ⋁ \____|
 # | ⋁
 # D <----- C
 DEPENDENCIES_CYCLIC_1 = [DEP_ON_D, [A+E], DEP_ON_B, DEP_ON_C, DEP_ON_B]
-CYCLIC_1 = construct_array_of_mwf(FIVE_MWF, DEPENDENCIES_CYCLIC_1)
+CYCLIC_1 = construct_array_of_meta_workflows(FIVE_MWF, DEPENDENCIES_CYCLIC_1)


 #################################################################
 # Tests
 #################################################################
 class TestTopologicalSortHandler:
 @pytest.mark.parametrize(
- "array_of_mwf, input_graph_to_topological_sort",
+ "array_of_meta_workflows, input_graph_to_topological_sort",
 [
 (DAG_0, {A_name: {}, B_name: {}, C_name: {B_name}}),
 (DAG_1, {A_name: {B_name, C_name}, B_name: {}, C_name: {}, D_name: {A_name, B_name, C_name}}),
 (CYCLIC_0, {A_name: {}, B_name: {D_name}, C_name: {B_name}, D_name: {C_name}})
 ],
 )
- def test_create_topo_sort_graph_input(self, array_of_mwf, input_graph_to_topological_sort):
+ def test_create_topo_sort_graph_input(self, array_of_meta_workflows, input_graph_to_topological_sort):
+ """
+ Tests conversion of MetaWorkflow Steps dict from MetaWorkflow Handler to
+ appropriately formatted input graph for a TopologicalSorter object.
+ """ # TODO: could make these next two lines a fxn because i reuse over and over - input_mwf_dict = create_input_meta_workflows_dict(array_of_mwf) - sorter = TopologicalSortHandler(input_mwf_dict) + input_meta_workflow_dict = create_input_meta_workflows_dict(array_of_meta_workflows) + sorter = TopologicalSortHandler(input_meta_workflow_dict) assert sorter.graph == input_graph_to_topological_sort @pytest.mark.parametrize( - "array_of_mwf, possible_sorted_lists", + "array_of_meta_workflows, possible_sorted_lists", [ (DAG_0, [[A_name, B_name, C_name], [B_name, A_name, C_name], [B_name, C_name, A_name]]), (DAG_1, [[B_name, C_name, A_name, D_name], [C_name, B_name, A_name, D_name]]) ], ) - def test_sorted_graph_list(self, array_of_mwf, possible_sorted_lists): - input_mwf_dict = create_input_meta_workflows_dict(array_of_mwf) - sorter = TopologicalSortHandler(input_mwf_dict) + def test_sorted_graph_list(self, array_of_meta_workflows, possible_sorted_lists): + """ + Tests topological sorting of sortable MetaWorkflow steps. + """ + input_meta_workflow_dict = create_input_meta_workflows_dict(array_of_meta_workflows) + sorter = TopologicalSortHandler(input_meta_workflow_dict) assert sorter.sorted_graph_list() in possible_sorted_lists @pytest.mark.parametrize( - "array_of_mwf", + "array_of_meta_workflows", [ (CYCLIC_0), (CYCLIC_1) ], ) - def test_sorted_graph_list(self, array_of_mwf): + def test_sorted_graph_list_cycle_error(self, array_of_meta_workflows): + """ + Tests attempts to topologically sort MetaWorkflow steps with circular dependencies. + Raises CycleError. + """ with pytest.raises(CycleError) as cycle_err_info: - input_mwf_dict = create_input_meta_workflows_dict(array_of_mwf) - sorter = TopologicalSortHandler(input_mwf_dict) + input_meta_workflow_dict = create_input_meta_workflows_dict(array_of_meta_workflows) + sorter = TopologicalSortHandler(input_meta_workflow_dict) sorter.sorted_graph_list() assert "nodes are in a cycle" in str(cycle_err_info.value) \ No newline at end of file From 111a0c5f950e748b72e37c1fa86d40f89591548e Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 8 May 2023 01:45:45 -0400 Subject: [PATCH 30/38] Added docstrings to MWF handler files and tests, and added to magma constants. Got rid of duplication_flag. --- magma/magma_constants.py | 7 +- magma/metawfl_handler.py | 120 +++++++++++--------- magma/validated_dictionary.py | 5 +- test/test_metawfl_handler.py | 208 +++++++++++++++++++--------------- test/test_topological_sort.py | 1 - 5 files changed, 194 insertions(+), 147 deletions(-) diff --git a/magma/magma_constants.py b/magma/magma_constants.py index 4352d35..257ad99 100644 --- a/magma/magma_constants.py +++ b/magma/magma_constants.py @@ -4,8 +4,13 @@ # Vars ################################################################# +# MetaWorkflow Handler attributes +PROJECT = "project" +INSTITUTION = "institution" +UUID = "uuid" +META_WORKFLOWS = "meta_workflows" META_WORKFLOW = "meta_workflow" NAME = "name" DEPENDENCIES = "dependencies" +ITEMS_FOR_CREATION_PROP_TRACE = "items_for_creation_property_trace" ITEMS_FOR_CREATION_UUID = "items_for_creation_uuid" -DUP_FLAG = "duplication_flag" diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index 7241606..fbe9bdc 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -1,8 +1,5 @@ #!/usr/bin/env python3 -# TODO: use getattr with constants rather than self references -# TODO: parsing necessary to get rid of extra attributes? 
i dont think so - ################################################ # Libraries ################################################ @@ -10,6 +7,7 @@ from magma.validated_dictionary import ValidatedDictionary from magma.topological_sort import TopologicalSortHandler +from magma.magma_constants import * from dcicutils.misc_utils import CycleError ################################################ @@ -20,9 +18,11 @@ class MetaWorkflowStepCycleError(CycleError): pass class MetaWorkflowStepDuplicateError(ValueError): + """Custom ValueError when MetaWorkflows don't have unique name attributes.""" pass class MetaWorkflowStepSelfDependencyError(ValueError): + """Custom ValueError when MetaWorkflow Step has a dependency on itself.""" pass ################################################ @@ -30,58 +30,64 @@ class MetaWorkflowStepSelfDependencyError(ValueError): ################################################ class MetaWorkflowStep(ValidatedDictionary): """ - Class to represent a MetaWorkflow object, + Class to represent a MetaWorkflow, as a step within a MetaWorkflow Handler object """ - META_WORKFLOW_ATTR = "meta_workflow" - NAME_ATTR = "name" - DEPENDENCIES_ATTR = "dependencies" - DUP_FLAG_ATTR = "duplication_flag" - ITEMS_CREATION_PROP_TRACE = "items_for_creation_property_trace" - ITEMS_CREATION_UUID = "items_for_creation_uuid" - def __init__(self, input_dict): """ Constructor method, initialize object and attributes. - :param input_dict: a MetaWorkflow step (object) and accompanying info within handler, defined by json file + :param input_dict: a dictionary of MetaWorkflow step metadata :type input_dict: dict """ super().__init__(input_dict) # Validate presence of basic attributes of this MetaWorkflow step - self._validate_basic_attributes(self.META_WORKFLOW_ATTR, self.NAME_ATTR, self.DUP_FLAG_ATTR) + self._validate_basic_attributes(META_WORKFLOW, NAME) self._check_self_dependency() def _validate_basic_attributes(self, *list_of_attributes): """ - Validation of the JSON input for the MetaWorkflow step. + Validation of the input dictionary for the MetaWorkflow step. Checks that necessary MetaWorkflow attributes are present for this MetaWorkflow step. + + :param list_of_attributes: attributes that are checked + :type list_of_attributes: str(s) + :return: None, if all specified attributes are present + :raises ValueError: if this object doesn't have a specified attribute + :raises AttributeError: if not one (and only one) of items_for_creation attributes is present """ super()._validate_basic_attributes(*list_of_attributes) - # str, must be unique TODO: name filling in ff + + ## Check that one (and only one) of the following attributes is defined on this step: + ## ITEMS_FOR_CREATION_UUID or ITEMS_FOR_CREATION_PROP_TRACE try: # set None for [default] arg to not throw AttributeError - #TODO: move the differentiation with property trace to FF - # and just handle creation uuids here - if not getattr(self, self.ITEMS_CREATION_UUID, None): - getattr(self, self.ITEMS_CREATION_PROP_TRACE) + #TODO: handle this within ff instead? 
It is CGAP portal-specific + if not getattr(self, ITEMS_FOR_CREATION_UUID, None): + getattr(self, ITEMS_FOR_CREATION_PROP_TRACE) except AttributeError as e: raise AttributeError("Object validation error, {0}\n" .format(e.args[0])) # for items for creation, this object can only have # either the UUID or property trace, but not both - if hasattr(self, self.ITEMS_CREATION_PROP_TRACE) and hasattr(self, self.ITEMS_CREATION_UUID): + if hasattr(self, ITEMS_FOR_CREATION_PROP_TRACE) and hasattr(self, ITEMS_FOR_CREATION_UUID): raise AttributeError("Object validation error, 'MetaWorkflowStep' object cannot have both of the following attributes: 'items_for_creation_property_trace' and 'items_for_creation_uuid'") def _check_self_dependency(self): - if hasattr(self, self.DEPENDENCIES_ATTR): - dependencies = getattr(self, self.DEPENDENCIES_ATTR) + """ + Check that this MetaWorkflow Step object doesn't have a self-dependency. + + :return: None, if no self-dependencies present + :raises MetaWorkflowStepSelfDependencyError: if there is a self-dependency + """ + if hasattr(self, DEPENDENCIES): + dependencies = getattr(self, DEPENDENCIES) for dependency in dependencies: - if dependency == getattr(self, self.NAME_ATTR): + if dependency == getattr(self, NAME): raise MetaWorkflowStepSelfDependencyError(f'"{dependency}" has a self dependency.') @@ -91,68 +97,76 @@ def _check_self_dependency(self): class MetaWorkflowHandler(ValidatedDictionary): """ Class representing a MetaWorkflow Handler object, - a list of MetaWorkflows with specified dependencies + including a list of MetaWorkflows with specified dependencies & other metadata """ - UUID_ATTR = "uuid" - META_WORKFLOWS_ATTR = "meta_workflows" - META_WORKFLOW_NAME_ATTR = "name" - META_WORKFLOW_DEPENDENCIES_ATTR = "dependencies" - def __init__(self, input_dict): """ Constructor method, initialize object and attributes. - :param input_dict: MetaWorkflow Handler object, defined by json file from portal + :param input_dict: MetaWorkflow Handler dict, defined by json file from CGAP portal :type input_dict: dict """ ### Basic attributes ### super().__init__(input_dict) - super()._validate_basic_attributes(self.UUID_ATTR) + super()._validate_basic_attributes(UUID) ### Calculated attributes ### - # set meta_workflows attribute -- the following used to create ordered step list - # and creates step objects + # set meta_workflows attribute + # Using meta_workflows array of dicts from CGAP MetaWorkflow Handler + # create dict of the form {meta_workflow_name: MetaWorkflow Step object} self._set_meta_workflows_dict() - # order the meta_workflows list based on dependencies - # this ordered list is what's used to create the array of mwf runs in Run handler + # Create ordered MetaWorkflows name list based on dependencies + # This ordered list is what's used to create the array of MetaWorkflow Runs in Run handler self.ordered_meta_workflows = self._create_ordered_meta_workflows_list() def _set_meta_workflows_dict(self): """ - Checks for meta_workflows attribute. + Checks for meta_workflows attribute (an array of MetaWorkflows and their metadata) from CGAP portal. 
- If nonexistent, set as an empty dictionary - If present, copy that list temporarily and redefine as a dictionary - of the form {meta_workflow_name: meta_workflow_step,....} - getting rid of duplicates in the process (by MetaWorkflow name) + If nonexistent, set handler's meta_workflows attribute as an empty dictionary + If present, copy that list temporarily and redefine meta_workflows attribute + as a dictionary of the form {meta_workflow_name: MetaWorkflow Step object,....} + checking for duplicate steps in the process (i.e. non-unique MetaWorkflow names) :return: None, if all MetaWorkflowSteps are created successfully + :raises MetaWorkflowStepDuplicateError: if there are duplicate MetaWorkflows, by name """ - if not hasattr(self, self.META_WORKFLOWS_ATTR): + if not hasattr(self, META_WORKFLOWS): # if not present, set attribute as empty dictionary - setattr(self, self.META_WORKFLOWS_ATTR, {}) + setattr(self, META_WORKFLOWS, {}) else: - orig_mwf_list_copy = deepcopy(getattr(self, self.META_WORKFLOWS_ATTR)) + orig_meta_workflow_list_copy = deepcopy(getattr(self, META_WORKFLOWS)) - temp_mwf_step_dict = {} + temp_meta_workflow_step_dict = {} - for mwf in orig_mwf_list_copy: - # create MetaWorkflowStep object for this metaworkflow - mwf_step = MetaWorkflowStep(mwf) + for meta_workflow in orig_meta_workflow_list_copy: + # create MetaWorkflowStep object for this MetaWorkflow + meta_workflow_step = MetaWorkflowStep(meta_workflow) # then add to the meta_workflows dictionary - # of the form {mwf["name"]: MetaWorkflowStep(mwf)} - if temp_mwf_step_dict.setdefault(mwf["name"], mwf_step) != mwf_step: - raise MetaWorkflowStepDuplicateError(f'"{mwf["name"]}" is a duplicate MetaWorkflow, all MetaWorkflow names must be unique.') + # of the form {meta_workflow["name"]: MetaWorkflowStep(meta_workflow)} + if temp_meta_workflow_step_dict.setdefault(meta_workflow["name"], meta_workflow_step) != meta_workflow_step: + raise MetaWorkflowStepDuplicateError(f'"{meta_workflow["name"]}" is a duplicate MetaWorkflow, \ + all MetaWorkflow names must be unique.') - # reset the "meta_workflows" attribute as an empty dictionary (rather than array) - setattr(self, self.META_WORKFLOWS_ATTR, temp_mwf_step_dict) + # redefine the "meta_workflows" attribute to this generated dictionary of MetaWorkflowStep objects + setattr(self, META_WORKFLOWS, temp_meta_workflow_step_dict) def _create_ordered_meta_workflows_list(self): - meta_workflows_dict = getattr(self, self.META_WORKFLOWS_ATTR) + """ + Using dictionary of MetaWorkflow name and their corresponding MetaWorkflowStep objects, + generate ordered list of MetaWorkflows, by name. + Uses TopologicalSorter to order these steps based on their defined dependencies. + + :return: list of valid topological sorting of MetaWorkflows (by name) + :rtype: list[str] + :raises MetaWorkflowStepCycleError: if there are cyclic dependencies among MetaWorkflow steps + i.e. no valid topological sorting of steps + """ + meta_workflows_dict = getattr(self, META_WORKFLOWS) try: # create "graph" that will be passed into the topological sorter diff --git a/magma/validated_dictionary.py b/magma/validated_dictionary.py index 7e10015..5422f52 100644 --- a/magma/validated_dictionary.py +++ b/magma/validated_dictionary.py @@ -32,7 +32,10 @@ def _validate_basic_attributes(self, *attributes_to_check): """ for attribute in attributes_to_check: try: - getattr(self, attribute) + retrieved_attr = getattr(self, attribute) + if retrieved_attr is None: + raise AttributeError("attribute %s cannot have value 'None'." 
% attribute) + # TODO: add this to the pytests except AttributeError as e: raise AttributeError("Object validation error, {0}\n" .format(e.args[0])) \ No newline at end of file diff --git a/test/test_metawfl_handler.py b/test/test_metawfl_handler.py index 4345e03..5c454b8 100644 --- a/test/test_metawfl_handler.py +++ b/test/test_metawfl_handler.py @@ -7,93 +7,101 @@ from copy import deepcopy from magma.metawfl_handler import * - -#TODO: throw error on self dependency +from magma.magma_constants import * ################################################################# # Vars ################################################################# MWF_HANDLER_NAME = "test_mwf_handler" -PROJECT = "test_project" -INSTITUTION = "test_institution" +MWF_HANDLER_PROJECT = "test_project" +MWF_HANDLER_INSTITUTION = "test_institution" MWF_HANDLER_UUID = "test_mwf_handler_uuid" TESTER_UUID = "test_item_uuid" -# basic meta_workflow steps (dicts) used in meta_workflows array -#TODO: for validation of basic attributes, what if the value of an attribute is None? -# e.g. name or meta_workflow in metaworkflowstep? (because my helper function -# only checks that you can get the actual attribute, but getattr works still -# if the value is None) +# Basic meta_workflow steps (dicts) used in meta_workflows array MWF_A = {"meta_workflow": "test_mwf_uuid_0", "name": "A"} MWF_B = {"meta_workflow": "test_mwf_uuid_1", "name": "B"} MWF_C = {"meta_workflow": "test_mwf_uuid_2", "name": "C"} MWF_D = {"meta_workflow": "test_mwf_uuid_3", "name": "D"} +# Dependencies DEP_ON_A = ["A"] DEP_ON_B = ["B"] DEP_ON_C = ["C"] DEP_ON_D = ["D"] -def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=None, items_for_creation_uuid=None, dependencies=None, duplication_flag=None): - dict_copy = deepcopy(metaworkflow_dict) +def meta_workflow_with_added_attrs(meta_workflow_dict, items_for_creation_property_trace=None, items_for_creation_uuid=None, dependencies=None): + """ + Helper function used to add non-required attributes to a MetaWorkflow step input dictionary. + Returns new MetaWorkflow step dictionary with added attributes. 
+
+    :param meta_workflow_dict: dictionary containing required attributes for MetaWorkflow step ("name" and "meta_workflow")
+    :type meta_workflow_dict: dict
+    :param items_for_creation_property_trace: property trace(s) of item(s) required to create MetaWorkflow Run from MetaWorkflow
+    :type items_for_creation_property_trace: str or list[str] or None
+    :param items_for_creation_uuid: uuid(s) of item(s) required to create MetaWorkflow Run from MetaWorkflow
+    :type items_for_creation_uuid: str or list[str] or None
+    :param dependencies: list of MetaWorkflows (names) that the current MetaWorkflow is dependent on
+    :type dependencies: list[str]
+    :return: reformatted MetaWorkflow dictionary with added attributes
+    """
+    dict_copy = deepcopy(meta_workflow_dict)
     if items_for_creation_property_trace:
-        dict_copy["items_for_creation_property_trace"] = items_for_creation_property_trace
+        dict_copy[ITEMS_FOR_CREATION_PROP_TRACE] = items_for_creation_property_trace
     if items_for_creation_uuid:
-        dict_copy["items_for_creation_uuid"] = items_for_creation_uuid
+        dict_copy[ITEMS_FOR_CREATION_UUID] = items_for_creation_uuid
     if dependencies is not None:
-        dict_copy["dependencies"] = dependencies
-    if duplication_flag is not None:
-        dict_copy["duplication_flag"] = duplication_flag
+        dict_copy[DEPENDENCIES] = dependencies
     return dict_copy
 
 # meta_workflows arrays for MetaWorkflow Handler
 # handler without uuid -- fails validation of basic attributes
 HANDLER_WITHOUT_UUID_DICT = {
-    "name": MWF_HANDLER_NAME,
-    "project": PROJECT,
-    "institution": INSTITUTION
+    NAME: MWF_HANDLER_NAME,
+    PROJECT: MWF_HANDLER_PROJECT,
+    INSTITUTION: MWF_HANDLER_INSTITUTION
 }
 
-# handler without metaworkflows array -- passes validation, should set empty metaworkflows array
+# handler without meta_workflows array -- passes validation, should set empty metaworkflows array
 HANDLER_WITHOUT_MWF_ARRAY_DICT = {
-    "name": MWF_HANDLER_NAME,
-    "project": PROJECT,
-    "institution": INSTITUTION,
-    "uuid": MWF_HANDLER_UUID
+    NAME: MWF_HANDLER_NAME,
+    PROJECT: MWF_HANDLER_PROJECT,
+    INSTITUTION: MWF_HANDLER_INSTITUTION,
+    UUID: MWF_HANDLER_UUID
 }
 
 # DAG_0
 # A B -----> C
-MWF_A_DAG_0 = mwf_with_added_attrs(MWF_A, None, TESTER_UUID, [], True)
-MWF_B_DAG_0 = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, [], True)
-MWF_B_DAG_0_W_DEP = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_A, True)
-MWF_C_DAG_0 = mwf_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B, True)
+MWF_A_DAG_0 = meta_workflow_with_added_attrs(MWF_A, None, TESTER_UUID, [])
+MWF_B_DAG_0 = meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, [])
+MWF_B_DAG_0_W_DEP = meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_A)
+MWF_C_DAG_0 = meta_workflow_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B)
 DAG_0_MWF_ARRAY = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0] # purposely in this order to test toposort
 HANDLER_DAG_0 = {
-    "name": MWF_HANDLER_NAME,
-    "project": PROJECT,
-    "institution": INSTITUTION,
-    "uuid": MWF_HANDLER_UUID,
-    "meta_workflows": DAG_0_MWF_ARRAY
+    NAME: MWF_HANDLER_NAME,
+    PROJECT: MWF_HANDLER_PROJECT,
+    INSTITUTION: MWF_HANDLER_INSTITUTION,
+    UUID: MWF_HANDLER_UUID,
+    META_WORKFLOWS: DAG_0_MWF_ARRAY
 }
 DAG_0_MWF_ARRAY_W_DUPLICATES = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0, MWF_B_DAG_0]
 HANDLER_DAG_0_W_DUPLICATES = {
-    "name": MWF_HANDLER_NAME,
-    "project": PROJECT,
-    "institution": INSTITUTION,
-    "uuid": MWF_HANDLER_UUID,
-    "meta_workflows": DAG_0_MWF_ARRAY_W_DUPLICATES
+    NAME: MWF_HANDLER_NAME,
+    PROJECT: MWF_HANDLER_PROJECT,
+    INSTITUTION: 
MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: DAG_0_MWF_ARRAY_W_DUPLICATES } DAG_0_MWF_ARRAY_W_DUPLICATES_BY_MWF_NAME = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0, MWF_B_DAG_0_W_DEP] HANDLER_DAG_0_W_DUPLICATES_BY_MWF_NAME = { - "name": MWF_HANDLER_NAME, - "project": PROJECT, - "institution": INSTITUTION, - "uuid": MWF_HANDLER_UUID, - "meta_workflows": DAG_0_MWF_ARRAY_W_DUPLICATES_BY_MWF_NAME + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: DAG_0_MWF_ARRAY_W_DUPLICATES_BY_MWF_NAME } REORDERED_MWFS_DAG_0 = [["A", "B", "C"], ["B", "A", "C"], ["B", "C", "A"]] @@ -103,17 +111,17 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No # | / | # ⋁ / | # A <----- C -MWF_A_DAG_1 = mwf_with_added_attrs(MWF_A, None, TESTER_UUID, DEP_ON_B + DEP_ON_C, True) -MWF_B_DAG_1 = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, [], True) -MWF_C_DAG_1 = mwf_with_added_attrs(MWF_C, None, TESTER_UUID, [], True) -MWF_D_DAG_1 = mwf_with_added_attrs(MWF_D, None, TESTER_UUID, DEP_ON_A + DEP_ON_B + DEP_ON_C, True) +MWF_A_DAG_1 = meta_workflow_with_added_attrs(MWF_A, None, TESTER_UUID, DEP_ON_B + DEP_ON_C) +MWF_B_DAG_1 = meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, []) +MWF_C_DAG_1 = meta_workflow_with_added_attrs(MWF_C, None, TESTER_UUID, []) +MWF_D_DAG_1 = meta_workflow_with_added_attrs(MWF_D, None, TESTER_UUID, DEP_ON_A + DEP_ON_B + DEP_ON_C) DAG_1_MWF_ARRAY = [MWF_A_DAG_1, MWF_B_DAG_1, MWF_C_DAG_1, MWF_D_DAG_1] HANDLER_DAG_1 = { - "name": MWF_HANDLER_NAME, - "project": PROJECT, - "institution": INSTITUTION, - "uuid": MWF_HANDLER_UUID, - "meta_workflows": DAG_1_MWF_ARRAY + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: DAG_1_MWF_ARRAY } REORDERED_MWFS_DAG_1 = [["B", "C", "A", "D"], ["C", "B", "A", "D"]] @@ -123,16 +131,16 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No # | | # | | # C <----- | -MWF_A_CYCLIC_0 = mwf_with_added_attrs(MWF_A, None, TESTER_UUID, [], True) -MWF_B_CYCLIC_0 = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_C, True) -MWF_C_CYCLIC_0 = mwf_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B, True) +MWF_A_CYCLIC_0 = meta_workflow_with_added_attrs(MWF_A, None, TESTER_UUID, []) +MWF_B_CYCLIC_0 = meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_C) +MWF_C_CYCLIC_0 = meta_workflow_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B) CYCLIC_0_MWF_ARRAY = [MWF_A_CYCLIC_0, MWF_B_CYCLIC_0, MWF_C_CYCLIC_0] HANDLER_CYCLIC_0 = { - "name": MWF_HANDLER_NAME, - "project": PROJECT, - "institution": INSTITUTION, - "uuid": MWF_HANDLER_UUID, - "meta_workflows": CYCLIC_0_MWF_ARRAY + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: CYCLIC_0_MWF_ARRAY } # CYCLIC_1 @@ -141,17 +149,17 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No # | | # | ⋁ # D <----- C -MWF_A_CYCLIC_1 = mwf_with_added_attrs(MWF_A, None, TESTER_UUID, DEP_ON_D, True) -MWF_B_CYCLIC_1 = mwf_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_A, True) -MWF_C_CYCLIC_1 = mwf_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B, True) -MWF_D_CYCLIC_1 = mwf_with_added_attrs(MWF_D, None, TESTER_UUID, DEP_ON_C, True) +MWF_A_CYCLIC_1 = meta_workflow_with_added_attrs(MWF_A, None, TESTER_UUID, DEP_ON_D) +MWF_B_CYCLIC_1 = 
meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_A) +MWF_C_CYCLIC_1 = meta_workflow_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B) +MWF_D_CYCLIC_1 = meta_workflow_with_added_attrs(MWF_D, None, TESTER_UUID, DEP_ON_C) CYCLIC_1_MWF_ARRAY = [MWF_A_CYCLIC_1, MWF_B_CYCLIC_1, MWF_C_CYCLIC_1, MWF_D_CYCLIC_1] HANDLER_CYCLIC_1 = { - "name": MWF_HANDLER_NAME, - "project": PROJECT, - "institution": INSTITUTION, - "uuid": MWF_HANDLER_UUID, - "meta_workflows": CYCLIC_1_MWF_ARRAY + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: CYCLIC_1_MWF_ARRAY } ################################################################# @@ -159,57 +167,57 @@ def mwf_with_added_attrs(metaworkflow_dict, items_for_creation_property_trace=No ################################################################# class TestMetaWorkflowStep: @pytest.mark.parametrize( - "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag, num_attributes", + "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, num_attributes", [ - (MWF_A, "sample_processing.samples", None, None, True, 4), - (MWF_B, None, TESTER_UUID, None, False, 4), - (MWF_B, None, TESTER_UUID, DEP_ON_A, True, 5) + (MWF_A, "sample_processing.samples", None, None, 3), + (MWF_B, None, TESTER_UUID, None, 3), + (MWF_B, None, TESTER_UUID, DEP_ON_A, 4) ] ) - def test_attribute_validation_no_errors(self, mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag, num_attributes): + def test_attribute_validation_no_errors(self, mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, num_attributes): """ Tests creation of appropriate MetaWorkflowStep objects, no errors raised. """ - completed_dict = mwf_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag) + completed_dict = meta_workflow_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies) meta_workflow_step_object = MetaWorkflowStep(completed_dict) assert num_attributes == len(meta_workflow_step_object.__dict__) - required_attributes = ["meta_workflow", "name", "duplication_flag"] + required_attributes = [META_WORKFLOW, NAME] for attr in required_attributes: assert hasattr(meta_workflow_step_object, attr) == True @pytest.mark.parametrize( - "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag", + "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies", [ - (MWF_C, "sample_processing.samples", TESTER_UUID, None, True), # has both uuid and property trace for items for creation - (MWF_A, None, None, None, True), # missing items for creation - (MWF_A, None, TESTER_UUID, None, None) # missing duplication flag + (MWF_C, "sample_processing.samples", TESTER_UUID, None), # has both uuid and property trace for items for creation + (MWF_A, None, None, None), # missing items for creation ] ) - def test_attribute_validation_attribute_errors(self, mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag): + def test_attribute_validation_attribute_errors(self, mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies): """ Tests creation of appropriate MetaWorkflowStep objects, - no errors raised. 
+        Attribute Error raised due to missing required attributes.
         """
         with pytest.raises(AttributeError) as attr_err_info:
-            completed_dict = mwf_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag)
+            completed_dict = meta_workflow_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies)
             MetaWorkflowStep(completed_dict)
         assert "Object validation error" in str(attr_err_info.value)
 
     @pytest.mark.parametrize(
-        "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag",
+        "mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies",
         [
-            (MWF_A, None, TESTER_UUID, DEP_ON_A, True)
+            (MWF_A, None, TESTER_UUID, DEP_ON_A)
         ]
     )
-    def test_check_self_dep(self, mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag):
+    def test_check_self_dep(self, mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies):
         """
+        Tests the method that checks that a MetaWorkflow Step doesn't depend on itself.
         """
         with pytest.raises(MetaWorkflowStepSelfDependencyError) as self_dep_err_err_info:
-            completed_dict = mwf_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies, duplication_flag)
+            completed_dict = meta_workflow_with_added_attrs(mwf_step_dict, items_for_creation_property_trace, items_for_creation_uuid, dependencies)
             MetaWorkflowStep(completed_dict)
-        assert mwf_step_dict["name"] in str(self_dep_err_err_info.value)
+        assert mwf_step_dict[NAME] in str(self_dep_err_err_info.value)
 
 
 class TestMetaWorkflowHandler:
@@ -218,12 +226,15 @@ class TestMetaWorkflowHandler:
         [(HANDLER_WITHOUT_UUID_DICT), (HANDLER_WITHOUT_MWF_ARRAY_DICT)]
     )
     def test_attribute_validation_mwf_handler(self, mwf_handler_dict):
+        """
+        Tests that the handler has all required attributes ("uuid").
+        """
         try:
             handler_obj = MetaWorkflowHandler(mwf_handler_dict)
         except AttributeError as attr_err_info:
             assert "Object validation error" in str(attr_err_info)
         else:
-            assert hasattr(handler_obj, "uuid") == True
+            assert hasattr(handler_obj, UUID) == True
 
 
 
@@ -235,10 +246,13 @@ def test_attribute_validation_mwf_handler(self, mwf_handler_dict):
         ]
     )
     def test_set_meta_workflows_dict(self, mwf_handler_dict, length_of_mwf_dict):
+        """
+        Tests the creation of the MetaWorkflow Step(s) dictionary.
+        """
         meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict)
-        assert len(getattr(meta_workflow_handler, "meta_workflows")) == length_of_mwf_dict
+        assert len(getattr(meta_workflow_handler, META_WORKFLOWS)) == length_of_mwf_dict
 
-        meta_workflow_steps_dict = getattr(meta_workflow_handler, "meta_workflows")
+        meta_workflow_steps_dict = getattr(meta_workflow_handler, META_WORKFLOWS)
         assert isinstance(meta_workflow_steps_dict, dict)
         for step in meta_workflow_steps_dict.values():
             assert isinstance(step, MetaWorkflowStep)
@@ -251,6 +265,10 @@ def test_set_meta_workflows_dict(self, mwf_handler_dict, length_of_mwf_dict):
         ]
     )
     def test_set_meta_workflows_dict_w_error(self, mwf_handler_dict):
+        """
+        Tests for the check of duplicate MetaWorkflow Steps, by name, during
+        creation of the MetaWorkflow Step(s) dictionary. 
+ """ with pytest.raises(MetaWorkflowStepDuplicateError) as dup_err_info: MetaWorkflowHandler(mwf_handler_dict) assert '"B" is a duplicate MetaWorkflow' in str(dup_err_info) @@ -265,8 +283,12 @@ def test_set_meta_workflows_dict_w_error(self, mwf_handler_dict): ] ) def test_create_ordered_meta_workflows_list(self, mwf_handler_dict, possible_reordered_mwf_lists): + """ + Tests the topological sorting of MetaWorkflow steps. + """ meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict) assert getattr(meta_workflow_handler, "ordered_meta_workflows") in possible_reordered_mwf_lists + # TODO: add to constants file? @pytest.mark.parametrize( @@ -277,6 +299,10 @@ def test_create_ordered_meta_workflows_list(self, mwf_handler_dict, possible_reo ] ) def test_cycles(self, mwf_handler_dict): + """ + Tests the topological sorting of MetaWorkflow steps, + raising MetaWorkflowStepCycleError because of presence of cycles. + """ with pytest.raises(MetaWorkflowStepCycleError) as cycle_err_info: MetaWorkflowHandler(mwf_handler_dict) assert "nodes are in a cycle" in str(cycle_err_info) \ No newline at end of file diff --git a/test/test_topological_sort.py b/test/test_topological_sort.py index d8a7dbf..6079db4 100644 --- a/test/test_topological_sort.py +++ b/test/test_topological_sort.py @@ -90,7 +90,6 @@ def meta_workflow_dict(simple_meta_workflow_metadata_list): # just to be able to create MetaWorkflowStep objects without error meta_workflow_dict[ITEMS_FOR_CREATION_UUID] = "foo" - meta_workflow_dict[DUP_FLAG] = False return meta_workflow_dict def create_input_meta_workflows_dict(array_of_meta_workflows): From ffdabb6de641d7a3023b413fe1c82fafd794597e Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 8 May 2023 06:13:19 -0400 Subject: [PATCH 31/38] docstrings for mwfr handler class and tests --- magma/magma_constants.py | 15 ++ magma/metawfl_handler.py | 2 +- magma/metawflrun_handler.py | 243 +++++++++++++++++------------- test/test_metawflrun_handler.py | 259 +++++++++++++++++++++----------- 4 files changed, 323 insertions(+), 196 deletions(-) diff --git a/magma/magma_constants.py b/magma/magma_constants.py index 257ad99..2a6b655 100644 --- a/magma/magma_constants.py +++ b/magma/magma_constants.py @@ -14,3 +14,18 @@ DEPENDENCIES = "dependencies" ITEMS_FOR_CREATION_PROP_TRACE = "items_for_creation_property_trace" ITEMS_FOR_CREATION_UUID = "items_for_creation_uuid" + +# MetaWorkflow Run Handler attributes +STATUS = "status" +FINAL_STATUS = "final_status" +ASSOCIATED_META_WORKFLOW_HANDLER = "meta_workflow_handler" +META_WORKFLOW_RUN = "meta_workflow_run" +META_WORKFLOW_RUNS = "meta_workflow_runs" +ITEMS_FOR_CREATION = "items_for_creation" +ERROR = "error" +# statuses +PENDING = "pending" +RUNNING = "running" +COMPLETED = "completed" +FAILED = "failed" +STOPPED = "stopped" diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index fbe9bdc..fff39bb 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -149,7 +149,7 @@ def _set_meta_workflows_dict(self): # then add to the meta_workflows dictionary # of the form {meta_workflow["name"]: MetaWorkflowStep(meta_workflow)} if temp_meta_workflow_step_dict.setdefault(meta_workflow["name"], meta_workflow_step) != meta_workflow_step: - raise MetaWorkflowStepDuplicateError(f'"{meta_workflow["name"]}" is a duplicate MetaWorkflow, \ + raise MetaWorkflowStepDuplicateError(f'"{meta_workflow["name"]}" is a duplicate MetaWorkflow, \ all MetaWorkflow names must be unique.') # redefine the "meta_workflows" attribute to this generated dictionary of 
MetaWorkflowStep objects diff --git a/magma/metawflrun_handler.py b/magma/metawflrun_handler.py index 9cfc56a..3a41a78 100644 --- a/magma/metawflrun_handler.py +++ b/magma/metawflrun_handler.py @@ -4,6 +4,7 @@ # Libraries ################################################ from magma.validated_dictionary import ValidatedDictionary +from magma.magma_constants import * ################################################ # MetaWorkflowRunStep @@ -12,32 +13,27 @@ class MetaWorkflowRunStep(ValidatedDictionary): """ Class to represent a MetaWorkflow Run object, as a step within a MetaWorkflow Run Handler object. - Assumption that this is based on ordered_meta_workflows list - from a MetaWorfklow Handler. + Assumption that this is based on ordered_meta_workflows (name) list + from a MetaWorkflow Handler. """ - NAME_ATTR = "name" # name of metaworkflow corresponding to the metaworkflow run - STATUS_ATTR = "status" - DEP_ATTR = "dependencies" - MWF_RUN_ATTR = "meta_workflow_run" #TODO: used within the handler itself - # ITEMS_CREATION_ATTR = "items_for_creation" #TODO: do this embedding in ff. BUT. make req in schema? - # this above TODO: is very important (unless checked elsewhere) - def __init__(self, input_dict): """ Constructor method, initialize object and attributes. - :param input_dict: a MetaWorkflow step (object) and accompanying info within handler, defined by json file + :param input_dict: dictionary representing a MetaWorkflow step (object) and accompanying info within handler :type input_dict: dict """ super().__init__(input_dict) # for automatically setting initial status to "pending", unless explicitly defined not to - if not hasattr(self, self.STATUS_ATTR): - setattr(self, self.STATUS_ATTR, "pending") + if not hasattr(self, STATUS): + setattr(self, STATUS, PENDING) # Validate presence of basic attributes of this MetaWorkflow step - self._validate_basic_attributes(self.NAME_ATTR, self.DEP_ATTR) + # TODO: make items_for_creation a required attr? + # !!!AND!!! meta_workflow_run --> not necessarily, not defined until creation of mwfr + self._validate_basic_attributes(NAME, DEPENDENCIES) ################################################ # MetaWorkflowRunHandler @@ -46,14 +42,9 @@ class MetaWorkflowRunHandler(ValidatedDictionary): """ Class representing a MetaWorkflowRun Handler object, a list of MetaWorkflowsRuns with specified dependencies, - and their status + and their status. """ - UUID_ATTR = "uuid" - ASSOCIATED_METAWORKFLOW_HANDLER_ATTR = "meta_workflow_handler" - META_WORKFLOW_RUNS_ATTR = "meta_workflow_runs" - FINAL_STATUS_ATTR = "final_status" - def __init__(self, input_dict): """ Constructor method, initialize object and attributes. 
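The Run Handler below assumes its meta_workflow_runs array arrives already topologically
sorted by the MetaWorkflow Handler. A minimal sketch of that ordering, assuming the stdlib
graphlib sorter is what _create_ordered_meta_workflows_list wraps (the graph literal here
is a made-up example, not portal data):

    from graphlib import TopologicalSorter, CycleError

    graph = {"A": {"B", "C"}, "B": set(), "C": {"B"}}  # {step name: names it depends on}
    try:
        ordered = list(TopologicalSorter(graph).static_order())  # one valid order: ["B", "C", "A"]
    except CycleError:
        pass  # surfaced as MetaWorkflowStepCycleError upstream ("nodes are in a cycle")
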
@@ -66,159 +57,197 @@ def __init__(self, input_dict): super().__init__(input_dict) - self._validate_basic_attributes(self.UUID_ATTR, self.ASSOCIATED_METAWORKFLOW_HANDLER_ATTR, self.META_WORKFLOW_RUNS_ATTR) + self._validate_basic_attributes(UUID, ASSOCIATED_META_WORKFLOW_HANDLER, META_WORKFLOW_RUNS) - # initial final status attribute upon creation - setattr(self, self.FINAL_STATUS_ATTR, "pending") + # initial final_status attribute upon creation should be "pending" + setattr(self, FINAL_STATUS, PENDING) ### Calculated attributes ### # by nature of how a MetaWorkflowRun Handler is created from the MetaWorkflow Handler, # the array "meta_workflow_runs" will already be in some valid topologically sorted order - #(based on topologically sorted list "meta_workflows" in the regular handler) - # here, though, we create a dictionary of the form {mwf_name: MetaWorkflowRunStep_object,...} + # here, though, we create a dictionary of the form {mwfr_name: MetaWorkflowRunStep_object,...} # for faster lookup and updating of steps - self.meta_workflow_run_step_dict = self._create_meta_workflow_run_step_objects() + self.meta_workflow_run_steps_dict = self._set_meta_workflow_runs_dict() - def _create_meta_workflow_run_step_objects(self): - # creates dict: {name_1: step_obj_1, name_2: step_obj_2,...} + def _set_meta_workflow_runs_dict(self): + """ + Using meta_workflow_runs attribute (an array of MetaWorkflow Runs and their metadata), + create a dictionary of the form {meta_workflow_run_name_a: meta_workflow_run_step_obj_a, ...}, + allowing for quicker lookup and updating of MetaWorkflowRunStep objects. + + :return: dictionary containing {MetaWorkflowRun name: MetaWorkflowRunStep object} key-value pairs + """ meta_workflow_run_step_dict = {} for meta_workflow_run in self.meta_workflow_runs: meta_workflow_run_step_object = MetaWorkflowRunStep(meta_workflow_run) - step_name = meta_workflow_run["name"] + step_name = meta_workflow_run[NAME] meta_workflow_run_step_dict[step_name] = meta_workflow_run_step_object return meta_workflow_run_step_dict - # to update final_status attribute of the handler + def update_final_status(self): """ - Check status for all MetaWorkflowRunStep objects. - Initial final status = pending - If a step is running and none others have failed or stopped, final_status = running - If all steps are completed, final_status = completed - If a step has failed, final_status = failed - If a step has been stopped, final_status = stopped + Update final_status of handler based on combined statuses of + all MetaWorkflowRunStep objects. + + If all steps are pending, final_status = pending. + If a step is running and none others have failed or stopped, final_status = running. + If all steps are completed, final_status = completed. + If a step has failed, final_status = failed. + If a step has been stopped, final_status = stopped. - :return: final_status + :return: final_status of the MetaWorkflow Run Handler :rtype: str """ - # options for mwf runs: pending, running, completed, failed, stopped - # TODO: additional final_status possibilities from mwf run schema --> inactive, quality metric failed (how to handle these??) 
-        # TODO: use setattr method consistently
-        all_steps_completed = True
+        all_steps_pending = True
+
+        for meta_workflow_run_step in self.meta_workflow_run_steps_dict.values():
+            current_step_status = getattr(meta_workflow_run_step, STATUS)
 
-        for meta_workflow_run_step in self.meta_workflow_run_step_dict.values():
-            if meta_workflow_run_step.status != "completed":
+            # checking if all steps are "completed" or "pending" and toggling corresponding flags
+            if current_step_status != COMPLETED:
                 all_steps_completed = False
-                if meta_workflow_run_step.status == "running":
-                    setattr(self, self.FINAL_STATUS_ATTR, "running")
-                elif meta_workflow_run_step.status == "failed":
-                    setattr(self, self.FINAL_STATUS_ATTR, "failed")
-                    break
-                elif meta_workflow_run_step.status == "stopped":
-                    setattr(self, self.FINAL_STATUS_ATTR, "stopped")
-                    break
+            if current_step_status != PENDING:
+                all_steps_pending = False
+
+            # if a step is neither "completed" nor "pending", update final_status accordingly
+            if current_step_status == RUNNING:
+                setattr(self, FINAL_STATUS, RUNNING)
+            elif current_step_status == FAILED:
+                setattr(self, FINAL_STATUS, FAILED)
+                break
+            elif current_step_status == STOPPED:
+                setattr(self, FINAL_STATUS, STOPPED)
+                break
 
         # if all the steps were successfully completed
         if all_steps_completed:
-            setattr(self, self.FINAL_STATUS_ATTR, "completed")
+            setattr(self, FINAL_STATUS, COMPLETED)
 
-        #TODO: update pytests here
-        return self.FINAL_STATUS_ATTR
+        # if all the steps were pending
+        if all_steps_pending:
+            setattr(self, FINAL_STATUS, PENDING)
 
-    #TODO: add this to pytests
-    def retrieve_meta_workflow_run_step_by_name(self, meta_workflow_run_name):
-        step_obj = self.meta_workflow_run_step_dict[meta_workflow_run_name]
-        return step_obj
+        return getattr(self, FINAL_STATUS)
 
-    # the following allows for resetting a MetaWorkflow Run Step
-    # this can happen only when the duplication flag is set to True
-    def reset_meta_workflow_run_step(self, meta_workflow_run_name):
+
+    def _retrieve_meta_workflow_run_step_obj_by_name(self, meta_workflow_run_name):
         """
-        Resets status and meta_workflow_run attributes of a MetaWorkflowRunStep, given its name
+        Given a MetaWorkflow Run name,
+        retrieve its corresponding MetaWorkflowRunStep object.
 
-        :param meta_workflow_run_name: name attribute of a MetaWorkflowRunStep
+        :param meta_workflow_run_name: name of MetaWorkflow Run to be retrieved
         :type meta_workflow_run_name: str
+        :return: MetaWorkflowRunStep object corresponding to the given name
+        :raises: KeyError if the MetaWorkflow Run name is invalid
         """
         try:
-            step_obj = self.retrieve_meta_workflow_run_step_by_name(meta_workflow_run_name)
-            # Reset the status of the MetaWorkflow Run
-            setattr(step_obj, step_obj.STATUS_ATTR, "pending")
-            # Remove and reset the attribute for the LinkTo to the corresponding MetaWorkflow Run
-            setattr(step_obj, step_obj.MWF_RUN_ATTR, None)
+            step_obj = self.meta_workflow_run_steps_dict[meta_workflow_run_name]
+            return step_obj
         except KeyError as key_err:
             raise KeyError("{0} is not a valid MetaWorkflowRun Step name.\n"
                            .format(key_err.args[0]))
 
+    #TODO: sharding of mwfrs.... 
+ - # this is a more generalized version of the above - # this is for redefining any attribute of a MetaWorkflow Run Step - def update_meta_workflow_run_step(self, meta_workflow_run_name, attribute, value): - try: - step_obj = self.retrieve_meta_workflow_run_step_by_name(meta_workflow_run_name) - # Reset the given attribute - setattr(step_obj, attribute, value) - except KeyError as key_err: - raise KeyError("{0} is not a valid MetaWorkflowRun Step name.\n" - .format(key_err.args[0])) + def get_meta_workflow_run_step_attr(self, meta_workflow_run_name, attribute_to_fetch): + """ + Given a MetaWorkflow Run name and an attribute to fetch, + retrieve this attribute from the corresponding MetaWorkflowRunStep object, + or None if the attribute to fetch doesn't exist on the MetaWorkflowRunStep object. + + :param meta_workflow_run_name: name of MetaWorkflow Run to be accessed + :type meta_workflow_run_name: str + :return: attribute_to_fetch's value from the MetaWorkflowRunStep object specified + :rtype: varied, or None if not an existing attribute on the given Run Step + :raises: KeyError if the MetaWorkflow Run name is invalid + """ + step_obj = self._retrieve_meta_workflow_run_step_obj_by_name(meta_workflow_run_name) + # Return the attribute_to_fetch + return getattr(step_obj, attribute_to_fetch, None) + + + def update_meta_workflow_run_step_obj(self, meta_workflow_run_name, attribute, value): + """ + Given a MetaWorkflow Run name, an attribute to update, and value to update it to, + retrieve its corresponding MetaWorkflowRunStep object by name + and redefine the given attribute with the provided new value. + + :param meta_workflow_run_name: name of MetaWorkflow Run to be retrieved and updated + :type meta_workflow_run_name: str + :param attribute: attribute to update + :type attribute: str + :param value: new value of the updated attribute + :type value: varies + :raises: KeyError if the MetaWorkflow Run name is invalid + """ + # Retrieve the specified step object + step_obj = self._retrieve_meta_workflow_run_step_obj_by_name(meta_workflow_run_name) + # Reset the given attribute + setattr(step_obj, attribute, value) - # TODO: also have to add this to pytests -- nonexistent attr? check w other fxn too - def get_step_attr(self, meta_workflow_run_name, attribute_to_fetch): - try: - step_obj = self.retrieve_meta_workflow_run_step_by_name(meta_workflow_run_name) - # Return the status - return getattr(step_obj, attribute_to_fetch, None) - except KeyError as key_err: - raise KeyError("{0} is not a valid MetaWorkflowRun Step name.\n" - .format(key_err.args[0])) def pending_steps(self): """ - returns a list of pending steps (by name) - if no more pending, return empty list + Returns a list of names of MetaWorkflowRunStep objects whose status is "pending". + Returns empty list if none are pending. + + :returns: list of pending steps, by name + :rtype: list[str] """ pending_steps_list = [] for meta_workflow_run_step in self.meta_workflow_runs: - step_name = meta_workflow_run_step["name"] - #TODO: make pending a global var - if self.get_step_attr(step_name, "status") == "pending": + step_name = meta_workflow_run_step[NAME] + if self.get_meta_workflow_run_step_attr(step_name, STATUS) == PENDING: pending_steps_list.append(step_name) return pending_steps_list def running_steps(self): """ - returns a list of running steps (by name) - if no more running, return empty list + Returns a list of names of MetaWorkflowRunStep objects whose status is "running". + Returns empty list if none are running. 
+ + :returns: list of running steps, by name + :rtype: list[str] """ running_steps_list = [] for meta_workflow_run in self.meta_workflow_runs: - associated_meta_workflow_name = meta_workflow_run["name"] - if self.get_step_attr(associated_meta_workflow_name, "status") == "running": + associated_meta_workflow_name = meta_workflow_run[NAME] + if self.get_meta_workflow_run_step_attr(associated_meta_workflow_name, STATUS) == RUNNING: running_steps_list.append(associated_meta_workflow_name) return running_steps_list # TODO: move to ff because portal specific - # and test out - def update_meta_workflows_array(self): + def update_meta_workflow_runs_array(self): """ - updates run_uuid, status, error attrs - for mwfr dicts for patching mwfr steps array + Following any updates to MetaWorkflowRunStep objects in meta_workflow_run_steps_dict, + this method is called in order to update the original meta_workflow_runs array of dicts. + Possible attributes that are updated are meta_workflow_run (a linkTo), + status, and error. + + This allows for future PATCHing of a meta_workflow_runs array on the CGAP portal, + by providing the updated meta_workflow_runs. + + :returns: updated meta_workflow_runs array """ + #TODO: make sure this works with sharding for meta_workflow_run_dict in self.meta_workflow_runs: - associated_meta_workflow_name = meta_workflow_run_dict["name"] - meta_workflow_run_uuid = self.get_step_attr(associated_meta_workflow_name, "run_uuid") - status = self.get_step_attr(associated_meta_workflow_name, "status") - error = self.get_step_attr(associated_meta_workflow_name, "error") + meta_workflow_run_name = meta_workflow_run_dict[NAME] + meta_workflow_run_linkto = self.get_meta_workflow_run_step_attr(meta_workflow_run_name, META_WORKFLOW_RUN) + status = self.get_meta_workflow_run_step_attr(meta_workflow_run_name, STATUS) + error = self.get_meta_workflow_run_step_attr(meta_workflow_run_name, ERROR) - if meta_workflow_run_uuid: - meta_workflow_run_dict["run_uuid"] = meta_workflow_run_uuid + if meta_workflow_run_linkto: + meta_workflow_run_dict[META_WORKFLOW_RUN] = meta_workflow_run_linkto if status: - meta_workflow_run_dict["status"] = status + meta_workflow_run_dict[STATUS] = status if error: - meta_workflow_run_dict["error"] = error + meta_workflow_run_dict[ERROR] = error return self.meta_workflow_runs \ No newline at end of file diff --git a/test/test_metawflrun_handler.py b/test/test_metawflrun_handler.py index a2c24c0..428143a 100644 --- a/test/test_metawflrun_handler.py +++ b/test/test_metawflrun_handler.py @@ -7,32 +7,22 @@ from copy import deepcopy from magma.metawflrun_handler import MetaWorkflowRunStep, MetaWorkflowRunHandler +from magma.magma_constants import * ################################################################# # Vars ################################################################# -#TODO: make globals for attributes that you're checking in the tests - MWF_RUN_HANDLER_NAME = "test_mwf_run_handler" -PROJECT = "test_project" -INSTITUTION = "test_institution" +MWF_RUN_PROJECT = "test_project" +MWF_RUN_INSTITUTION = "test_institution" MWF_RUN_HANDLER_UUID = "test_mwf_run_handler_uuid" TESTER_UUID = "test_item_uuid" -# statuses -PENDING = "pending" -RUNNING = "running" -COMPLETED = "completed" -FAILED = "failed" -STOPPED = "stopped" - -# basic meta_workflow steps (dicts) used in meta_workflows array -#TODO: for validation of basic attributes, what if the value of an attribute is None? -# e.g. name or meta_workflow in metaworkflowRunstep? 
(because my helper function
-# only checks that you can get the actual attribute, but getattr works still
-# if the value is None)
+
+# basic meta_workflow_run dicts used in meta_workflow_runs array
+# will have attributes added to them using mwf_run_with_added_attrs()
 MWFR_A = {"name": "A"}
 MWFR_B = {"name": "B"}
 MWFR_C = {"name": "C"}
@@ -45,99 +35,137 @@
 DEP_ON_C = ["C"]
 DEP_ON_D = ["D"]
 
-def mwf_run_with_added_attrs(metaworkflow_dict, dependencies=None, items_for_creation=None, status=None):
-    dict_copy = deepcopy(metaworkflow_dict)
+def mwf_run_with_added_attrs(meta_workflow_run_dict, dependencies=None, items_for_creation=None, \
+    status=None, meta_workflow_run_linkto=None, error=None):
+    """
+    Generates an updated meta_workflow_run_dict given a basic meta_workflow_run_dict and attributes to add.
+    These attributes are limited to dependencies, items_for_creation, status, meta_workflow_run linkTo, and error for these tests.
+
+    :param meta_workflow_run_dict: Dictionary with basic attribute(s) of a MetaWorkflow Run
+    :type meta_workflow_run_dict: dict
+    :param dependencies: MetaWorkflow Runs, by name, that the given MetaWorkflow Run depends on
+    :type dependencies: list
+    :param items_for_creation: Item linkTo(s) needed to create the given MetaWorkflow Run
+    :type items_for_creation: str or list[str]
+    :param status: the status of the given MetaWorkflow Run
+    :type status: str
+    :param meta_workflow_run_linkto: the linkTo to a "created" MetaWorkflow Run on CGAP portal
+    :type meta_workflow_run_linkto: str
+    :param error: error traceback at "creation" of a MetaWorkflow Run
+    :type error: str
+    :return: updated meta_workflow_run_dict
+    """
+    dict_copy = deepcopy(meta_workflow_run_dict)
     if dependencies is not None:
-        dict_copy["dependencies"] = dependencies
+        dict_copy[DEPENDENCIES] = dependencies
     if items_for_creation is not None:
-        dict_copy["items_for_creation"] = items_for_creation
+        dict_copy[ITEMS_FOR_CREATION] = items_for_creation
     if status is not None:
-        dict_copy["status"] = status
+        dict_copy[STATUS] = status
+    if meta_workflow_run_linkto is not None:
+        dict_copy[META_WORKFLOW_RUN] = meta_workflow_run_linkto
+    if error is not None:
+        dict_copy[ERROR] = error
     return dict_copy
 
 
 def mwfr_handler_dict_generator(meta_workflow_runs_array):
+    """
+    Given a meta_workflow_runs array, returns an input dict for
+    creation of a MetaWorkflow Run Handler object. 
+ + :param meta_workflow_runs_array: list of meta_workflow_run dicts + :type meta_workflow_runs_array: list[dict] + :return: dictionary to be used as input to instantiate a MetaWorkflow Run Handler object + """ return { - "name": MWF_RUN_HANDLER_NAME, - "project": PROJECT, - "institution": INSTITUTION, - "uuid": MWF_RUN_HANDLER_UUID, - "meta_workflow_handler": TESTER_UUID, - "meta_workflow_runs": meta_workflow_runs_array + NAME: MWF_RUN_HANDLER_NAME, + PROJECT: MWF_RUN_PROJECT, + INSTITUTION: MWF_RUN_INSTITUTION, + UUID: MWF_RUN_HANDLER_UUID, + ASSOCIATED_META_WORKFLOW_HANDLER: TESTER_UUID, + META_WORKFLOW_RUNS: meta_workflow_runs_array } # handler without uuid -- fails validation of basic attributes -HANDLER_WITHOUT_UUID_DICT = { - "name": MWF_RUN_HANDLER_NAME, - "project": PROJECT, - "institution": INSTITUTION, - "meta_workflow_handler": TESTER_UUID, - "meta_workflow_runs": [] -} +full_handler_dict_0 = mwfr_handler_dict_generator([]) +full_handler_dict_0.pop(UUID) +HANDLER_WITHOUT_UUID_DICT = full_handler_dict_0 + # handler without associated MetaWorkflow Handler uuid -- fails validation of basic attributes -HANDLER_WITHOUT_ASSOC_MWFH_DICT = { - "name": MWF_RUN_HANDLER_NAME, - "project": PROJECT, - "institution": INSTITUTION, - "uuid": MWF_RUN_HANDLER_UUID, - "meta_workflow_runs": [] -} +full_handler_dict_1 = mwfr_handler_dict_generator([]) +full_handler_dict_1.pop(ASSOCIATED_META_WORKFLOW_HANDLER) +HANDLER_WITHOUT_ASSOC_MWFH_DICT = full_handler_dict_1 # handler without meta_workflow_runs array -- fails validation of basic attributes -HANDLER_WITHOUT_META_WORKFLOW_RUNS_ARRAY = { - "name": MWF_RUN_HANDLER_NAME, - "project": PROJECT, - "institution": INSTITUTION, - "uuid": MWF_RUN_HANDLER_UUID, - "meta_workflow_handler": TESTER_UUID -} +HANDLER_WITHOUT_META_WORKFLOW_RUNS_ARRAY = mwfr_handler_dict_generator(None) +# Constructing a Run Handler with the below step dependencies # B -----> D # | ⋀ ⋀ # | / | # ⋁ / | # A <----- C + +# Pending MetaWorkflow Run dicts MWFR_A_PENDING = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, PENDING) MWFR_B_PENDING = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, PENDING) MWFR_C_PENDING = mwf_run_with_added_attrs(MWFR_C, [], TESTER_UUID, PENDING) MWFR_D_PENDING = mwf_run_with_added_attrs(MWFR_D, DEP_ON_A + DEP_ON_B + DEP_ON_C, TESTER_UUID, PENDING) +# Running MetaWorkflow Run dicts MWFR_A_RUNNING = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, RUNNING) MWFR_B_RUNNING = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, RUNNING) MWFR_C_RUNNING = mwf_run_with_added_attrs(MWFR_C, [], TESTER_UUID, RUNNING) MWFR_D_RUNNING = mwf_run_with_added_attrs(MWFR_D, DEP_ON_A + DEP_ON_B + DEP_ON_C, TESTER_UUID, RUNNING) +# Failed/stopped MetaWorkflowRun dicts MWFR_A_FAILED = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, FAILED) - MWFR_A_STOPPED = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, STOPPED) +# Completed MetaWorkflow Run dicts MWFR_A_COMPLETED = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, COMPLETED) MWFR_B_COMPLETED = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, COMPLETED) MWFR_C_COMPLETED = mwf_run_with_added_attrs(MWFR_C, [], TESTER_UUID, COMPLETED) MWFR_D_COMPLETED = mwf_run_with_added_attrs(MWFR_D, DEP_ON_A + DEP_ON_B + DEP_ON_C, TESTER_UUID, COMPLETED) +# Magma FF - specific attributes handled here (for updating meta_workflow_runs array method) +MWFR_B_COMPLETED_W_LINKTO = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, COMPLETED, "a_link_to") 
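For reference, the dicts these helpers compose are flat key-value pairs; substituting the
constants' literal values, MWFR_B_COMPLETED_W_LINKTO above expands to:

    {
        "name": "B",
        "dependencies": [],
        "items_for_creation": "test_item_uuid",
        "status": "completed",
        "meta_workflow_run": "a_link_to",
    }
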
+MWFR_A_FAILED_W_ERROR = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, FAILED, None, "error_message")
+MWFR_A_STOPPED_W_LINKTO_AND_ERROR = mwf_run_with_added_attrs(MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, STOPPED,\
+    "another_link_to", "and_another_error_message")
+
+# Note: these MetaWorkflowRuns above will be mixed and matched for testing purposes
+# See meta_workflow_runs arrays and Run Handler input dicts below
+
+# All steps pending
 PENDING_ARRAY = [MWFR_B_PENDING, MWFR_C_PENDING, MWFR_A_PENDING, MWFR_D_PENDING]
 HANDLER_PENDING = mwfr_handler_dict_generator(PENDING_ARRAY)
 
+# Handlers currently running
 RUNNING_MWFR_ARRAY = [MWFR_B_RUNNING, MWFR_C_RUNNING, MWFR_A_PENDING, MWFR_D_PENDING]
-RUNNING_MWFR_ARRAY_2 = [MWFR_B_COMPLETED, MWFR_C_RUNNING, MWFR_A_PENDING, MWFR_D_PENDING]
+RUNNING_MWFR_ARRAY_2 = [MWFR_B_COMPLETED_W_LINKTO, MWFR_C_RUNNING, MWFR_A_PENDING, MWFR_D_PENDING]
 # this wouldn't happen with THIS dag in particular,
-# but could in other cases (made for the sake of the final_status test for the handler)
-RUNNING_MWFR_ARRAY_3 = [MWFR_B_COMPLETED, MWFR_C_PENDING, MWFR_A_RUNNING, MWFR_D_PENDING]
+# but could in other cases (made for the sake of the final_status test for the handler TODO:)
+# RUNNING_MWFR_ARRAY_3 = [MWFR_B_COMPLETED, MWFR_C_PENDING, MWFR_A_RUNNING, MWFR_D_PENDING]
 HANDLER_STEPS_RUNNING = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY)
 HANDLER_STEPS_RUNNING_2 = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY_2)
-HANDLER_STEPS_RUNNING_3 = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY_3)
+# HANDLER_STEPS_RUNNING_3 = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY_3)
 
+# Handlers that have failed
 HALFWAY_DONE_N_FAIL_ARRAY = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_FAILED, MWFR_D_PENDING]
-HALFWAY_DONE_N_FAIL_ARRAY_2 = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_FAILED, MWFR_D_RUNNING]
+HALFWAY_DONE_N_FAIL_ARRAY_2 = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_FAILED_W_ERROR, MWFR_D_RUNNING]
 HANDLER_FAILED = mwfr_handler_dict_generator(HALFWAY_DONE_N_FAIL_ARRAY)
 HANDLER_FAILED_2 = mwfr_handler_dict_generator(HALFWAY_DONE_N_FAIL_ARRAY_2)
 
+# Handler that has been stopped
 HALFWAY_DONE_N_STOPPED_ARRAY = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_STOPPED, MWFR_D_PENDING]
+HALFWAY_DONE_N_STOPPED_ARRAY_2 = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_STOPPED_W_LINKTO_AND_ERROR, MWFR_D_PENDING]
 HANDLER_STOPPED = mwfr_handler_dict_generator(HALFWAY_DONE_N_STOPPED_ARRAY)
 
+# Handler that is completed
 COMPLETED_ARRAY = [MWFR_B_COMPLETED, MWFR_C_COMPLETED, MWFR_A_COMPLETED, MWFR_D_COMPLETED]
 HANDLER_COMPLETED = mwfr_handler_dict_generator(COMPLETED_ARRAY)
@@ -159,11 +187,10 @@ def test_attribute_validation(self, mwf_run_step_dict, dependencies, items_for_c
         """
         completed_dict = mwf_run_with_added_attrs(mwf_run_step_dict, dependencies, items_for_creation)
         meta_workflow_run_step_object = MetaWorkflowRunStep(completed_dict)
-        # import pdb; pdb.set_trace()
         assert num_attributes == len(meta_workflow_run_step_object.__dict__)
         assert meta_workflow_run_step_object.status == PENDING
 
-        required_attributes = ["name", "dependencies"]#, "items_for_creation"]
+        required_attributes = [NAME, DEPENDENCIES]#, "items_for_creation"]
         for attr in required_attributes:
             assert hasattr(meta_workflow_run_step_object, attr) == True
 
@@ -177,7 +204,7 @@ def test_attribute_validation(self, mwf_run_step_dict, dependencies, items_for_c
     def test_attribute_validation_attribute_errors(self, mwf_run_step_dict, dependencies, items_for_creation):
         """
         Tests creation of appropriate MetaWorkflowRunStep 
objects, - no errors raised. + Attribute Errors raised (missing required attributes). """ with pytest.raises(AttributeError) as attr_err_info: completed_dict = mwf_run_with_added_attrs(mwf_run_step_dict, dependencies, items_for_creation) @@ -192,8 +219,8 @@ def test_attribute_validation(self): # TODO: for now, doesn't fail if no associated_item -- could make this check in ff """ meta_workflow_run_handler = MetaWorkflowRunHandler(HANDLER_PENDING) - assert getattr(meta_workflow_run_handler, "final_status") == PENDING - required_attributes = ["uuid", "meta_workflow_handler"] + assert getattr(meta_workflow_run_handler, FINAL_STATUS) == PENDING + required_attributes = [UUID, ASSOCIATED_META_WORKFLOW_HANDLER] for attr in required_attributes: assert hasattr(meta_workflow_run_handler, attr) == True @@ -201,24 +228,28 @@ def test_attribute_validation(self): "input_dict", [ (HANDLER_WITHOUT_UUID_DICT), # fails because no uuid - (HANDLER_WITHOUT_ASSOC_MWFH_DICT), # fails because no associated metaworkflow handler + (HANDLER_WITHOUT_ASSOC_MWFH_DICT), # fails because no associated MetaWorkflow Handler (HANDLER_WITHOUT_META_WORKFLOW_RUNS_ARRAY) # fails because no meta_workflow_runs array ] ) def test_attribute_validation_attribute_errors(self, input_dict): """ Tests creation of appropriate MetaWorkflowRunHandler objects, - no errors raised. + Attribute Errors raised (missing required attributes). """ with pytest.raises(AttributeError) as attr_err_info: MetaWorkflowRunHandler(input_dict) assert "Object validation error" in str(attr_err_info.value) - def test_create_meta_workflow_run_step_objects(self): + def test_set_meta_workflow_runs_dict(self): + """ + Tests creation of MetaWorkflowRunStep objects for all MetaWorkflow Runs + in the meta_workflow_runs array, and creates dict out of them for quick access and update. + """ meta_workflow_run_handler = MetaWorkflowRunHandler(HANDLER_PENDING) - meta_workflow_run_step_dict = getattr(meta_workflow_run_handler, "meta_workflow_run_step_dict") - assert len(meta_workflow_run_step_dict) == 4 - for mwf_name, mwf_run_step in meta_workflow_run_step_dict.items(): + meta_workflow_run_steps_dict = getattr(meta_workflow_run_handler, "meta_workflow_run_steps_dict") + assert len(meta_workflow_run_steps_dict) == 4 + for mwf_name, mwf_run_step in meta_workflow_run_steps_dict.items(): assert mwf_name in MWF_NAMES_LIST assert isinstance(mwf_run_step, MetaWorkflowRunStep) @@ -228,7 +259,7 @@ def test_create_meta_workflow_run_step_objects(self): (HANDLER_PENDING, PENDING), (HANDLER_STEPS_RUNNING, RUNNING), (HANDLER_STEPS_RUNNING_2, RUNNING), - (HANDLER_STEPS_RUNNING_3, RUNNING), + # (HANDLER_STEPS_RUNNING_3, RUNNING), (HANDLER_FAILED, FAILED), (HANDLER_FAILED_2, FAILED), (HANDLER_STOPPED, STOPPED), @@ -236,51 +267,75 @@ def test_create_meta_workflow_run_step_objects(self): ] ) def test_update_final_status(self, input_dict, updated_final_status): + """ + Tests the updating of the final_status attribute of a Run Handler + based on the combination of MetaWorkflowRunStep object statuses. 
+        """
         meta_workflow_run_handler = MetaWorkflowRunHandler(input_dict)
         assert meta_workflow_run_handler.final_status == PENDING
         meta_workflow_run_handler.update_final_status()
         assert meta_workflow_run_handler.final_status == updated_final_status
 
     @pytest.mark.parametrize(
-        "input_dict, mwfr_step_name_to_reset",
+        "input_dict, meta_workflow_run_name, step_dict",
         [
-            (HANDLER_COMPLETED, "A"),
-            (HANDLER_COMPLETED, "non_existent_mwf_run_step")
+            (HANDLER_PENDING, "A", MWFR_A_PENDING),
+            (HANDLER_PENDING, "non_existent_mwf_run_step", None) # fails because invalid name
         ]
     )
-    def test_reset_meta_workflow_run_step(self, input_dict, mwfr_step_name_to_reset):
+    def test_retrieve_meta_workflow_run_step_obj_by_name(self, input_dict, meta_workflow_run_name, step_dict):
+        """
+        Tests the retrieval of a MetaWorkflowRunStep object by name.
+        """
         try:
-            handler_obj = MetaWorkflowRunHandler(input_dict)
-            prior_step_status = handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_reset].status
-            handler_obj.reset_meta_workflow_run_step(mwfr_step_name_to_reset)
-            updated_step_status = handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_reset].status
-            assert prior_step_status != updated_step_status
-            assert updated_step_status == PENDING
-            updated_step_run = handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_reset].meta_workflow_run
-            assert updated_step_run is None
+            meta_workflow_run_handler = MetaWorkflowRunHandler(input_dict)
+            result = meta_workflow_run_handler._retrieve_meta_workflow_run_step_obj_by_name(meta_workflow_run_name)
         except KeyError as key_err_info:
-            assert mwfr_step_name_to_reset in str(key_err_info)
+            assert meta_workflow_run_name in str(key_err_info)
+        else:
+            step = MetaWorkflowRunStep(step_dict)
+            assert type(result) == MetaWorkflowRunStep
+            assert result.__dict__ == step.__dict__
+
+    @pytest.mark.parametrize(
+        "input_dict, mwfr_step_name_to_access, attribute_to_fetch, expected_value",
+        [
+            (HANDLER_COMPLETED, "A", "status", COMPLETED),
+            (HANDLER_COMPLETED, "A", "non_existent_attr", None) # fails because invalid attribute name
+        ]
+    )
+    def test_get_meta_workflow_run_step_attr(self, input_dict, mwfr_step_name_to_access, attribute_to_fetch, expected_value):
+        """
+        Tests the retrieval of a MetaWorkflowRunStep object's attribute.
+        """
+        handler_obj = MetaWorkflowRunHandler(input_dict)
+        result = handler_obj.get_meta_workflow_run_step_attr(mwfr_step_name_to_access, attribute_to_fetch)
+        assert result == expected_value
+
 
     @pytest.mark.parametrize(
         "input_dict, mwfr_step_name_to_update, attribute, value",
         [
             (HANDLER_COMPLETED, "A", "status", FAILED),
-            (HANDLER_COMPLETED, "non_existent_mwf_run_step", None, None)
+            (HANDLER_COMPLETED, "non_existent_mwf_run_step", None, None) # fails because invalid name
         ]
     )
-    def test_update_meta_workflow_run_step(self, input_dict, mwfr_step_name_to_update, attribute, value):
+    def test_update_meta_workflow_run_step_obj(self, input_dict, mwfr_step_name_to_update, attribute, value):
+        """
+        Tests the updating of a MetaWorkflowRunStep object's attribute with the provided value. 
+ """ try: handler_obj = MetaWorkflowRunHandler(input_dict) - attr_value_before_change = getattr(handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_update], attribute) - handler_obj.update_meta_workflow_run_step(mwfr_step_name_to_update, attribute, value) - attr_value_after_change = getattr(handler_obj.meta_workflow_run_step_dict[mwfr_step_name_to_update], attribute) + attr_value_before_change = getattr(handler_obj.meta_workflow_run_steps_dict[mwfr_step_name_to_update], attribute) + handler_obj.update_meta_workflow_run_step_obj(mwfr_step_name_to_update, attribute, value) + attr_value_after_change = getattr(handler_obj.meta_workflow_run_steps_dict[mwfr_step_name_to_update], attribute) assert attr_value_before_change != attr_value_after_change assert attr_value_after_change == value except KeyError as key_err_info: assert mwfr_step_name_to_update in str(key_err_info) @pytest.mark.parametrize( - "input_dict, steps_to_run", + "input_dict, steps_currently_pending", [ (HANDLER_PENDING, MWF_NAMES_LIST), (HANDLER_STEPS_RUNNING, ["A", "D"]), @@ -290,10 +345,13 @@ def test_update_meta_workflow_run_step(self, input_dict, mwfr_step_name_to_updat (HANDLER_COMPLETED, []) ] ) - def test_pending_steps(self, input_dict, steps_to_run): + def test_pending_steps(self, input_dict, steps_currently_pending): + """ + Tests the listing of MetaWorkflow Run names that are pending. + """ handler_obj = MetaWorkflowRunHandler(input_dict) result = handler_obj.pending_steps() - assert result == steps_to_run + assert result == steps_currently_pending @pytest.mark.parametrize( "input_dict, steps_currently_running", @@ -307,6 +365,31 @@ def test_pending_steps(self, input_dict, steps_to_run): ] ) def test_running_steps(self, input_dict, steps_currently_running): + """ + Tests the listing of MetaWorkflow Run names that are running. + """ handler_obj = MetaWorkflowRunHandler(input_dict) result = handler_obj.running_steps() - assert result == steps_currently_running \ No newline at end of file + assert result == steps_currently_running + + + @pytest.mark.parametrize( + "input_dict, mwfr_steps_to_update, attrs_to_update, updated_values, expected_meta_workflow_runs_array", + [ + (HANDLER_STEPS_RUNNING, ["B", "B"], [STATUS, META_WORKFLOW_RUN], [COMPLETED, "a_link_to"], RUNNING_MWFR_ARRAY_2), + (HANDLER_FAILED, ["A", "D"], [ERROR, STATUS], ["error_message", RUNNING], HALFWAY_DONE_N_FAIL_ARRAY_2), + (HANDLER_STOPPED, ["A", "A"], [META_WORKFLOW_RUN, ERROR], ["another_link_to", "and_another_error_message"], HALFWAY_DONE_N_STOPPED_ARRAY_2) + ] + ) + def test_update_meta_workflow_runs_array(self, input_dict, mwfr_steps_to_update, attrs_to_update, updated_values, expected_meta_workflow_runs_array): + """ + Tests the updating of a meta_workflow_runs array based on + changed attributes of MetaWorkflowRunStep objects. 
+ """ + handler_obj = MetaWorkflowRunHandler(input_dict) + # import pdb; pdb.set_trace() + for idx in range(len(mwfr_steps_to_update)): + handler_obj.update_meta_workflow_run_step_obj(mwfr_steps_to_update[idx], attrs_to_update[idx], updated_values[idx]) + + result = handler_obj.update_meta_workflow_runs_array() + assert result == expected_meta_workflow_runs_array \ No newline at end of file From d5deff59acb1abb7f7186278d0264d4242b42d71 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Mon, 8 May 2023 10:22:21 -0400 Subject: [PATCH 32/38] quasi updated handler creation docstrings --- magma/magma_constants.py | 15 ++ magma/metawfl_handler.py | 2 +- magma_ff/create_metawflrun_handler.py | 247 +++++++++++++------------- test/test_metawfl_handler.py | 3 +- 4 files changed, 139 insertions(+), 128 deletions(-) diff --git a/magma/magma_constants.py b/magma/magma_constants.py index 2a6b655..54a360d 100644 --- a/magma/magma_constants.py +++ b/magma/magma_constants.py @@ -3,12 +3,14 @@ ################################################################# # Vars ################################################################# +TITLE = "title" # MetaWorkflow Handler attributes PROJECT = "project" INSTITUTION = "institution" UUID = "uuid" META_WORKFLOWS = "meta_workflows" +ORDERED_META_WORKFLOWS = "ordered_meta_workflows" META_WORKFLOW = "meta_workflow" NAME = "name" DEPENDENCIES = "dependencies" @@ -19,6 +21,7 @@ STATUS = "status" FINAL_STATUS = "final_status" ASSOCIATED_META_WORKFLOW_HANDLER = "meta_workflow_handler" +ASSOCIATED_ITEM = "associated_item" META_WORKFLOW_RUN = "meta_workflow_run" META_WORKFLOW_RUNS = "meta_workflow_runs" ITEMS_FOR_CREATION = "items_for_creation" @@ -29,3 +32,15 @@ COMPLETED = "completed" FAILED = "failed" STOPPED = "stopped" + + +# +MWFR_TO_HANDLER_STEP_STATUS_DICT = { + "pending": "pending", + "running": "running", + "completed": "completed", + "failed": "failed", + "inactive": "pending", + "stopped": "stopped", + "quality metric failed": "failed" +} diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index fff39bb..71c3cd0 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -120,7 +120,7 @@ def __init__(self, input_dict): # Create ordered MetaWorkflows name list based on dependencies # This ordered list is what's used to create the array of MetaWorkflow Runs in Run handler - self.ordered_meta_workflows = self._create_ordered_meta_workflows_list() + setattr(self, ORDERED_META_WORKFLOWS, self._create_ordered_meta_workflows_list()) def _set_meta_workflows_dict(self): """ diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py index 6ef3c06..8b8658b 100644 --- a/magma_ff/create_metawflrun_handler.py +++ b/magma_ff/create_metawflrun_handler.py @@ -13,27 +13,14 @@ from magma_ff.metawfl_handler import MetaWorkflowHandler from magma_ff.metawflrun_handler import MetaWorkflowRunHandler from magma_ff.utils import make_embed_request +from magma.magma_constants import * -################################################ -# Constants -################################################ -# UUID = "uuid" -#TODO: make a file of these - -MWFR_TO_HANDLER_STEP_STATUS_DICT = { - "pending": "pending", - "running": "running", - "completed": "completed", - "failed": "failed", - "inactive": "pending", - "stopped": "stopped", - "quality metric failed": "failed" -} ################################################ # Custom Exception class(es) ################################################ class 
MetaWorkflowRunHandlerCreationError(Exception): + """Custom Exception when MetaWorkflow Run Handler encounters error during creation.""" pass ################################################ @@ -44,51 +31,24 @@ class MetaWorkflowRunHandlerFromItem: Base class to hold common methods required to create and POST a MetaWorkflowRun Handler, and PATCH the Item used to create it (the "associated item"). """ - # Schema constants - PROJECT = "project" - INSTITUTION = "institution" - UUID = "uuid" - TITLE = "title" - ASSOCIATED_META_WORKFLOW_HANDLER = "meta_workflow_handler" - ASSOCIATED_ITEM = "associated_item" - FINAL_STATUS = "final_status" - META_WORKFLOW_RUNS = "meta_workflow_runs" - - # specific to a mwf run step - META_WORKFLOW_RUN = "meta_workflow_run" - NAME = "name" - MWFR_STATUS = "status" - DEPENDENCIES = "dependencies" - ITEMS_FOR_CREATION = "items_for_creation" - ERROR = "error" - DUP_FLAG = "duplication_flag" - - # mwf step (from template mwf handler) - MWF_UUID = "meta_workflow" - ITEMS_FOR_CREATION_UUID = "items_for_creation_uuid" - ITEMS_FOR_CREATION_PROP_TRACE = "items_for_creation_property_trace" - - PENDING = "pending" - FAILED = "failed" # for embed requests - #TODO: use from constants file plz - ASSOC_ITEM_FIELDS = [ + ASSOCIATED_ITEM_FIELDS = [ "project", "institution", "uuid", - "meta_workflow_runs.uuid", - "meta_workflow_runs.meta_workflow", #TODO: this is sometimes an @id?? - "meta_workflow_runs.final_status" + # "meta_workflow_runs.uuid", + # "meta_workflow_runs.meta_workflow", + # "meta_workflow_runs.final_status" + # TODO: these last three are for the case of reintegrating duplication flag ] - # MWFH_FIELDS = [ - # "uuid", - # "meta_workflows", - # "meta_workflows.items_for_creation_property_trace", #TODO: same as above?? - # "meta_workflows.items_for_creation_uuid" - # ] - + META_WORKFLOW_HANDLER_FIELDS = [ + "uuid", + "title", + "meta_workflows", + "meta_workflows.*" + ] # TODO: is this correct?? also, will we end up patching on assoc item?? 
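A minimal sketch of how a field list like the one above drives make_embed_request
(the identifier and auth key here are placeholders, not real portal data):

    # Hypothetical usage of the embed helper imported from magma_ff/utils.py
    handler_fields = MetaWorkflowRunHandlerFromItem.META_WORKFLOW_HANDLER_FIELDS
    handler_metadata = make_embed_request(
        "some-meta-workflow-handler-uuid",  # placeholder identifier
        handler_fields,                     # only these fields come back embedded
        {"key": "foo"},                     # placeholder auth key
        single_item=True,                   # one item dict rather than a list
    )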
# TODO: if so, create a schema mixin (seems unnecessary, for now) @@ -102,7 +62,7 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, on which this MetaWorkflow Run Handler is being created :type associated_item_identifier: str :param meta_workflow_handler_identifier: Associated MetaWorkflow Handler identifier - (UUID, @id, or accession) -- TODO: does embed request work with an accession + (UUID, @id, or accession) -- TODO: does embed request work with an accession (yes) :type meta_workflow_handler_identifier: str :param auth_key: Portal authorization key :type auth_key: dict @@ -111,129 +71,166 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, """ self.auth_key = auth_key - self.associated_item_attributes = make_embed_request( + # Acquire associated item fields needed to create the Run Handler + self.associated_item_dict = make_embed_request( associated_item_identifier, - self.ASSOC_ITEM_FIELDS, + self.ASSOCIATED_ITEM_FIELDS, self.auth_key, single_item=True ) - if not self.associated_item_attributes: + if not self.associated_item_dict: raise MetaWorkflowRunHandlerCreationError( - "No Item found for given 'associated item' identifier: %s" % associated_item_identifier + "No Item found for given 'associated_item' identifier: %s" % associated_item_identifier ) - # check that the specified identifier for the associated MWF Handler does indeed exist on portal + # Acquired fields from associated MetaWorkflow Handler needed to create the Run Handler # TODO: a check to make sure it is indeed of mwf handler type? does this function exist on ff_utils? # same for above associated item request - #TODO: is this even necessary?? is it too complicated of a call to - # just check it exists? what about just a get request? - # self.meta_workflow_handler_json = make_embed_request( - # meta_workflow_handler_identifier, - # self.MWFH_FIELDS, - # self.auth_key, - # single_item=True - # ) - self.meta_workflow_handler_json = ff_utils.get_metadata( - meta_workflow_handler_identifier, - key=self.auth_key, - add_on="frame=raw" #TODO: or request object view + self.meta_workflow_handler_dict = make_embed_request( + meta_workflow_handler_identifier, + self.META_WORKFLOW_HANDLER_FIELDS, + self.auth_key, + single_item=True ) - if not self.meta_workflow_handler_json: + if not self.meta_workflow_handler_dict: raise MetaWorkflowRunHandlerCreationError( - "No MetaWorkflow Handler found for given identifier: %s" + "No MetaWorkflow Handler found for given 'meta_workflow_handler' identifier: %s" % meta_workflow_handler_identifier ) - # now fill in the rest of the attributes of this MWF Run Handler - self.project = self.associated_item_attributes.get(self.PROJECT) # project is same as associated item - self.institution = self.associated_item_attributes.get(self.INSTITUTION) # institution is same as associated item - self.associated_item_id = self.associated_item_attributes.get(self.UUID) # get uuid of associated item - self.meta_workflow_handler_id = self.meta_workflow_handler_json.get(self.UUID) # get uuid of the template mwf handler - self.meta_workflow_run_handler_uuid = str(uuid.uuid4()) #TODO: put exception to catch duplicates? 
i think the portal handles this + # Using associated item and associated MetaWorkflow Handler fields acquired, + # define some basic attrs for the Run Handler: project, institution, associated_item, meta_workflow_handler + self.project = self.associated_item_dict.get(PROJECT) # project is same as associated item + self.institution = self.associated_item_dict.get(INSTITUTION) # institution is same as associated item + self.associated_item_id = self.associated_item_dict.get(UUID) # get uuid of associated_item + self.meta_workflow_handler_id = self.meta_workflow_handler_dict.get(UUID) # get uuid of the template mwf handler - # and now create the actual MetaWorkflow Run Handler - # this returns the dict itself, not just an ID - # this attribute is later used to run the thang + self.meta_workflow_run_handler_uuid = str(uuid.uuid4()) + + # And now create the actual MetaWorkflow Run Handler using the instance vars defined above + # This returns the complete, populated MetaWorkflow Run Handler dictionary that can be POSTed self.meta_workflow_run_handler = self.create_meta_workflow_run_handler() def create_meta_workflow_run_handler(self): """ - Create MetaWorkflowRun Handler, which will later be POSTed to the CGAP portal. + Create MetaWorkflowRun Handler dictionary, which can later be POSTed to the CGAP portal. :return: MetaWorkflowRun Handler dictionary (for the portal JSON object) :rtype: dict """ - #TODO: check Doug's prior comments on title - meta_workflow_handler_title = self.meta_workflow_handler_json.get(self.TITLE) - creation_date = datetime.date.today().isoformat() - title = "MetaWorkflowRun Handler %s created %s" % ( - meta_workflow_handler_title, - creation_date - ) - + # Create basic MetaWorkflow Run Handler dictionary, using instance variables meta_workflow_run_handler = { - self.PROJECT: self.project, - self.INSTITUTION: self.institution, - self.UUID: self.meta_workflow_run_handler_uuid, - self.TITLE: title, - self.ASSOCIATED_META_WORKFLOW_HANDLER: self.meta_workflow_handler_id, - self.ASSOCIATED_ITEM: self.associated_item_id, - self.FINAL_STATUS: self.PENDING + PROJECT: self.project, + INSTITUTION: self.institution, + UUID: self.meta_workflow_run_handler_uuid, + ASSOCIATED_META_WORKFLOW_HANDLER: self.meta_workflow_handler_id, + ASSOCIATED_ITEM: self.associated_item_id, + FINAL_STATUS: PENDING } + # Create the title of the Run Handler, based on associated MetaWorkflow Handler's title + # and the timestamp at the time of creation of this class instance + meta_workflow_handler_title = self.meta_workflow_handler_dict.get(TITLE) + if meta_workflow_handler_title: + creation_date = datetime.date.today().isoformat() + title = "MetaWorkflowRun Handler %s created %s" % ( + meta_workflow_handler_title, + creation_date + ) + meta_workflow_run_handler[TITLE] = title - # now call helper function to populate and create the MetaWorkflow Runs + + # now call helper method to populate and create the meta_workflow_runs array meta_workflow_runs_array = self.create_meta_workflow_runs_array() - meta_workflow_run_handler[self.META_WORKFLOW_RUNS] = meta_workflow_runs_array + meta_workflow_run_handler[META_WORKFLOW_RUNS] = meta_workflow_runs_array #TODO: check for whether this is empty or nah? 
no for now # putting the burden of this error on the user - # return the completed MWFR Handler dictionary, which follows the CGAP schema + # return the completed MetaWorkflow Run Handler dictionary, which follows the CGAP schema return meta_workflow_run_handler def create_meta_workflow_runs_array(self): - # create MetaWorkflowHandler object - associated_meta_workflow_handler_object = MetaWorkflowHandler(self.meta_workflow_handler_json) - # this'll make sure all necessary attrs are present in the following run handler creation - - # then extract the ordered list of metaworkflows - #TODO: add ordered_meta_workflows to constants file - # and error catching with this call - ordered_meta_workflows = getattr(associated_meta_workflow_handler_object, "ordered_meta_workflows") + """ + Creates meta_workflow_runs array for a MetaWorkflowRun Handler dictionary. + These objects are in correct order due to topological sorting in + the MetaWorkflowHandler class, and uses the associated MetaWorkflow Handler's + ordered_meta_workflows array as a template. + + :return: array of meta_workflow_runs metadata, following CGAP schema + :rtype: list[dict] + """ + + # Create MetaWorkflowHandler object + # This ensures all necessary attrs are present in the following Run Handler creation + # and that MetaWorkflow Steps are topologically sorted + associated_meta_workflow_handler_object = MetaWorkflowHandler(self.meta_workflow_handler_dict) - ordered_meta_workflow_runs = [] # will eventually be the completed pending MWFRs array, in order - for meta_workflow_step_obj in ordered_meta_workflows: - meta_workflow_run_step_obj = {} # will become the populated MWFR step object - # mwfr attrs: meta_workflow_run - # attrs that stay the same and are passed in: name, dependencies - meta_workflow_run_step_obj[self.NAME] = meta_workflow_step_obj[self.NAME] - meta_workflow_run_step_obj[self.DEPENDENCIES] = meta_workflow_step_obj[self.DEPENDENCIES] + # Extract the ordered list of MetaWorkflows + try: + ordered_meta_workflows = getattr(associated_meta_workflow_handler_object, ORDERED_META_WORKFLOWS) + except AttributeError as attr_err: + raise MetaWorkflowRunHandlerCreationError( + "MetaWorkflow Handler does not contain ordered MetaWorkflow steps: \n%s" % str(attr_err) + ) + else: # edge case: ordered_meta_workflows is of NoneType + if ordered_meta_workflows is None: + raise MetaWorkflowRunHandlerCreationError( + "MetaWorkflow Handler 'ordered_meta_workflows' attribute is of NoneType \n%s" + ) + + + # Will eventually be the completed pending meta_workflow_runs array, in order + ordered_meta_workflow_runs = [] - # handle items_for_creation attribute - if self.ITEMS_FOR_CREATION_UUID in meta_workflow_step_obj.keys(): - meta_workflow_run_step_obj[self.ITEMS_FOR_CREATION] = meta_workflow_step_obj[self.ITEMS_FOR_CREATION_UUID] - else: # make embed requests as necessary + # Go through the ordered MetaWorkflow steps to populate basic MetaWorkflow Runs + for meta_workflow_step_obj in ordered_meta_workflows: + # will become the populated MetaWorkflowRun step object + meta_workflow_run_step_obj = {} + + # Attrs that stay the same and are passed in: name, dependencies + meta_workflow_run_step_obj[NAME] = meta_workflow_step_obj[NAME] + meta_workflow_run_step_obj[DEPENDENCIES] = meta_workflow_step_obj[DEPENDENCIES] + + ## Handle conversion of MetaWorkflow items_for_creation_(uuid/prop_trace) + ## to items_for_creation (just LinkTos) + + # if items_for_creation_uuid, just copy over + if ITEMS_FOR_CREATION_UUID in meta_workflow_step_obj.keys(): + 
meta_workflow_run_step_obj[ITEMS_FOR_CREATION] = meta_workflow_step_obj[ITEMS_FOR_CREATION_UUID]
+            # otherwise, dealing with property traces. Make necessary embed requests
+            # and convert property trace(s) to uuid(s)
+            else:
                 items_for_creation_uuids = []
-                for item_prop_trace in meta_workflow_step_obj[self.ITEMS_FOR_CREATION_PROP_TRACE]:
+                for item_prop_trace in meta_workflow_step_obj[ITEMS_FOR_CREATION_PROP_TRACE]:
                     item_uuid = make_embed_request(
                         self.associated_item_id,
-                        [item_prop_trace],
+                        [item_prop_trace + ".uuid"], # TODO: will this actually work -- test manually
                         self.auth_key,
                         single_item=True
-                    ) #TODO: add check
+                    )
+                    if not item_uuid:
+                        raise MetaWorkflowRunHandlerCreationError(
+                            "Invalid property trace '%s' on item with the following ID: %s"
+                            % (item_prop_trace, self.associated_item_id)
+                        )
                     items_for_creation_uuids.append(item_uuid)
-                meta_workflow_run_step_obj[self.ITEMS_FOR_CREATION] = items_for_creation_uuids
-
+                meta_workflow_run_step_obj[ITEMS_FOR_CREATION] = items_for_creation_uuids
+
             # Basic dict for current MetaWorkflow Run step complete. Now append.
             ordered_meta_workflow_runs.append(meta_workflow_run_step_obj)
 
         return ordered_meta_workflow_runs
 
     def post_meta_workflow_run_handler(self):
+        """
+        Posts meta_workflow_run_handler dict to CGAP portal.
+
+        :raises: Exception when the dict cannot be POSTed. Could be due to schema incongruities, for example.
+        """
         try:
             ff_utils.post_metadata(
                 self.meta_workflow_run_handler,
diff --git a/test/test_metawfl_handler.py b/test/test_metawfl_handler.py
index 5c454b8..0e35c9c 100644
--- a/test/test_metawfl_handler.py
+++ b/test/test_metawfl_handler.py
@@ -287,8 +287,7 @@ def test_create_ordered_meta_workflows_list(self, mwf_handler_dict, possible_reo
         Tests the topological sorting of MetaWorkflow steps.
         """
         meta_workflow_handler = MetaWorkflowHandler(mwf_handler_dict)
-        assert getattr(meta_workflow_handler, "ordered_meta_workflows") in possible_reordered_mwf_lists
-        # TODO: add to constants file?
+ assert getattr(meta_workflow_handler, ORDERED_META_WORKFLOWS) in possible_reordered_mwf_lists @pytest.mark.parametrize( From 4908f1bb4b79dcc2eed96dc063d9e19707c2b1f2 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Wed, 10 May 2023 17:04:21 -0400 Subject: [PATCH 33/38] some edits to the create run handler pytests -- need to refactor --- magma/magma_constants.py | 20 +- magma/metawfl_handler.py | 5 + magma_ff/create_metawflrun_handler.py | 76 +++-- magma_ff/metawflrun_handler.py | 2 +- test/test_create_metawflrun_handler_ff.py | 330 ++++++++++++++++++++++ 5 files changed, 392 insertions(+), 41 deletions(-) create mode 100644 test/test_create_metawflrun_handler_ff.py diff --git a/magma/magma_constants.py b/magma/magma_constants.py index 54a360d..13b4d79 100644 --- a/magma/magma_constants.py +++ b/magma/magma_constants.py @@ -34,13 +34,13 @@ STOPPED = "stopped" -# -MWFR_TO_HANDLER_STEP_STATUS_DICT = { - "pending": "pending", - "running": "running", - "completed": "completed", - "failed": "failed", - "inactive": "pending", - "stopped": "stopped", - "quality metric failed": "failed" -} +#TODO: the following is here in case dup flag is added in the future +# MWFR_TO_HANDLER_STEP_STATUS_DICT = { +# "pending": "pending", +# "running": "running", +# "completed": "completed", +# "failed": "failed", +# "inactive": "pending", +# "stopped": "stopped", +# "quality metric failed": "failed" +# } diff --git a/magma/metawfl_handler.py b/magma/metawfl_handler.py index 71c3cd0..31eec4a 100644 --- a/magma/metawfl_handler.py +++ b/magma/metawfl_handler.py @@ -117,6 +117,11 @@ def __init__(self, input_dict): # Using meta_workflows array of dicts from CGAP MetaWorkflow Handler # create dict of the form {meta_workflow_name: MetaWorkflow Step object} self._set_meta_workflows_dict() + # TODO: NOTE: nowhere in magma is there a check that meta_workflows + # is an empty list. I am putting the burden of that on the user + # would y'all like me to add a check for an empty list? or NoneType? + # right now I only catch instances where meta_workflows doesn't exist, + # and I create an empty dict # Create ordered MetaWorkflows name list based on dependencies # This ordered list is what's used to create the array of MetaWorkflow Runs in Run handler diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py index 8b8658b..f447b12 100644 --- a/magma_ff/create_metawflrun_handler.py +++ b/magma_ff/create_metawflrun_handler.py @@ -17,7 +17,7 @@ ################################################ -# Custom Exception class(es) +# Custom Exception class ################################################ class MetaWorkflowRunHandlerCreationError(Exception): """Custom Exception when MetaWorkflow Run Handler encounters error during creation.""" @@ -52,7 +52,7 @@ class MetaWorkflowRunHandlerFromItem: # TODO: is this correct?? also, will we end up patching on assoc item?? 
# TODO: if so, create a schema mixin (seems unnecessary, for now) - self.META_WORKFLOW_RUN_HANDLER_ENDPOINT = "meta-workflow-run-handlers" + META_WORKFLOW_RUN_HANDLER_ENDPOINT = "meta-workflow-run-handlers" def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, auth_key): """ @@ -72,47 +72,58 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, self.auth_key = auth_key # Acquire associated item fields needed to create the Run Handler - self.associated_item_dict = make_embed_request( + self.associated_item_dict = self._retrieve_associated_item_dict(associated_item_identifier) + + # Acquired fields from associated MetaWorkflow Handler needed to create the Run Handler + # TODO: a check to make sure it is indeed of mwf handler type? does this function exist on ff_utils? + # same for above associated item request + self.meta_workflow_handler_dict = self._retrieve_meta_workflow_handler_dict(meta_workflow_handler_identifier) + + + # TODO: use @ property, cached property for the two embeds above + # static methods? + # Using associated item and associated MetaWorkflow Handler fields acquired, + # define some basic attrs for the Run Handler: project, institution, associated_item, meta_workflow_handler + self.project = self.associated_item_dict.get(PROJECT) # project is same as associated item + self.institution = self.associated_item_dict.get(INSTITUTION) # institution is same as associated item + self.associated_item_id = self.associated_item_dict.get(UUID) # get uuid of associated_item + self.meta_workflow_handler_id = self.meta_workflow_handler_dict.get(UUID) # get uuid of the template mwf handler + + self.meta_workflow_run_handler_uuid = str(uuid.uuid4()) + + # And now create the actual MetaWorkflow Run Handler using the instance vars defined above + # This returns the complete, populated MetaWorkflow Run Handler dictionary that can be POSTed + self.meta_workflow_run_handler = self._create_meta_workflow_run_handler() + + def _retrieve_associated_item_dict(self, associated_item_identifier): + associated_item = make_embed_request( associated_item_identifier, self.ASSOCIATED_ITEM_FIELDS, self.auth_key, single_item=True ) - if not self.associated_item_dict: + if not associated_item: raise MetaWorkflowRunHandlerCreationError( "No Item found for given 'associated_item' identifier: %s" % associated_item_identifier ) + return associated_item - # Acquired fields from associated MetaWorkflow Handler needed to create the Run Handler - # TODO: a check to make sure it is indeed of mwf handler type? does this function exist on ff_utils? 
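The type check mused about in the TODO above might look like the following sketch,
assuming the CGAP convention that portal item metadata carries an "@type" list
(this is an illustration only, not part of the patch):

    # Hypothetical guard -- ff_utils is already imported in this module
    item_meta = ff_utils.get_metadata(identifier, key=auth_key, add_on="frame=object")
    if "MetaWorkflowHandler" not in item_meta.get("@type", []):
        raise MetaWorkflowRunHandlerCreationError(
            "Item %s is not a MetaWorkflow Handler" % identifier
        )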
-        # same for above associated item request
-        self.meta_workflow_handler_dict = make_embed_request(
+    def _retrieve_meta_workflow_handler_dict(self, meta_workflow_handler_identifier):
+        meta_workflow_handler = make_embed_request(
             meta_workflow_handler_identifier,
             self.META_WORKFLOW_HANDLER_FIELDS,
             self.auth_key,
             single_item=True
         )
-        if not self.meta_workflow_handler_dict:
+        if not meta_workflow_handler:
             raise MetaWorkflowRunHandlerCreationError(
                 "No MetaWorkflow Handler found for given 'meta_workflow_handler' identifier: %s"
                 % meta_workflow_handler_identifier
-            )
-
-        # Using associated item and associated MetaWorkflow Handler fields acquired,
-        # define some basic attrs for the Run Handler: project, institution, associated_item, meta_workflow_handler
-        self.project = self.associated_item_dict.get(PROJECT) # project is same as associated item
-        self.institution = self.associated_item_dict.get(INSTITUTION) # institution is same as associated item
-        self.associated_item_id = self.associated_item_dict.get(UUID) # get uuid of associated_item
-        self.meta_workflow_handler_id = self.meta_workflow_handler_dict.get(UUID) # get uuid of the template mwf handler
-
-        self.meta_workflow_run_handler_uuid = str(uuid.uuid4())
-
-        # And now create the actual MetaWorkflow Run Handler using the instance vars defined above
-        # This returns the complete, populated MetaWorkflow Run Handler dictionary that can be POSTed
-        self.meta_workflow_run_handler = self.create_meta_workflow_run_handler()
+            )
+        return meta_workflow_handler
 
-    def create_meta_workflow_run_handler(self):
+    def _create_meta_workflow_run_handler(self):
         """
         Create MetaWorkflowRun Handler dictionary, which can later be POSTed to the CGAP portal.
@@ -141,17 +152,18 @@ def create_meta_workflow_run_handler(self):
         meta_workflow_run_handler[TITLE] = title
 
-        # now call helper method to populate and create the meta_workflow_runs array
-        meta_workflow_runs_array = self.create_meta_workflow_runs_array()
+        # now call helper method to create and populate the meta_workflow_runs array
+        meta_workflow_runs_array = self._create_meta_workflow_runs_array(self.meta_workflow_handler_dict)
 
         meta_workflow_run_handler[META_WORKFLOW_RUNS] = meta_workflow_runs_array
-        #TODO: check for whether this is empty or nah? no for now
+        #TODO: check for whether this is empty or nah? I'm not checking for now,
         # putting the burden of this error on the user
+        # see my note in magma/metawfl_handler.py regarding this
 
         # return the completed MetaWorkflow Run Handler dictionary, which follows the CGAP schema
         return meta_workflow_run_handler
 
-    def create_meta_workflow_runs_array(self):
+    def _create_meta_workflow_runs_array(self, meta_workflow_handler_dict):
         """
         Creates meta_workflow_runs array for a MetaWorkflowRun Handler dictionary.
These objects are in correct order due to topological sorting in @@ -165,7 +177,7 @@ def create_meta_workflow_runs_array(self): # Create MetaWorkflowHandler object # This ensures all necessary attrs are present in the following Run Handler creation # and that MetaWorkflow Steps are topologically sorted - associated_meta_workflow_handler_object = MetaWorkflowHandler(self.meta_workflow_handler_dict) + associated_meta_workflow_handler_object = MetaWorkflowHandler(meta_workflow_handler_dict) # Extract the ordered list of MetaWorkflows @@ -240,4 +252,8 @@ def post_meta_workflow_run_handler(self): except Exception as error_msg: raise MetaWorkflowRunHandlerCreationError( "MetaWorkflowRunHandler not POSTed: \n%s" % str(error_msg) - ) \ No newline at end of file + ) + + + # TODO: PATCH associated item's meta_workflow_runs array? + # I've chosen to do this in the running function instead \ No newline at end of file diff --git a/magma_ff/metawflrun_handler.py b/magma_ff/metawflrun_handler.py index dbfe4da..f4e739f 100644 --- a/magma_ff/metawflrun_handler.py +++ b/magma_ff/metawflrun_handler.py @@ -5,7 +5,7 @@ ################################################ # magma -from magma.metawfl_handler import MetaWorkflowRunHandler as MetaWorkflowRunHandlerFromMagma +from magma.metawflrun_handler import MetaWorkflowRunHandler as MetaWorkflowRunHandlerFromMagma # from magma import metawfl #TODO: do this in FF # from magma_ff.utils import make_embed_request #check_status, chunk_ids diff --git a/test/test_create_metawflrun_handler_ff.py b/test/test_create_metawflrun_handler_ff.py new file mode 100644 index 0000000..68482c9 --- /dev/null +++ b/test/test_create_metawflrun_handler_ff.py @@ -0,0 +1,330 @@ +import mock +import pytest +from contextlib import contextmanager +from test.utils import patch_context +from typing import Iterator # thx Doug mwehehe + +import datetime + +# import json + +from magma.magma_constants import * +from magma_ff.utils import JsonObject +import magma_ff.create_metawflrun_handler as create_metaworkflow_run_handler_module +from magma_ff.create_metawflrun_handler import ( + MetaWorkflowRunHandlerFromItem, + MetaWorkflowRunHandlerCreationError, +) + + +@contextmanager +def patch_post_metadata(**kwargs) -> Iterator[mock.MagicMock]: + with patch_context( + create_metaworkflow_run_handler_module.ff_utils, "post_metadata", **kwargs + ) as mock_item: + yield mock_item + + +@contextmanager +def patch_make_embed_request(**kwargs) -> Iterator[mock.MagicMock]: + with patch_context( + create_metaworkflow_run_handler_module, "make_embed_request", **kwargs + ) as mock_item: + yield mock_item + + +@contextmanager +def patch_retrieve_associated_item_dict(**kwargs) -> Iterator[mock.MagicMock]: + with patch_context( + create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, + "_retrieve_associated_item_dict", + **kwargs + ) as mock_item: + yield mock_item + + +@contextmanager +def patch_retrieve_meta_workflow_handler_dict(**kwargs) -> Iterator[mock.MagicMock]: + with patch_context( + create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, + "_retrieve_meta_workflow_handler_dict", + **kwargs + ) as mock_item: + yield mock_item + +@contextmanager +def patch_create_meta_workflow_runs_array(**kwargs) -> Iterator[mock.MagicMock]: + with patch_context( + create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, + "_create_meta_workflow_runs_array", + **kwargs + ) as mock_item: + yield mock_item + + +@contextmanager +def patch_generate_uuid4(**kwargs) -> 
Iterator[mock.MagicMock]: + with patch_context( + create_metaworkflow_run_handler_module.uuid, "uuid4", **kwargs + ) as mock_item: + yield mock_item + + +@contextmanager +def patch_create_meta_workflow_run_handler_dict(**kwargs) -> Iterator[mock.MagicMock]: + with patch_context( + create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, + "_create_meta_workflow_run_handler", + **kwargs + ) as mock_item: + yield mock_item + + +TODAY = datetime.date.today().isoformat() +TESTER_PROJECT = "project_tester" +TESTER_INSTITUTION = "institution_tester" +TESTER_TITLE = "title_tester" + +ASSOCIATED_ITEM_UUID = "associated_item_tester_uuid" +ASSOCIATED_ITEM_SIMPLE_DICT = { + UUID: ASSOCIATED_ITEM_UUID, + PROJECT: TESTER_PROJECT, + INSTITUTION: TESTER_INSTITUTION, + # META_WORKFLOW_RUNS: [] # in the case that we wanna add dup flag back in future development + # TODO: and patching this array? should be handled in run mwfr handler +} + +META_WORKFLOW_HANDLER_UUID = "meta_workflow_handler_tester_uuid" +META_WORKFLOW_HANDLER_SIMPLE_DICT = { + UUID: META_WORKFLOW_HANDLER_UUID, + TITLE: TESTER_TITLE, + META_WORKFLOWS: [], #TODO: check my long note in magma/metawfl_handler.py +} + +META_WORKFLOW_RUN_HANDLER_UUID = "meta_workflow_run_handler_tester_uuid" +AUTH_KEY = {"key": "foo"} +META_WORKFLOW_RUN_HANDLER_SIMPLE_DICT = { + UUID: META_WORKFLOW_RUN_HANDLER_UUID, + PROJECT: TESTER_PROJECT, + INSTITUTION: TESTER_INSTITUTION, + "auth_key": AUTH_KEY, + ASSOCIATED_META_WORKFLOW_HANDLER: META_WORKFLOW_HANDLER_UUID, + ASSOCIATED_ITEM: ASSOCIATED_ITEM_UUID, + FINAL_STATUS: PENDING, + META_WORKFLOW_RUNS: [], #TODO: is this correct +} + +META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE = { + "auth_key": AUTH_KEY, + "associated_item_dict": ASSOCIATED_ITEM_SIMPLE_DICT, + "meta_workflow_handler_dict": META_WORKFLOW_HANDLER_SIMPLE_DICT, + PROJECT: TESTER_PROJECT, + INSTITUTION: TESTER_INSTITUTION, + "associated_item_id": ASSOCIATED_ITEM_UUID, + "meta_workflow_handler_id": META_WORKFLOW_HANDLER_UUID, + "meta_workflow_run_handler_uuid": META_WORKFLOW_RUN_HANDLER_UUID, + "meta_workflow_run_handler": META_WORKFLOW_RUN_HANDLER_SIMPLE_DICT, +} + + +@pytest.fixture +def meta_workflow_run_handler_from_item( + assoc_item_dict_embed, mwf_handler_dict_embed, mwfr_handler_instance +): + """ + Class for testing creation of MetaWorkflowRunHandlerFromItem, + with portal requests & imported library calls mocked. 
+ """ + # import pdb; pdb.set_trace() + with patch_retrieve_associated_item_dict( + return_value=assoc_item_dict_embed + ) as mock_embed_associated_item: + with patch_retrieve_meta_workflow_handler_dict( + return_value=mwf_handler_dict_embed + ) as mock_embed_meta_workflow_handler: + with patch_generate_uuid4( + return_value=META_WORKFLOW_RUN_HANDLER_UUID + ) as mock_generate_run_handler_uuid4: + with patch_create_meta_workflow_run_handler_dict( + return_value=mwfr_handler_instance + ) as mock_generate_run_handler_dict: + # import pdb; pdb.set_trace() + return MetaWorkflowRunHandlerFromItem( + ASSOCIATED_ITEM_UUID, META_WORKFLOW_HANDLER_UUID, AUTH_KEY + ) + + + +class TestMetaWorkflowRunHandlerFromItem: + @pytest.mark.parametrize( + "attribute, expected_value, assoc_item_dict_embed, mwf_handler_dict_embed, mwfr_handler_instance", + [ + ( + "auth_key", + AUTH_KEY, + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), + ( + "associated_item_dict", + ASSOCIATED_ITEM_SIMPLE_DICT, + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), # + ( + "associated_item_id", + ASSOCIATED_ITEM_UUID, + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), # + ( + "project", + TESTER_PROJECT, + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), # + ( + "institution", + TESTER_INSTITUTION, + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), # + ( + "meta_workflow_handler_dict", + META_WORKFLOW_HANDLER_SIMPLE_DICT, + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), + ( + "meta_workflow_handler_id", + META_WORKFLOW_HANDLER_UUID, + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), + ( + "meta_workflow_run_handler_uuid", + META_WORKFLOW_RUN_HANDLER_UUID, + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), + ( + "meta_workflow_run_handler", + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), + ], + ) + def test_instance_attributes( + self, attribute, expected_value, meta_workflow_run_handler_from_item + ): + """Test that instance attributes are set correctly.""" + result = getattr(meta_workflow_run_handler_from_item, attribute) + assert result == expected_value + # TODO: add checks of inputting wrong identifiers for embed requests + # do this in separate pytest. but put a couple here (integrated) for good measure + + + @pytest.mark.parametrize( + "assoc_item_dict_embed, mwf_handler_dict_embed, mwfr_handler_instance", + [ + ( + ASSOCIATED_ITEM_SIMPLE_DICT, + META_WORKFLOW_HANDLER_SIMPLE_DICT, + META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ), + ], + ) + def test_create_meta_workflow_runs_array( + self, meta_workflow_run_handler_from_item + ): + """ + Tests invocation of topological sort within MetaWorkflowHandler object, + and the conversion of the sorted MetaWorkflow Steps in MetaWorkflow Handler + to MetaWorkflow Run Steps in the Run Handler. 
+ """ + # result = getattr(meta_workflow_run_handler_from_item, "meta_workflow_run_handler") + # print(result) + # assert result == result + with patch_create_meta_workflow_runs_array( + + ) as mock_create_meta_workflow_runs_array: + with patch_make_embed_request + + +# @pytest.mark.parametrize( +# "meta_workflow_run,error,expected", +# [ +# (META_WORKFLOW_RUN_NO_FILES_INPUT, True, None), +# (META_WORKFLOW_RUN_NO_WORKFLOW_RUNS, False, META_WORKFLOW_RUN), +# ], +# ) +# def test_create_workflow_runs( +# self, +# meta_workflow_run, +# error, +# expected, +# meta_workflow_run_from_item, +# ): +# """Test creation of workflow runs from given MetaWorkflowRun +# properties. +# """ +# if error: +# with pytest.raises(MetaWorkflowRunCreationError): +# meta_workflow_run_from_item.create_workflow_runs(meta_workflow_run) +# else: +# meta_workflow_run_from_item.create_workflow_runs(meta_workflow_run) +# assert meta_workflow_run == expected + +# @pytest.mark.parametrize( +# "return_value,exception,expected", +# [ +# ({"foo": "bar"}, True, None), +# ({"foo": "bar"}, False, {"foo": "bar"}), +# ], +# ) +# def test_get_item_properties( +# self, meta_workflow_run_from_item, return_value, exception, expected +# ): +# """Test item GET from portal.""" +# side_effect = None +# if exception: +# side_effect = Exception +# with mock.patch( +# "magma_ff.create_metawfr.ff_utils.get_metadata", +# return_value=return_value, +# side_effect=side_effect, +# ) as mock_get_metadata: +# result = meta_workflow_run_from_item.get_item_properties("foo") +# assert result == expected +# mock_get_metadata.assert_called_once_with( +# "foo", key=AUTH_KEY, add_on="frame=raw" +# ) + +# @pytest.mark.parametrize("exception", [True, False]) +# def test_post_meta_workflow_item(self, meta_workflow_run_from_item, exception): +# """Test MWFR POST to portal.""" +# side_effect = None +# if exception: +# side_effect = Exception +# with mock.patch( +# "magma_ff.create_metawfr.ff_utils.post_metadata", +# side_effect=side_effect, +# ) as mock_post_metadata: +# if exception: +# with pytest.raises(MetaWorkflowRunCreationError): +# meta_workflow_run_from_item.post_meta_workflow_run() +# else: +# meta_workflow_run_from_item.post_meta_workflow_run() +# mock_post_metadata.assert_called_once_with( +# {}, MetaWorkflowRunFromItem.META_WORKFLOW_RUN_ENDPOINT, key=AUTH_KEY +# ) From c68412113cc9c4bf8ff8d2643221b0b658d5b3f8 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Tue, 16 May 2023 01:53:47 -0400 Subject: [PATCH 34/38] Finalized rough draft of pytests for create mwfr handler functionality --- magma_ff/create_metawflrun_handler.py | 375 +++++++++------ test/meta_workflow_handler_constants.py | 152 ++++++ test/test_create_metawflrun_handler_ff.py | 534 +++++++++++++--------- 3 files changed, 714 insertions(+), 347 deletions(-) create mode 100644 test/meta_workflow_handler_constants.py diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py index f447b12..83a3ded 100644 --- a/magma_ff/create_metawflrun_handler.py +++ b/magma_ff/create_metawflrun_handler.py @@ -3,16 +3,18 @@ ################################################ # Libraries ################################################ -import datetime +from datetime import date import json import uuid from dcicutils import ff_utils +from functools import cached_property # magma from magma_ff.metawfl_handler import MetaWorkflowHandler -from magma_ff.metawflrun_handler import MetaWorkflowRunHandler -from magma_ff.utils import make_embed_request + +# from magma_ff.metawflrun_handler 
import MetaWorkflowRunHandler +from magma_ff.utils import make_embed_request, JsonObject from magma.magma_constants import * @@ -21,8 +23,10 @@ ################################################ class MetaWorkflowRunHandlerCreationError(Exception): """Custom Exception when MetaWorkflow Run Handler encounters error during creation.""" + pass + ################################################ # MetaWorkflow Run Handler from Item ################################################ @@ -38,25 +42,27 @@ class MetaWorkflowRunHandlerFromItem: "institution", "uuid", # "meta_workflow_runs.uuid", - # "meta_workflow_runs.meta_workflow", + # "meta_workflow_runs.meta_workflow", # "meta_workflow_runs.final_status" # TODO: these last three are for the case of reintegrating duplication flag ] META_WORKFLOW_HANDLER_FIELDS = [ "uuid", - "title", + "title", # TODO: test when no title present "meta_workflows", - "meta_workflows.*" + "meta_workflows.*", ] # TODO: is this correct?? also, will we end up patching on assoc item?? # TODO: if so, create a schema mixin (seems unnecessary, for now) META_WORKFLOW_RUN_HANDLER_ENDPOINT = "meta-workflow-run-handlers" - def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, auth_key): + def __init__( + self, associated_item_identifier, meta_workflow_handler_identifier, auth_key + ): """ - Initialize the MWF Run Handler object, set all attributes. + Initialize the MWF Run Handler object, set basic attributes. :param associated_item_identifier: Item identifier (UUID, @id, or accession) on which this MetaWorkflow Run Handler is being created @@ -66,64 +72,25 @@ def __init__(self, associated_item_identifier, meta_workflow_handler_identifier, :type meta_workflow_handler_identifier: str :param auth_key: Portal authorization key :type auth_key: dict - :raises MetaWorkflowRunHandlerCreationError: If required item (associated_item) cannot - be found on environment of authorization key """ - self.auth_key = auth_key - - # Acquire associated item fields needed to create the Run Handler - self.associated_item_dict = self._retrieve_associated_item_dict(associated_item_identifier) - - # Acquired fields from associated MetaWorkflow Handler needed to create the Run Handler - # TODO: a check to make sure it is indeed of mwf handler type? does this function exist on ff_utils? - # same for above associated item request - self.meta_workflow_handler_dict = self._retrieve_meta_workflow_handler_dict(meta_workflow_handler_identifier) - - - # TODO: use @ property, cached property for the two embeds above - # static methods? 
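The TODO being removed above is resolved later in this patch: the two embed
requests become functools.cached_property attributes. As a generic illustration
of that pattern (standard-library behavior, not code from this repo):

    from functools import cached_property

    class Fetcher:
        @cached_property
        def data(self):
            print("fetched once")  # runs only on the first access
            return {"uuid": "abc"}

    f = Fetcher()
    f.data  # prints "fetched once", computes and caches on the instance
    f.data  # served from the per-instance cache; no second fetch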
- # Using associated item and associated MetaWorkflow Handler fields acquired, - # define some basic attrs for the Run Handler: project, institution, associated_item, meta_workflow_handler - self.project = self.associated_item_dict.get(PROJECT) # project is same as associated item - self.institution = self.associated_item_dict.get(INSTITUTION) # institution is same as associated item - self.associated_item_id = self.associated_item_dict.get(UUID) # get uuid of associated_item - self.meta_workflow_handler_id = self.meta_workflow_handler_dict.get(UUID) # get uuid of the template mwf handler - - self.meta_workflow_run_handler_uuid = str(uuid.uuid4()) - - # And now create the actual MetaWorkflow Run Handler using the instance vars defined above - # This returns the complete, populated MetaWorkflow Run Handler dictionary that can be POSTed - self.meta_workflow_run_handler = self._create_meta_workflow_run_handler() - - def _retrieve_associated_item_dict(self, associated_item_identifier): - associated_item = make_embed_request( - associated_item_identifier, - self.ASSOCIATED_ITEM_FIELDS, - self.auth_key, - single_item=True - ) - if not associated_item: + if associated_item_identifier is None: raise MetaWorkflowRunHandlerCreationError( - "No Item found for given 'associated_item' identifier: %s" % associated_item_identifier - ) - return associated_item - - def _retrieve_meta_workflow_handler_dict(self, meta_workflow_handler_identifier): - meta_workflow_handler = make_embed_request( - meta_workflow_handler_identifier, - self.META_WORKFLOW_HANDLER_FIELDS, - self.auth_key, - single_item=True - ) - if meta_workflow_handler: + f"Invalid argument: 'associated_item_identifier' is {str(associated_item_identifier)}" + ) + if meta_workflow_handler_identifier is None: raise MetaWorkflowRunHandlerCreationError( - "No MetaWorkflow Handler found for given 'meta_workflow_handler' identifier: %s" - % meta_workflow_handler_identifier - ) - return meta_workflow_handler + f"Invalid argument: 'meta_workflow_handler_identifier' is {str(meta_workflow_handler_identifier)}" + ) + if auth_key is None: + raise MetaWorkflowRunHandlerCreationError( + f"Invalid argument: 'auth_key' is {str(auth_key)}" + ) + self.auth_key = auth_key + self.associated_item_identifier = associated_item_identifier + self.meta_workflow_handler_identifier = meta_workflow_handler_identifier - def _create_meta_workflow_run_handler(self): + def create_meta_workflow_run_handler(self): """ Create MetaWorkflowRun Handler dictionary, which can later be POSTed to the CGAP portal. 
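Concretely, the dictionary this method assembles has roughly the following shape
(values below are placeholders; the keys mirror the magma_constants used in the
hunk that follows):

    {
        "project": "/projects/tester/",           # copied from the associated item
        "institution": "/institutions/tester/",   # copied from the associated item
        "uuid": "<newly generated uuid4>",
        "meta_workflow_handler": "<handler identifier>",
        "associated_item": "<item identifier>",
        "final_status": "pending",
        "title": "MetaWorkflowRun Handler <title> created 2023-05-16",  # only if the template has a title
        "meta_workflow_runs": [
            {"name": "A", "dependencies": [], "items_for_creation": ["<uuid>"]},
        ],
    }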
@@ -133,109 +100,124 @@ def _create_meta_workflow_run_handler(self): # Create basic MetaWorkflow Run Handler dictionary, using instance variables meta_workflow_run_handler = { - PROJECT: self.project, - INSTITUTION: self.institution, - UUID: self.meta_workflow_run_handler_uuid, - ASSOCIATED_META_WORKFLOW_HANDLER: self.meta_workflow_handler_id, - ASSOCIATED_ITEM: self.associated_item_id, - FINAL_STATUS: PENDING + PROJECT: self.get_project, + INSTITUTION: self.get_institution, + UUID: str(uuid.uuid4()), + ASSOCIATED_META_WORKFLOW_HANDLER: self.meta_workflow_handler_identifier, + ASSOCIATED_ITEM: self.associated_item_identifier, + FINAL_STATUS: PENDING, } # Create the title of the Run Handler, based on associated MetaWorkflow Handler's title # and the timestamp at the time of creation of this class instance - meta_workflow_handler_title = self.meta_workflow_handler_dict.get(TITLE) + meta_workflow_handler_title = self.retrieved_meta_workflow_handler.get(TITLE) if meta_workflow_handler_title: - creation_date = datetime.date.today().isoformat() - title = "MetaWorkflowRun Handler %s created %s" % ( - meta_workflow_handler_title, - creation_date - ) + creation_date = date.today() + # creation_date = datetime.date.today().isoformat() + title = f"MetaWorkflowRun Handler {meta_workflow_handler_title} created {creation_date.isoformat()}" meta_workflow_run_handler[TITLE] = title - # now call helper method to create and populate the meta_workflow_runs array - meta_workflow_runs_array = self._create_meta_workflow_runs_array(self.meta_workflow_handler_dict) - - meta_workflow_run_handler[META_WORKFLOW_RUNS] = meta_workflow_runs_array - #TODO: check for whether this is empty or nah? I'm not for now + meta_workflow_run_handler[ + META_WORKFLOW_RUNS + ] = self._create_meta_workflow_runs_array() + # TODO: check for whether this is empty or nah? I'm not for now # putting the burden of this error on the user # see my note in magma/metawfl_handler.py regarding this # return the completed MetaWorkflow Run Handler dictionary, which follows the CGAP schema + self.meta_workflow_run_handler = meta_workflow_run_handler return meta_workflow_run_handler - def _create_meta_workflow_runs_array(self, meta_workflow_handler_dict): + def _create_meta_workflow_runs_array(self): """ Creates meta_workflow_runs array for a MetaWorkflowRun Handler dictionary. These objects are in correct order due to topological sorting in the MetaWorkflowHandler class, and uses the associated MetaWorkflow Handler's ordered_meta_workflows array as a template. 
- :return: array of meta_workflow_runs metadata, following CGAP schema + :return: array of meta_workflow_runs metadata, following Run Handler CGAP schema :rtype: list[dict] """ - # Create MetaWorkflowHandler object - # This ensures all necessary attrs are present in the following Run Handler creation - # and that MetaWorkflow Steps are topologically sorted - associated_meta_workflow_handler_object = MetaWorkflowHandler(meta_workflow_handler_dict) - - - # Extract the ordered list of MetaWorkflows - try: - ordered_meta_workflows = getattr(associated_meta_workflow_handler_object, ORDERED_META_WORKFLOWS) - except AttributeError as attr_err: - raise MetaWorkflowRunHandlerCreationError( - "MetaWorkflow Handler does not contain ordered MetaWorkflow steps: \n%s" % str(attr_err) - ) - else: # edge case: ordered_meta_workflows is of NoneType - if ordered_meta_workflows is None: - raise MetaWorkflowRunHandlerCreationError( - "MetaWorkflow Handler 'ordered_meta_workflows' attribute is of NoneType \n%s" - ) - - - # Will eventually be the completed pending meta_workflow_runs array, in order - ordered_meta_workflow_runs = [] + # Will eventually be the meta_workflow_runs array, with the runs in order + ordered_meta_workflow_runs = [] # Go through the ordered MetaWorkflow steps to populate basic MetaWorkflow Runs - for meta_workflow_step_obj in ordered_meta_workflows: + for meta_workflow_step_name in self.ordered_meta_workflow_names: + + # self.meta_workflow_steps is a dict of step dicts, keys are step names + meta_workflow_step = self.meta_workflow_steps[meta_workflow_step_name] + # will become the populated MetaWorkflowRun step object - meta_workflow_run_step_obj = {} - - # Attrs that stay the same and are passed in: name, dependencies - meta_workflow_run_step_obj[NAME] = meta_workflow_step_obj[NAME] - meta_workflow_run_step_obj[DEPENDENCIES] = meta_workflow_step_obj[DEPENDENCIES] - - ## Handle conversion of MetaWorkflow items_for_creation_(uuid/prop_trace) - ## to items_for_creation (just LinkTos) - - # if items_for_creation_uuid, just copy over - if ITEMS_FOR_CREATION_UUID in meta_workflow_step_obj.keys(): - meta_workflow_run_step_obj[ITEMS_FOR_CREATION] = meta_workflow_step_obj[ITEMS_FOR_CREATION_UUID] - # otherwise, dealing with property traces. Make necessary embed requests - # and convert property trace(s) to uuid(s) - else: - items_for_creation_uuids = [] - for item_prop_trace in meta_workflow_step_obj[ITEMS_FOR_CREATION_PROP_TRACE]: - item_uuid = make_embed_request( - self.associated_item_id, - [item_prop_trace.uuid], # TODO: will this actually work -- test manually - self.auth_key, - single_item=True - ) - if not item_uuid: - raise MetaWorkflowRunHandlerCreationError( - "Invalid property trace '%s' on item with the following ID: %s" - % (item_prop_trace, associated_item_id) - ) - items_for_creation_uuids.append(item_uuid) - meta_workflow_run_step_obj[ITEMS_FOR_CREATION] = items_for_creation_uuids + meta_workflow_run_step = {} + + # Attrs that stay the same: name, dependencies + meta_workflow_run_step[NAME] = getattr(meta_workflow_step, NAME) + meta_workflow_run_step[DEPENDENCIES] = getattr(meta_workflow_step, DEPENDENCIES) + + # Handle conversion of MetaWorkflow items_for_creation_(uuid/prop_trace) + # to MetaWorkflow Run items_for_creation with embed requests + meta_workflow_run_step[ITEMS_FOR_CREATION] = self._embed_items_for_creation( + meta_workflow_step + ) # Basic dict for current MetaWorkflow Run step complete. Now append. 
- ordered_meta_workflow_runs.append(meta_workflow_run_step_obj) + ordered_meta_workflow_runs.append(meta_workflow_run_step) return ordered_meta_workflow_runs + def _embed_items_for_creation(self, meta_workflow_step): + """ + From a MetaWorkflow Step, extract the items_for_creation attribute, which + may be uuids or property traces (in relation to the associated item). + + If uuids, return this list of uuids. + If property traces, use embed requests to convert to identifiers. + + :param meta_workflow_step: object containing a MetaWorkflow Step's metadata + :type meta_workflow_step: dict + :return: list of items_for_creation identifiers + :rtype: list[str] + :raises MetaWorkflowRunHandlerCreationError: if a property trace cannot be embedded + """ + # if items_for_creation_uuid, just copy over + # if ITEMS_FOR_CREATION_UUID in meta_workflow_step.keys(): + if getattr(meta_workflow_step, ITEMS_FOR_CREATION_UUID, None): + return getattr(meta_workflow_step, ITEMS_FOR_CREATION_UUID) + # otherwise, dealing with property traces. Make necessary embed requests + # and convert property trace(s) to uuid(s) + else: + property_traces = getattr(meta_workflow_step, ITEMS_FOR_CREATION_PROP_TRACE, None) + if not isinstance(property_traces, list): + item_uuid = make_embed_request( + self.associated_item_identifier, + property_traces + + ".uuid", # TODO: are we assuming the user will include ".uuid" or @id as part of prop trace? + self.auth_key, + single_item=True, + ) + if not item_uuid: + raise MetaWorkflowRunHandlerCreationError( + f"Invalid property trace '{property_traces}' on item with the following ID: {self.associated_item_identifier}" + ) + return item_uuid + + + items_for_creation_uuids = [] + for item_prop_trace in property_traces: + item_uuid = make_embed_request( + self.associated_item_identifier, + item_prop_trace + + ".uuid", # TODO: are we assuming the user will include ".uuid" or @id as part of prop trace? + self.auth_key, + single_item=True, + ) + if not item_uuid: + raise MetaWorkflowRunHandlerCreationError( + f"Invalid property trace '{item_prop_trace}' on item with the following ID: {self.associated_item_identifier}" + ) + items_for_creation_uuids.append(item_uuid) + return items_for_creation_uuids def post_meta_workflow_run_handler(self): """ @@ -245,15 +227,132 @@ def post_meta_workflow_run_handler(self): """ try: ff_utils.post_metadata( - self.meta_workflow_run_handler, + self.meta_workflow_run_handler, #TODO: add check to see if this exists? self.META_WORKFLOW_RUN_HANDLER_ENDPOINT, key=self.auth_key, ) except Exception as error_msg: raise MetaWorkflowRunHandlerCreationError( - "MetaWorkflowRunHandler not POSTed: \n%s" % str(error_msg) + f"MetaWorkflowRunHandler not POSTed: \n{str(error_msg)}" + ) from error_msg + + # TODO: PATCH associated item's meta_workflow_runs array? 
+    # I've chosen to do this in the running function instead
+
+    @cached_property
+    def retrieved_associated_item(self):
+        """
+        Acquire the associated item fields needed to create the Run Handler.
+        """
+        associated_item = make_embed_request(
+            self.associated_item_identifier,
+            self.ASSOCIATED_ITEM_FIELDS,
+            self.auth_key,
+            single_item=True,
+        )
+        if not associated_item:
+            raise MetaWorkflowRunHandlerCreationError(
+                f"No Item found for given 'associated_item' identifier: {self.associated_item_identifier}"
+            )
+        return associated_item
+
+    @cached_property
+    def retrieved_meta_workflow_handler(self):
+        """
+        Acquire the fields from the associated MetaWorkflow Handler needed to create the Run Handler.
+        """
+        # TODO: a check to make sure it is indeed of mwf handler type? does this function exist on ff_utils?
+        # same for above associated item request
+        meta_workflow_handler = make_embed_request(
+            self.meta_workflow_handler_identifier,
+            self.META_WORKFLOW_HANDLER_FIELDS,
+            self.auth_key,
+            single_item=True,
+        )
+        if not meta_workflow_handler:
+            raise MetaWorkflowRunHandlerCreationError(
+                f"No MetaWorkflow Handler found for given 'meta_workflow_handler' identifier: {self.meta_workflow_handler_identifier}"
+            )
+        return meta_workflow_handler
+
+    @cached_property  # cached because the topological sort can return different, equally valid orderings
+    def meta_workflow_handler_instance(self):
+        """
+        Creates the MetaWorkflowHandler object.
+        This induces topological sort of steps and validation of attributes.
+        """
+        return MetaWorkflowHandler(self.retrieved_meta_workflow_handler)
+
+    @property
+    def ordered_meta_workflow_names(self):
+        """
+        Returns the topologically sorted list of MetaWorkflow step names
+        from the cached MetaWorkflowHandler instance, i.e. its
+        ordered_meta_workflows attribute.
+
+        :returns: ordered_meta_workflows attribute
+        :rtype: list[str]
+        """
+        # Extract the ordered list of MetaWorkflow names
+        return getattr(
+            self.meta_workflow_handler_instance, ORDERED_META_WORKFLOWS
+        )
+
+    @property
+    def meta_workflow_steps(self):
+        """
+        Initializes a MetaWorkflowHandler object, which topologically sorts its
+        MetaWorkflow steps, and returns the attribute holding those steps,
+        the meta_workflows attribute.
+ + :returns: meta_workflows attribute + :rtype: list[dict] + """ + # Create MetaWorkflowHandler object + # This ensures all necessary attrs are present in the following Run Handler creation + # and that MetaWorkflow Steps are topologically sorted + associated_meta_workflow_handler_object = MetaWorkflowHandler( + self.retrieved_meta_workflow_handler + ) + + # Extract the ordered list of MetaWorkflow names + return getattr( + associated_meta_workflow_handler_object, META_WORKFLOWS + ) + + @property + def get_project(self): + """Retrieves project attribute from the associated item.""" + return self.retrieved_associated_item.get(PROJECT) + + @property + def get_institution(self): + """Retrieves institution attribute from the associated item.""" + return self.retrieved_associated_item.get(INSTITUTION) + +#################################################### +# Wrapper Fxn: MetaWorkflow Run Handler from Item +#################################################### +def create_meta_workflow_run_handler( + associated_item_identifier: str, + meta_workflow_handler_identifier: str, + auth_key: JsonObject, + post: bool = True, +) -> JsonObject: + """Create a MetaWorkflowRunHandler for the given associated item and MetaWorkflow Handler. + + POST MetaWorkflowRun as instructed. + + :param associated_item_identifier: Identifier (e.g. UUID, @id) for item from + which to create the MetaWorkflowRun Handler + :param meta_workflow_handler_identifier: Identifier for the MetaWorkflow Handler + from which to create the MetaWorkflowRun Handler + :param auth_key: Authorization keys for C4 account + :param post: Whether to POST the MetaWorkflowRun Handler created + :returns: MetaWorkflowRun Handler created + """ + meta_workflow_run_handler_creator = MetaWorkflowRunHandlerFromItem(associated_item_identifier, meta_workflow_handler_identifier, auth_key) + run_handler = meta_workflow_run_handler_creator.create_meta_workflow_run_handler() + if post: + meta_workflow_run_handler_creator.post_meta_workflow_run_handler() + return run_handler diff --git a/test/meta_workflow_handler_constants.py b/test/meta_workflow_handler_constants.py new file mode 100644 index 0000000..c9d0d6c --- /dev/null +++ b/test/meta_workflow_handler_constants.py @@ -0,0 +1,152 @@ +from copy import deepcopy +from magma.magma_constants import * + +MWF_HANDLER_NAME = "test_mwf_handler" +MWF_HANDLER_PROJECT = "test_project" +MWF_HANDLER_INSTITUTION = "test_institution" +MWF_HANDLER_UUID = "test_mwf_handler_uuid" + +TESTER_UUID = "uuid" + +# Basic meta_workflow steps (dicts) used in meta_workflows array +MWF_A = {"meta_workflow": "test_mwf_uuid_0", "name": "A"} +MWF_B = {"meta_workflow": "test_mwf_uuid_1", "name": "B"} +MWF_C = {"meta_workflow": "test_mwf_uuid_2", "name": "C"} +MWF_D = {"meta_workflow": "test_mwf_uuid_3", "name": "D"} + +# Dependencies +DEP_ON_A = ["A"] +DEP_ON_B = ["B"] +DEP_ON_C = ["C"] +DEP_ON_D = ["D"] + +#TODO: I never use the prop trace for tests... +def meta_workflow_with_added_attrs(meta_workflow_dict, items_for_creation_property_trace=None, items_for_creation_uuid=None, dependencies=None): + """ + Helper function used to add non-required attributes to a MetaWorkflow step input dictionary. + Returns new MetaWorkflow step dictionary with added attributes. 
+ + :param meta_workflow_dict: dictionary containing required attributes for MetaWorkflow step ("name" and "meta_workflow"): + :type meta_workflow_dict: dic + :param items_for_creation_property_trace: property trace(s) of item(s) required to create MetaWorkflow Run from MetaWorkflow + :type items_for_creation_property_trace: str or list[str] or None + :param items_for_creation_uuid: uuid(s) of item(s) required to create MetaWorkflow Run from MetaWorkflow + :type items_for_creation_uuid: str or list[str] or None + :param dependencies: list of MetaWorkflows (names) that the current MetaWorkflow is dependent on + :type dependencies: list[str] + :return: reformatted MetaWorkflow dictionary with added attributes + """ + dict_copy = deepcopy(meta_workflow_dict) + if items_for_creation_property_trace: + dict_copy[ITEMS_FOR_CREATION_PROP_TRACE] = items_for_creation_property_trace + if items_for_creation_uuid: + dict_copy[ITEMS_FOR_CREATION_UUID] = items_for_creation_uuid + if dependencies is not None: + dict_copy[DEPENDENCIES] = dependencies + return dict_copy + + +# meta_workflows arrays for MetaWorkflow Handler +# handler without uuid -- fails validation of basic attributes +HANDLER_WITHOUT_UUID_DICT = { + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION +} + +# handler without meta_workflows array -- passes validation, should set empty metaworkflows array +HANDLER_WITHOUT_MWF_ARRAY_DICT = { + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID +} + +# DAG_0 +# A B -----> C +MWF_A_DAG_0 = meta_workflow_with_added_attrs(MWF_A, None, TESTER_UUID, []) +MWF_B_DAG_0 = meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, []) +MWF_B_DAG_0_W_DEP = meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_A) +MWF_C_DAG_0 = meta_workflow_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B) +DAG_0_MWF_ARRAY = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0] # purposely in this order to test toposort +HANDLER_DAG_0 = { + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: DAG_0_MWF_ARRAY +} +DAG_0_MWF_ARRAY_W_DUPLICATES = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0, MWF_B_DAG_0] +HANDLER_DAG_0_W_DUPLICATES = { + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: DAG_0_MWF_ARRAY_W_DUPLICATES +} +DAG_0_MWF_ARRAY_W_DUPLICATES_BY_MWF_NAME = [MWF_B_DAG_0, MWF_A_DAG_0, MWF_C_DAG_0, MWF_B_DAG_0_W_DEP] +HANDLER_DAG_0_W_DUPLICATES_BY_MWF_NAME = { + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: DAG_0_MWF_ARRAY_W_DUPLICATES_BY_MWF_NAME +} +REORDERED_MWFS_DAG_0 = [["A", "B", "C"], ["B", "A", "C"], ["B", "C", "A"]] + +# DAG_1 +# B -----> D +# | ⋀ ⋀ +# | / | +# ⋁ / | +# A <----- C +MWF_A_DAG_1 = meta_workflow_with_added_attrs(MWF_A, None, TESTER_UUID, DEP_ON_B + DEP_ON_C) +MWF_B_DAG_1 = meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, []) +MWF_C_DAG_1 = meta_workflow_with_added_attrs(MWF_C, None, TESTER_UUID, []) +MWF_D_DAG_1 = meta_workflow_with_added_attrs(MWF_D, None, TESTER_UUID, DEP_ON_A + DEP_ON_B + DEP_ON_C) +DAG_1_MWF_ARRAY = [MWF_A_DAG_1, MWF_B_DAG_1, MWF_C_DAG_1, MWF_D_DAG_1] +HANDLER_DAG_1 = { + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + 
META_WORKFLOWS: DAG_1_MWF_ARRAY +} +REORDERED_MWFS_DAG_1 = [["B", "C", "A", "D"], ["C", "B", "A", "D"]] + +# CYCLIC_0 +# A B__ +# ⋀ \_____ +# | | +# | | +# C <----- | +MWF_A_CYCLIC_0 = meta_workflow_with_added_attrs(MWF_A, None, TESTER_UUID, []) +MWF_B_CYCLIC_0 = meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_C) +MWF_C_CYCLIC_0 = meta_workflow_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B) +CYCLIC_0_MWF_ARRAY = [MWF_A_CYCLIC_0, MWF_B_CYCLIC_0, MWF_C_CYCLIC_0] +HANDLER_CYCLIC_0 = { + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: CYCLIC_0_MWF_ARRAY +} + +# CYCLIC_1 +# A -----> B +# ⋀ | +# | | +# | ⋁ +# D <----- C +MWF_A_CYCLIC_1 = meta_workflow_with_added_attrs(MWF_A, None, TESTER_UUID, DEP_ON_D) +MWF_B_CYCLIC_1 = meta_workflow_with_added_attrs(MWF_B, None, TESTER_UUID, DEP_ON_A) +MWF_C_CYCLIC_1 = meta_workflow_with_added_attrs(MWF_C, None, TESTER_UUID, DEP_ON_B) +MWF_D_CYCLIC_1 = meta_workflow_with_added_attrs(MWF_D, None, TESTER_UUID, DEP_ON_C) +CYCLIC_1_MWF_ARRAY = [MWF_A_CYCLIC_1, MWF_B_CYCLIC_1, MWF_C_CYCLIC_1, MWF_D_CYCLIC_1] +HANDLER_CYCLIC_1 = { + NAME: MWF_HANDLER_NAME, + PROJECT: MWF_HANDLER_PROJECT, + INSTITUTION: MWF_HANDLER_INSTITUTION, + UUID: MWF_HANDLER_UUID, + META_WORKFLOWS: CYCLIC_1_MWF_ARRAY +} \ No newline at end of file diff --git a/test/test_create_metawflrun_handler_ff.py b/test/test_create_metawflrun_handler_ff.py index 68482c9..80c04c7 100644 --- a/test/test_create_metawflrun_handler_ff.py +++ b/test/test_create_metawflrun_handler_ff.py @@ -4,21 +4,24 @@ from test.utils import patch_context from typing import Iterator # thx Doug mwehehe -import datetime - -# import json - -from magma.magma_constants import * from magma_ff.utils import JsonObject +from magma.magma_constants import * import magma_ff.create_metawflrun_handler as create_metaworkflow_run_handler_module from magma_ff.create_metawflrun_handler import ( MetaWorkflowRunHandlerFromItem, MetaWorkflowRunHandlerCreationError, + create_meta_workflow_run_handler ) +from test.meta_workflow_handler_constants import * + +from magma_ff.metawfl_handler import MetaWorkflowHandler +from magma.metawfl_handler import MetaWorkflowStep + @contextmanager def patch_post_metadata(**kwargs) -> Iterator[mock.MagicMock]: + """Patch ff_utils.post_metadata call within MetaWorkflowRunHAndlerFromItem class.""" with patch_context( create_metaworkflow_run_handler_module.ff_utils, "post_metadata", **kwargs ) as mock_item: @@ -27,63 +30,76 @@ def patch_post_metadata(**kwargs) -> Iterator[mock.MagicMock]: @contextmanager def patch_make_embed_request(**kwargs) -> Iterator[mock.MagicMock]: + """Patch make_embed_request function defined in magma_ff/utils.py, + which is called within MetaWorkflowRunHandlerFromItem class.""" with patch_context( - create_metaworkflow_run_handler_module, "make_embed_request", **kwargs + create_metaworkflow_run_handler_module, + "make_embed_request", + **kwargs ) as mock_item: yield mock_item @contextmanager -def patch_retrieve_associated_item_dict(**kwargs) -> Iterator[mock.MagicMock]: +def patch_create_meta_workflow_runs_array(**kwargs) -> Iterator[mock.MagicMock]: with patch_context( create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, - "_retrieve_associated_item_dict", + "_create_meta_workflow_runs_array", **kwargs ) as mock_item: yield mock_item + @contextmanager -def patch_retrieve_meta_workflow_handler_dict(**kwargs) -> Iterator[mock.MagicMock]: +def 
patch_embed_items_for_creation(**kwargs) -> Iterator[mock.MagicMock]: + """Patch function that uses embed requests to convert property traces to IDs.""" with patch_context( create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, - "_retrieve_meta_workflow_handler_dict", + "_embed_items_for_creation", **kwargs ) as mock_item: yield mock_item @contextmanager -def patch_create_meta_workflow_runs_array(**kwargs) -> Iterator[mock.MagicMock]: +def patch_retrieved_meta_workflow_handler(**kwargs) -> Iterator[mock.MagicMock]: + """Patch cached property of embedded meta_workflow_handler""" with patch_context( create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, - "_create_meta_workflow_runs_array", + "retrieved_meta_workflow_handler", + new_callable=mock.PropertyMock, **kwargs ) as mock_item: yield mock_item - @contextmanager -def patch_generate_uuid4(**kwargs) -> Iterator[mock.MagicMock]: +def patch_retrieved_associated_item(**kwargs) -> Iterator[mock.MagicMock]: + """Patch cached property of embedded meta_workflow_handler""" with patch_context( - create_metaworkflow_run_handler_module.uuid, "uuid4", **kwargs + create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, + "retrieved_associated_item", + new_callable=mock.PropertyMock, + **kwargs ) as mock_item: yield mock_item @contextmanager -def patch_create_meta_workflow_run_handler_dict(**kwargs) -> Iterator[mock.MagicMock]: +def patch_generate_uuid4(**kwargs) -> Iterator[mock.MagicMock]: + """Patch generator of uuids, + which is called within MetaWorkflowRunHandlerFromItem class.""" with patch_context( - create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, - "_create_meta_workflow_run_handler", - **kwargs + create_metaworkflow_run_handler_module.uuid, "uuid4", **kwargs ) as mock_item: yield mock_item -TODAY = datetime.date.today().isoformat() +TODAY = "2023-05-12" TESTER_PROJECT = "project_tester" TESTER_INSTITUTION = "institution_tester" TESTER_TITLE = "title_tester" +TESTER_UUID = "uuid" +TESTER_PROP_TRACE = "property.trace" ASSOCIATED_ITEM_UUID = "associated_item_tester_uuid" ASSOCIATED_ITEM_SIMPLE_DICT = { @@ -95,236 +111,336 @@ def patch_create_meta_workflow_run_handler_dict(**kwargs) -> Iterator[mock.Magic } META_WORKFLOW_HANDLER_UUID = "meta_workflow_handler_tester_uuid" -META_WORKFLOW_HANDLER_SIMPLE_DICT = { - UUID: META_WORKFLOW_HANDLER_UUID, - TITLE: TESTER_TITLE, - META_WORKFLOWS: [], #TODO: check my long note in magma/metawfl_handler.py + +MWF_STEP_NO_EMBEDS = { + META_WORKFLOW: "foo", + NAME: "bar", + ITEMS_FOR_CREATION_UUID: TESTER_UUID +} +MWF_STEP_NO_EMBEDS_2 = { + META_WORKFLOW: "foo", + NAME: "bar", + ITEMS_FOR_CREATION_UUID: [TESTER_UUID] +} +MWF_STEP_EMBED_SIMPLE = { + META_WORKFLOW: "foo", + NAME: "bar", + ITEMS_FOR_CREATION_PROP_TRACE: TESTER_PROP_TRACE } +MWF_STEP_EMBED_SEVERAL = { + META_WORKFLOW: "foo", + NAME: "bar", + ITEMS_FOR_CREATION_PROP_TRACE: [TESTER_PROP_TRACE, TESTER_PROP_TRACE] +} + +# just redefining the uuids from the mwf handler dicts for consistency +# DAG_0 +# A B -----> C +HANDLER_DAG_0[UUID] = META_WORKFLOW_HANDLER_UUID +# DAG_1 +# B -----> D +# | ⋀ ⋀ +# | / | +# ⋁ / | +# A <----- C +HANDLER_DAG_1[UUID] = META_WORKFLOW_HANDLER_UUID + +# with title +HANDLER_DAG_0_W_TITLE = deepcopy(HANDLER_DAG_0) +HANDLER_DAG_0_W_TITLE[TITLE] = "DAG 0" + META_WORKFLOW_RUN_HANDLER_UUID = "meta_workflow_run_handler_tester_uuid" AUTH_KEY = {"key": "foo"} -META_WORKFLOW_RUN_HANDLER_SIMPLE_DICT = { - UUID: META_WORKFLOW_RUN_HANDLER_UUID, - PROJECT: TESTER_PROJECT, 
- INSTITUTION: TESTER_INSTITUTION, - "auth_key": AUTH_KEY, - ASSOCIATED_META_WORKFLOW_HANDLER: META_WORKFLOW_HANDLER_UUID, - ASSOCIATED_ITEM: ASSOCIATED_ITEM_UUID, - FINAL_STATUS: PENDING, - META_WORKFLOW_RUNS: [], #TODO: is this correct -} - -META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE = { - "auth_key": AUTH_KEY, - "associated_item_dict": ASSOCIATED_ITEM_SIMPLE_DICT, - "meta_workflow_handler_dict": META_WORKFLOW_HANDLER_SIMPLE_DICT, - PROJECT: TESTER_PROJECT, - INSTITUTION: TESTER_INSTITUTION, - "associated_item_id": ASSOCIATED_ITEM_UUID, - "meta_workflow_handler_id": META_WORKFLOW_HANDLER_UUID, - "meta_workflow_run_handler_uuid": META_WORKFLOW_RUN_HANDLER_UUID, - "meta_workflow_run_handler": META_WORKFLOW_RUN_HANDLER_SIMPLE_DICT, -} @pytest.fixture -def meta_workflow_run_handler_from_item( - assoc_item_dict_embed, mwf_handler_dict_embed, mwfr_handler_instance -): - """ - Class for testing creation of MetaWorkflowRunHandlerFromItem, - with portal requests & imported library calls mocked. - """ - # import pdb; pdb.set_trace() - with patch_retrieve_associated_item_dict( - return_value=assoc_item_dict_embed - ) as mock_embed_associated_item: - with patch_retrieve_meta_workflow_handler_dict( - return_value=mwf_handler_dict_embed - ) as mock_embed_meta_workflow_handler: - with patch_generate_uuid4( - return_value=META_WORKFLOW_RUN_HANDLER_UUID - ) as mock_generate_run_handler_uuid4: - with patch_create_meta_workflow_run_handler_dict( - return_value=mwfr_handler_instance - ) as mock_generate_run_handler_dict: - # import pdb; pdb.set_trace() - return MetaWorkflowRunHandlerFromItem( - ASSOCIATED_ITEM_UUID, META_WORKFLOW_HANDLER_UUID, AUTH_KEY - ) - +def meta_workflow_run_handler_from_item_fixture(): + """Fixture of MetaWorkflowRunHandlerFromItem instance""" + return MetaWorkflowRunHandlerFromItem(ASSOCIATED_ITEM_UUID, META_WORKFLOW_HANDLER_UUID, AUTH_KEY) class TestMetaWorkflowRunHandlerFromItem: + """Tests for methods/properties for MetaWorkflowRunHandlerFromItem class.""" + @pytest.mark.parametrize( - "attribute, expected_value, assoc_item_dict_embed, mwf_handler_dict_embed, mwfr_handler_instance", + "attribute, expected_value, assoc_item_id, mwf_handler_id, auth_key", [ ( "auth_key", AUTH_KEY, - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + ASSOCIATED_ITEM_UUID, + META_WORKFLOW_HANDLER_UUID, + AUTH_KEY, ), ( - "associated_item_dict", - ASSOCIATED_ITEM_SIMPLE_DICT, - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, - ), # - ( - "associated_item_id", + "associated_item_identifier", ASSOCIATED_ITEM_UUID, - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, - ), # - ( - "project", - TESTER_PROJECT, - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, - ), # - ( - "institution", - TESTER_INSTITUTION, - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, - ), # + ASSOCIATED_ITEM_UUID, + META_WORKFLOW_HANDLER_UUID, + AUTH_KEY, + ), ( - "meta_workflow_handler_dict", - META_WORKFLOW_HANDLER_SIMPLE_DICT, - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + "meta_workflow_handler_identifier", + META_WORKFLOW_HANDLER_UUID, + ASSOCIATED_ITEM_UUID, + META_WORKFLOW_HANDLER_UUID, + AUTH_KEY, ), ( - "meta_workflow_handler_id", + "auth_key", + None, 
+ ASSOCIATED_ITEM_UUID, META_WORKFLOW_HANDLER_UUID, - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + None, ), ( - "meta_workflow_run_handler_uuid", - META_WORKFLOW_RUN_HANDLER_UUID, - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + "associated_item_identifier", + None, + None, + META_WORKFLOW_HANDLER_UUID, + AUTH_KEY, ), ( - "meta_workflow_run_handler", - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + "meta_workflow_handler_identifier", + None, + ASSOCIATED_ITEM_UUID, + None, + AUTH_KEY, ), ], ) - def test_instance_attributes( - self, attribute, expected_value, meta_workflow_run_handler_from_item + def test_init( + self, attribute, expected_value, assoc_item_id, mwf_handler_id, auth_key ): """Test that instance attributes are set correctly.""" - result = getattr(meta_workflow_run_handler_from_item, attribute) - assert result == expected_value - # TODO: add checks of inputting wrong identifiers for embed requests - # do this in separate pytest. but put a couple here (integrated) for good measure + try: + meta_workflow_run_handler_from_item = MetaWorkflowRunHandlerFromItem( + assoc_item_id, mwf_handler_id, auth_key + ) + result = getattr(meta_workflow_run_handler_from_item, attribute) + assert result == expected_value + except MetaWorkflowRunHandlerCreationError as creation_err: + assert attribute in str(creation_err) @pytest.mark.parametrize( - "assoc_item_dict_embed, mwf_handler_dict_embed, mwfr_handler_instance", + "meta_workflow_step, exception_expected, return_value, num_embed_calls", [ ( - ASSOCIATED_ITEM_SIMPLE_DICT, - META_WORKFLOW_HANDLER_SIMPLE_DICT, - META_WORKFLOW_RUN_HANDLER_SIMPLE_INSTANCE, + MetaWorkflowStep(MWF_STEP_EMBED_SIMPLE), + True, + None, + 1 + ), + ( + MetaWorkflowStep(MWF_STEP_EMBED_SEVERAL), + True, + None, + 1 + ), + ( + MetaWorkflowStep(MWF_STEP_NO_EMBEDS), + False, + TESTER_UUID, + 0 + ), + ( + MetaWorkflowStep(MWF_STEP_NO_EMBEDS_2), + False, + [TESTER_UUID], + 0 ), + ( + MetaWorkflowStep(MWF_STEP_EMBED_SIMPLE), + False, + TESTER_UUID, + 1 + ), + ( + MetaWorkflowStep(MWF_STEP_EMBED_SEVERAL), + False, + [TESTER_UUID, TESTER_UUID], + 2 + ) + ], + ) + def test_embed_items_for_creation( + self, meta_workflow_step, exception_expected, return_value, num_embed_calls, meta_workflow_run_handler_from_item_fixture + ): + """ + Tests the conversion of the items_for_creation_(uuid/prop_trace) in MetaWorkflow Steps + to items_for_creation in MetaWorkflow Run Steps in the Run Handler. 
+ """ + with patch_make_embed_request() as mock_embed_request: + if exception_expected: + mock_embed_request.return_value = None + with pytest.raises(MetaWorkflowRunHandlerCreationError): + result = meta_workflow_run_handler_from_item_fixture._embed_items_for_creation(meta_workflow_step) + assert mock_embed_request.call_count == num_embed_calls + else: + mock_embed_request.return_value = TESTER_UUID + result = meta_workflow_run_handler_from_item_fixture._embed_items_for_creation(meta_workflow_step) + assert result == return_value + assert mock_embed_request.call_count == num_embed_calls + + @pytest.mark.parametrize( + "meta_workflow_handler, num_step_calls", + [ + (HANDLER_DAG_0, 3), + (HANDLER_DAG_1, 4) ], ) def test_create_meta_workflow_runs_array( - self, meta_workflow_run_handler_from_item + self, meta_workflow_handler, num_step_calls, meta_workflow_run_handler_from_item_fixture ): """ - Tests invocation of topological sort within MetaWorkflowHandler object, - and the conversion of the sorted MetaWorkflow Steps in MetaWorkflow Handler + Tests the conversion of the ordered MetaWorkflow Steps to MetaWorkflow Run Steps in the Run Handler. + Implicitly testing the property ordered_meta_workflows, + and cached property retrieved_meta_workflow_handler. + """ + with patch_retrieved_meta_workflow_handler(return_value=meta_workflow_handler): + with patch_embed_items_for_creation(return_value=TESTER_UUID) as mock_embed_request: + handler = meta_workflow_run_handler_from_item_fixture + result = handler._create_meta_workflow_runs_array() + + orig_ordered_mwf_names = getattr(handler.meta_workflow_handler_instance, ORDERED_META_WORKFLOWS) + orig_mwf_steps = getattr(handler.meta_workflow_handler_instance, META_WORKFLOWS) + + for idx, name in enumerate(orig_ordered_mwf_names): + assert result[idx][NAME] == name + assert result[idx][DEPENDENCIES] == getattr(orig_mwf_steps[name], DEPENDENCIES) + assert result[idx][ITEMS_FOR_CREATION] == TESTER_UUID + + assert mock_embed_request.call_count == num_step_calls + + + @pytest.mark.parametrize( + "meta_workflow_handler", + [ + (HANDLER_DAG_0), + (HANDLER_DAG_1) + ], + ) + def test_create_meta_workflow_run_handler_no_title( + self, meta_workflow_handler, meta_workflow_run_handler_from_item_fixture + ): """ - # result = getattr(meta_workflow_run_handler_from_item, "meta_workflow_run_handler") - # print(result) - # assert result == result - with patch_create_meta_workflow_runs_array( - - ) as mock_create_meta_workflow_runs_array: - with patch_make_embed_request - - -# @pytest.mark.parametrize( -# "meta_workflow_run,error,expected", -# [ -# (META_WORKFLOW_RUN_NO_FILES_INPUT, True, None), -# (META_WORKFLOW_RUN_NO_WORKFLOW_RUNS, False, META_WORKFLOW_RUN), -# ], -# ) -# def test_create_workflow_runs( -# self, -# meta_workflow_run, -# error, -# expected, -# meta_workflow_run_from_item, -# ): -# """Test creation of workflow runs from given MetaWorkflowRun -# properties. 
-# """ -# if error: -# with pytest.raises(MetaWorkflowRunCreationError): -# meta_workflow_run_from_item.create_workflow_runs(meta_workflow_run) -# else: -# meta_workflow_run_from_item.create_workflow_runs(meta_workflow_run) -# assert meta_workflow_run == expected - -# @pytest.mark.parametrize( -# "return_value,exception,expected", -# [ -# ({"foo": "bar"}, True, None), -# ({"foo": "bar"}, False, {"foo": "bar"}), -# ], -# ) -# def test_get_item_properties( -# self, meta_workflow_run_from_item, return_value, exception, expected -# ): -# """Test item GET from portal.""" -# side_effect = None -# if exception: -# side_effect = Exception -# with mock.patch( -# "magma_ff.create_metawfr.ff_utils.get_metadata", -# return_value=return_value, -# side_effect=side_effect, -# ) as mock_get_metadata: -# result = meta_workflow_run_from_item.get_item_properties("foo") -# assert result == expected -# mock_get_metadata.assert_called_once_with( -# "foo", key=AUTH_KEY, add_on="frame=raw" -# ) - -# @pytest.mark.parametrize("exception", [True, False]) -# def test_post_meta_workflow_item(self, meta_workflow_run_from_item, exception): -# """Test MWFR POST to portal.""" -# side_effect = None -# if exception: -# side_effect = Exception -# with mock.patch( -# "magma_ff.create_metawfr.ff_utils.post_metadata", -# side_effect=side_effect, -# ) as mock_post_metadata: -# if exception: -# with pytest.raises(MetaWorkflowRunCreationError): -# meta_workflow_run_from_item.post_meta_workflow_run() -# else: -# meta_workflow_run_from_item.post_meta_workflow_run() -# mock_post_metadata.assert_called_once_with( -# {}, MetaWorkflowRunFromItem.META_WORKFLOW_RUN_ENDPOINT, key=AUTH_KEY -# ) + Tests creation of run handler function, + using regular handler as template. + """ + with patch_retrieved_meta_workflow_handler(return_value=meta_workflow_handler): + with patch_retrieved_associated_item(return_value=ASSOCIATED_ITEM_SIMPLE_DICT) as mocked_assoc_item: + with patch_generate_uuid4(return_value=META_WORKFLOW_RUN_HANDLER_UUID) as mocked_uuid: + with mock.patch('datetime.date') as mocked_current_date: + with patch_create_meta_workflow_runs_array() as mocked_mwfr_arr_creation: + completed_handler = meta_workflow_run_handler_from_item_fixture.create_meta_workflow_run_handler() + mocked_uuid.assert_called_once() + mocked_current_date.assert_not_called() + assert mocked_assoc_item.call_count == 2 + mocked_mwfr_arr_creation.assert_called_once() + + assert completed_handler[PROJECT] == TESTER_PROJECT + assert completed_handler[INSTITUTION] == TESTER_INSTITUTION + assert completed_handler[UUID] == META_WORKFLOW_RUN_HANDLER_UUID + assert completed_handler[ASSOCIATED_META_WORKFLOW_HANDLER] == META_WORKFLOW_HANDLER_UUID + assert completed_handler[ASSOCIATED_ITEM] == ASSOCIATED_ITEM_UUID + assert completed_handler[FINAL_STATUS] == PENDING + assert completed_handler.get(TITLE) is None + assert getattr(meta_workflow_run_handler_from_item_fixture, "meta_workflow_run_handler", None) is not None + + @mock.patch('magma_ff.create_metawflrun_handler.date') + def test_create_meta_workflow_run_handler_with_title( + self, mocked_date, meta_workflow_run_handler_from_item_fixture + ): + """ + Tests creation of run handler function, + using regular handler as template, including title formatting. 
+ """ + with patch_retrieved_meta_workflow_handler(return_value=HANDLER_DAG_0_W_TITLE): + with patch_retrieved_associated_item(return_value=ASSOCIATED_ITEM_SIMPLE_DICT): + with patch_generate_uuid4(return_value=META_WORKFLOW_RUN_HANDLER_UUID): + with patch_create_meta_workflow_runs_array(): + mocked_date.today.return_value.isoformat.return_value = TODAY + completed_handler = meta_workflow_run_handler_from_item_fixture.create_meta_workflow_run_handler() + mocked_date.today.assert_called_once() + assert completed_handler[TITLE] == f"MetaWorkflowRun Handler {HANDLER_DAG_0_W_TITLE[TITLE]} created {TODAY}" + + + @pytest.mark.parametrize("exception", [True, False]) + def test_post_meta_workflow_run_handler(self, exception, meta_workflow_run_handler_from_item_fixture): + """Test MetaWorkflow Run Handler POST to CGAP portal.""" + + with patch_retrieved_meta_workflow_handler(return_value=HANDLER_DAG_0): + with patch_retrieved_associated_item(return_value=ASSOCIATED_ITEM_SIMPLE_DICT): + with patch_generate_uuid4(return_value=META_WORKFLOW_RUN_HANDLER_UUID): + meta_workflow_run_handler_from_item_fixture.create_meta_workflow_run_handler() + if exception: + with patch_post_metadata(side_effect=Exception) as mock_post_with_error: + with pytest.raises(MetaWorkflowRunHandlerCreationError) as creation_err: + meta_workflow_run_handler_from_item_fixture.post_meta_workflow_run_handler() + assert "MetaWorkflowRunHandler not POSTed" in creation_err + mock_post_with_error.assert_called_once() + else: + with patch_post_metadata() as mock_post: + meta_workflow_run_handler_from_item_fixture.post_meta_workflow_run_handler() + mock_post.assert_called_once_with( + getattr(meta_workflow_run_handler_from_item_fixture, "meta_workflow_run_handler"), + MetaWorkflowRunHandlerFromItem.META_WORKFLOW_RUN_HANDLER_ENDPOINT, + key=AUTH_KEY + ) + +##################################################### +META_WORKFLOW_RUN_HANDLER_SIMPLE_DICT = { + UUID: META_WORKFLOW_RUN_HANDLER_UUID, + PROJECT: TESTER_PROJECT, + INSTITUTION: TESTER_INSTITUTION, + "auth_key": AUTH_KEY, + ASSOCIATED_META_WORKFLOW_HANDLER: META_WORKFLOW_HANDLER_UUID, + ASSOCIATED_ITEM: ASSOCIATED_ITEM_UUID, + FINAL_STATUS: PENDING, + META_WORKFLOW_RUNS: [], #TODO: is this correct +} + +@contextmanager +def patch_create_meta_workflow_run_handler(**kwargs) -> Iterator[mock.MagicMock]: + """Patch function that uses embed requests to convert property traces to IDs.""" + with patch_context( + create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, + "create_meta_workflow_run_handler", + **kwargs + ) as mock_item: + yield mock_item + +@contextmanager +def patch_post_meta_workflow_run_handler(**kwargs) -> Iterator[mock.MagicMock]: + """Patch function that uses embed requests to convert property traces to IDs.""" + with patch_context( + create_metaworkflow_run_handler_module.MetaWorkflowRunHandlerFromItem, + "post_meta_workflow_run_handler", + **kwargs + ) as mock_item: + yield mock_item + + +@pytest.mark.parametrize("post", [True, False]) +def test_create_meta_workflow_run_handler( + post: bool +) -> None: + """Test of wrapper function to Run Handler creation class.""" + with patch_create_meta_workflow_run_handler(return_value=META_WORKFLOW_RUN_HANDLER_SIMPLE_DICT) as mock_handler_creation: + with patch_post_meta_workflow_run_handler() as mock_post_handler: + result = create_meta_workflow_run_handler( + ASSOCIATED_ITEM_UUID, + META_WORKFLOW_HANDLER_UUID, + AUTH_KEY, + post + ) + mock_handler_creation.assert_called_once() + if post: + 
mock_post_handler.assert_called_once() + else: + mock_post_handler.assert_not_called() + + assert result == META_WORKFLOW_RUN_HANDLER_SIMPLE_DICT \ No newline at end of file From 93c1715d2794730ed46a5a7465d525050e232444 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Wed, 31 May 2023 20:37:27 -0400 Subject: [PATCH 35/38] Edited execute handler function and created draft of pytests, plust docstrings --- magma/metawflrun_handler.py | 9 +- magma_ff/create_metawflrun_handler.py | 1 + magma_ff/run_metawflrun_handler.py | 264 +++++++++++++------- test/meta_workflow_run_handler_constants.py | 244 ++++++++++++++++++ test/test_run_metawflrun_handler.py | 235 +++++++++++++++++ 5 files changed, 663 insertions(+), 90 deletions(-) create mode 100644 test/meta_workflow_run_handler_constants.py create mode 100644 test/test_run_metawflrun_handler.py diff --git a/magma/metawflrun_handler.py b/magma/metawflrun_handler.py index 3a41a78..061069e 100644 --- a/magma/metawflrun_handler.py +++ b/magma/metawflrun_handler.py @@ -59,9 +59,6 @@ def __init__(self, input_dict): self._validate_basic_attributes(UUID, ASSOCIATED_META_WORKFLOW_HANDLER, META_WORKFLOW_RUNS) - # initial final_status attribute upon creation should be "pending" - setattr(self, FINAL_STATUS, PENDING) - ### Calculated attributes ### # by nature of how a MetaWorkflowRun Handler is created from the MetaWorkflow Handler, @@ -70,6 +67,10 @@ def __init__(self, input_dict): # for faster lookup and updating of steps self.meta_workflow_run_steps_dict = self._set_meta_workflow_runs_dict() + # initial final_status attribute upon creation should be "pending" + # setattr(self, FINAL_STATUS, PENDING) + self.update_final_status() + def _set_meta_workflow_runs_dict(self): """ @@ -101,6 +102,8 @@ def update_final_status(self): :return: final_status of the MetaWorkflow Run Handler :rtype: str """ + setattr(self, FINAL_STATUS, PENDING) + all_steps_completed = True all_steps_pending = True diff --git a/magma_ff/create_metawflrun_handler.py b/magma_ff/create_metawflrun_handler.py index 83a3ded..c5457a0 100644 --- a/magma_ff/create_metawflrun_handler.py +++ b/magma_ff/create_metawflrun_handler.py @@ -184,6 +184,7 @@ def _embed_items_for_creation(self, meta_workflow_step): # if ITEMS_FOR_CREATION_UUID in meta_workflow_step.keys(): if getattr(meta_workflow_step, ITEMS_FOR_CREATION_UUID, None): return getattr(meta_workflow_step, ITEMS_FOR_CREATION_UUID) + #TODO: have to do embed calls to check these actually exist? # otherwise, dealing with property traces. 
Make necessary embed requests # and convert property trace(s) to uuid(s) else: diff --git a/magma_ff/run_metawflrun_handler.py b/magma_ff/run_metawflrun_handler.py index db2adfe..f972e6a 100644 --- a/magma_ff/run_metawflrun_handler.py +++ b/magma_ff/run_metawflrun_handler.py @@ -1,94 +1,184 @@ -#!/usr/bin/env python3 - -################################################ -# Libraries -################################################ +from typing import Optional, List, Dict, Union, Any +from functools import cached_property from dcicutils import ff_utils -# magma -# from magma_ff.metawfl_handler import MetaWorkflowHandler - from magma_ff.metawflrun_handler import MetaWorkflowRunHandler -from magma_ff.utils import make_embed_request, check_status -from magma_ff.create_metawfr import create_meta_workflow_run, MetaWorkflowRunCreationError -# from magma_ff.run_metawfr import run_metawfr - -################################################ -# MetaWorkflowRunStep Generator Class -################################################ -class MetaWorkflowRunStepGenerator: - def __init__(self, mwfr_handler_input_dict, auth_key): +from magma_ff.utils import JsonObject, make_embed_request, check_status +from magma_ff.create_metawfr import ( + create_meta_workflow_run, + MetaWorkflowRunCreationError, +) + +from magma.magma_constants import * + + +def execute_metawflrun_handler( + meta_workflow_run_handler_id: str, + auth_key: JsonObject, + valid_final_status: Optional[List[str]] = None, + verbose: bool = False, +) -> None: + """ + Wrapper function to ExecuteMetaWorkflowRunHandler class method calls. + Executes the Run Handler for the given MetaWorkflow Run Handler ID. + Checks pending MetaWorkflow Run steps and, if dependencies are completed, + creates a corresponding MetaWorkflow Run, updates the step's status + and handler status, and PATCHes to CGAP portal. + + :param meta_workflow_run_handler_id: Identifier for the MetaWorkflow Run Handler + (e.g. UUID, @id) to be executed + :param auth_key: Authorization keys for C4 account + :param verbose: Whether to print the PATCH response(s) + :param valid_final_status: Run Handler final status(es) considered valid in CGAP portal + """ + # Retrieve Run Handler portal JSON from CGAP portal + fields_to_embed = [ + "*", + "meta_workflow_runs.*", + ] # TODO: double check this with integrated testing + meta_workflow_run_handler = make_embed_request( + meta_workflow_run_handler_id, fields_to_embed, auth_key, single_item=True + ) + if not meta_workflow_run_handler: + raise ValueError( + f"No MetaWorkflow Run Handler found for given identifier: {meta_workflow_run_handler_id}" + ) + + # Check that status of Run Handler retrieved is valid + perform_action = check_status(meta_workflow_run_handler, valid_final_status) + + # Start executing this Run Handler is its status is considered valid, PATCHing MWFRs as they're created + if perform_action: + newly_running_meta_workflow_runs = ExecuteMetaWorkflowRunHandler( + meta_workflow_run_handler, auth_key + ).generator_of_created_meta_workflow_run_steps() + for patch_dict in newly_running_meta_workflow_runs: + response_from_patch = ff_utils.patch_metadata( + patch_dict, meta_workflow_run_handler_id, key=auth_key + ) + if verbose: + print(response_from_patch) + # TODO: add patch to the associated item list of metaworkflow runs? 
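
For orientation, a minimal sketch of how this wrapper might be driven end to end. The identifiers, auth key contents, and polling cadence below are illustrative assumptions, not part of magma; in practice a scheduled check would own this loop:

import time

from magma_ff.run_metawflrun_handler import execute_metawflrun_handler

# Hypothetical identifier and credentials, for illustration only.
RUN_HANDLER_UUID = "00000000-aaaa-bbbb-cccc-000000000000"
AUTH_KEY = {"key": "...", "secret": "...", "server": "https://cgap.example.org"}

# Each pass creates MetaWorkflow Runs only for pending steps whose
# dependencies have completed, then PATCHes the handler's final_status
# and meta_workflow_runs array, so re-invoking the function is safe.
for _ in range(5):
    execute_metawflrun_handler(RUN_HANDLER_UUID, AUTH_KEY, verbose=True)
    time.sleep(60)
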
+ + +class ExecuteMetaWorkflowRunHandler: + """ + Class that generates updated dictionaries for PATCHing a MetaWorkflow Run Handler, + as each MetaWorkflow Run Step is executed in order, based on user-defined dependencies. + """ + + def __init__( + self, meta_workflow_run_handler: JsonObject, auth_key: JsonObject + ) -> None: + """ + Initialize the ExecuteMetaWorkflowRunHandler object, set basic attributes. + + :param meta_workflow_run_handler: JSON object of MetaWorkflowRun Handler, + retrieved from CGAP portal + :param auth_key: Portal authorization key + """ self.auth_key = auth_key - self.mwfr_handler_obj = MetaWorkflowRunHandler(mwfr_handler_input_dict) + self.meta_workflow_run_handler = meta_workflow_run_handler - def run_step_generator(self): - """this goes through pending steps - if all dependencies are complete, creates mwfr and runs it""" + def generator_of_created_meta_workflow_run_steps( + self, + ) -> Dict[str, Union[str, List[Any]]]: + """ + For each pending (ready to run) MetaWorkflow Run Step, if all dependencies are complete: + - updates status of that MetaWorkflow Run Step to "running" + - creates a corresponding MetaWorkflow Run + - generates updated meta_workflow_runs array and final_status + for MetaWorkflowRunHandler instance, yielded as + {final_status, meta_workflow_runs} for PATCHing + """ # going through all steps that are ready to run (pending) - for pending_mwfr_step_name in self.mwfr_handler_obj.pending_steps(): - curr_pending_step_obj = self.mwfr_handler_obj.retrieve_meta_workflow_run_step_by_name(pending_mwfr_step_name) - - # check that all dependencies are completed before running current step - curr_dependencies = getattr(curr_pending_step_obj, "dependencies", []) - deps_completed = True - for dependency_name in curr_dependencies: - dependency_step_status = self.mwfr_handler_obj.get_step_attr(dependency_name, "status") - if dependency_step_status != "completed": - deps_completed = False #TODO: add break here maybe - - # if all dependencies have run to completion - if deps_completed: - # create the metaworkflow run - #TODO: iterate through all items for creation, - # and use handler method instead of getattr? error catching n all dat - # oh a good idea is to add method to step class.... - try: - meta_workflow_run_portal_obj = create_meta_workflow_run( - getattr(curr_pending_step_obj, "items_for_creation"), - getattr(curr_pending_step_obj, "meta_workflow"), - self.auth_key - ) #TODO: !!! have to add run_uuid attr to schema!! arrray? 
to match items_for_creation - - # update the meta_workflow_run/run_uuid linkTo - setattr(curr_pending_step_obj, "run_uuid", meta_workflow_run_portal_obj["uuid"]) - # update the status to running - setattr(curr_pending_step_obj, "status", "running") - except MetaWorkflowRunCreationError as err: - # update error attr - setattr(curr_pending_step_obj, "error", err) - # update status to failed - setattr(curr_pending_step_obj, "status", "failed") - - - - # update final status & mwfr array of overall handler and yield for PATCHING - yield {'final_status': self.mwfr_handler_obj.update_final_status(), - 'meta_workflow_runs': self.mwfr_handler_obj.update_meta_workflows_array()} - - -################################################ -# Running Function: -# Calls MWFR creation/run fxns and patches handler -################################################ -def run_metawflrun_handler( - metawfr_handler_uuid, - auth_key, - verbose=False, - # sfn="tibanna_zebra", #TODO: just copying -- keeps option open - # env="fourfront-cgap", - # maxcount=None, # TODO: remnant of run metawfr -- no limit on mwfr steps per handler? -- this - valid_final_status=None -): - fields_to_embed = ["*", "meta_workflow_runs.*"] #TODO: double check this with integrated testing - mwfr_handler_input_dict = make_embed_request( - metawfr_handler_uuid, fields_to_embed, auth_key, single_item=True - ) #TODO: add error check here - perform_action = check_status(mwfr_handler_input_dict, valid_final_status) - if perform_action: - # this will create handler object which has checking status methods - mwfr_step_generator = MetaWorkflowRunStepGenerator(mwfr_handler_input_dict, auth_key).run_step_generator() + for pending_meta_workflow_run_name in self.handler.pending_steps(): + # current_pending_meta_workflow_run_step = self.meta_workflow_run_handler_instance.retrieve_meta_workflow_run_step_obj_by_name(pending_meta_workflow_run_name) - for patch_dict in mwfr_step_generator: - response_from_patch = ff_utils.patch_metadata(patch_dict, metawfr_handler_uuid, key=auth_key) - if verbose: - print(response_from_patch) \ No newline at end of file + dependencies_completed = self._check_pending_step_dependencies( + pending_meta_workflow_run_name + ) + + # if all dependencies for this pending step have run to completion + if dependencies_completed: + # Create this MetaWorkflow Run and POST to portal + # set this step's status to running too + self._create_and_update_meta_workflow_run_step( + pending_meta_workflow_run_name + ) + + # update final status & meta_workflow_runs array of the handler, yield for PATCHING + yield { + FINAL_STATUS: self.handler.update_final_status(), + META_WORKFLOW_RUNS: self.handler.update_meta_workflow_runs_array(), + } + + def _check_pending_step_dependencies(self, pending_step_name: str) -> bool: + """ + Given the name of a pending MetaWorkflowRun Step, check if all the Run Steps it is + dependent on are completed. 
+ + :param pending_step_name: name of the pending MetaWorkflowRun Step + :returns: True if all dependencies are completed, otherwise False + """ + + current_dependencies = self.handler.get_meta_workflow_run_step_attr( + pending_step_name, DEPENDENCIES + ) + + for dependency_name in current_dependencies: + dependency_step_status = self.handler.get_meta_workflow_run_step_attr( + dependency_name, STATUS + ) + if dependency_step_status != COMPLETED: + return False + + return True + + def _create_and_update_meta_workflow_run_step(self, pending_step_name: str) -> None: + """ + For a given pending MetaWorkflow Run step name within a Run Handler, + create its corresponding MetaWorkflow Run and update appropriate attributes (status & MetaWorkflow Run LinkTo). + If there is any error in creation of the Run, update the error attribute. + + :param pending_step_name: name of MetaWorkflow Run to be created and updated + :raises MetaWorkflowRunCreationError: if the MetaWorkflow Run for the given name can't be created + """ + try: + # TODO: iterate through all items for creation, + meta_workflow_run_portal_object = create_meta_workflow_run( + self.handler.get_meta_workflow_run_step_attr( + pending_step_name, ITEMS_FOR_CREATION + ), + self.handler.get_meta_workflow_run_step_attr( + pending_step_name, META_WORKFLOW + ), + self.auth_key, + ) # TODO: !!! have to add run_uuid attr to schema!! arrray? to match items_for_creation + # TODO: will this be the actual output of this function or do i have to parse more? + + # update the meta_workflow_run linkTo + self.handler.update_meta_workflow_run_step_obj( + pending_step_name, + META_WORKFLOW_RUN, + meta_workflow_run_portal_object[UUID], + ) + # update the run step's status to running + self.handler.update_meta_workflow_run_step_obj( + pending_step_name, STATUS, RUNNING + ) + # if there is any error in creation of the MetaWorkflowRun + except MetaWorkflowRunCreationError as err: + # update error attr + self.handler.update_meta_workflow_run_step_obj( + pending_step_name, ERROR, str(err) + ) + # update run step's status to failed + self.handler.update_meta_workflow_run_step_obj( + pending_step_name, STATUS, FAILED + ) + + @cached_property + def handler(self): + """Using JSON object of Run Handler from CGAP portal, create MetaWorkflowRunHandler instance.""" + return MetaWorkflowRunHandler(self.meta_workflow_run_handler) diff --git a/test/meta_workflow_run_handler_constants.py b/test/meta_workflow_run_handler_constants.py new file mode 100644 index 0000000..754c845 --- /dev/null +++ b/test/meta_workflow_run_handler_constants.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 + +################################################################# +# Libraries +################################################################# +from copy import deepcopy + +from magma.magma_constants import * + +################################################################# +# Vars +################################################################# + +MWF_RUN_HANDLER_NAME = "test_mwf_run_handler" +MWF_RUN_PROJECT = "test_project" +MWF_RUN_INSTITUTION = "test_institution" +MWF_RUN_HANDLER_UUID = "test_mwf_run_handler_uuid" + +TESTER_UUID = "test_item_uuid" + +TEST_MWFR_SIMPLE_GET_OUTPUT = { + "project": MWF_RUN_PROJECT, + "institution": MWF_RUN_INSTITUTION, + # "final_status": "completed", + "meta_workflow": "/meta-workflows/GAPMWIC28HMB/", + "@id": "/meta-workflow-runs/1734e9ac-af8c-4312-ac35-8b0018ef7411/", + "@type": ["MetaWorkflowRun", "Item"], + "uuid": TESTER_UUID +} + + +# basic 
meta_workflow_run dicts used in meta_workflow_runs array
+# will have attributes added to them using mwf_run_with_added_attrs()
+MWFR_A = {"name": "A", "meta_workflow": "link_to_mwf_A"}
+MWFR_B = {"name": "B", "meta_workflow": "link_to_mwf_B"}
+MWFR_C = {"name": "C", "meta_workflow": "link_to_mwf_C"}
+MWFR_D = {"name": "D", "meta_workflow": "link_to_mwf_D"}
+
+MWF_NAMES_LIST = ["B", "C", "A", "D"]
+
+DEP_ON_A = ["A"]
+DEP_ON_B = ["B"]
+DEP_ON_C = ["C"]
+DEP_ON_D = ["D"]
+
+
+def mwf_run_with_added_attrs(
+    meta_workflow_run_dict,
+    dependencies=None,
+    items_for_creation=None,
+    status=None,
+    meta_workflow_run_linkto=None,
+    error=None,
+):
+    """
+    Generates an updated meta_workflow_run_dict given a basic meta_workflow_run_dict and attributes to add.
+    These attributes are limited to dependencies, items_for_creation, status,
+    meta_workflow_run linkTo, and error for these tests.
+
+    :param meta_workflow_run_dict: Dictionary with basic attribute(s) of a MetaWorkflow Run
+    :type meta_workflow_run_dict: dict
+    :param dependencies: MetaWorkflow Runs, by name, that the given MetaWorkflow Run depends on
+    :type dependencies: list
+    :param items_for_creation: Item linkTo(s) needed to create the given MetaWorkflow Run
+    :type items_for_creation: str or list[str]
+    :param status: the status of the given MetaWorkflow Run
+    :type status: str
+    :param meta_workflow_run_linkto: the linkTo to a "created" MetaWorkflow Run on CGAP portal
+    :type meta_workflow_run_linkto: str
+    :param error: error traceback at "creation" of a MetaWorkflow Run
+    :type error: str
+    :return: updated meta_workflow_run_dict
+    """
+    dict_copy = deepcopy(meta_workflow_run_dict)
+    if dependencies is not None:
+        dict_copy[DEPENDENCIES] = dependencies
+    if items_for_creation is not None:
+        dict_copy[ITEMS_FOR_CREATION] = items_for_creation
+    if status is not None:
+        dict_copy[STATUS] = status
+    if meta_workflow_run_linkto is not None:
+        dict_copy[META_WORKFLOW_RUN] = meta_workflow_run_linkto
+    if error is not None:
+        dict_copy[ERROR] = error
+    return dict_copy
+
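
As a quick illustration of the helper above: a sketch reusing the basic dicts defined earlier in this file, with the commented-out result assuming the magma_constants string values (e.g. "dependencies", "status"):

# Build a "running" step for A, which depends on B and C (illustrative only).
EXAMPLE_MWFR_A_RUNNING = mwf_run_with_added_attrs(
    MWFR_A,                            # {"name": "A", "meta_workflow": "link_to_mwf_A"}
    dependencies=DEP_ON_B + DEP_ON_C,  # ["B", "C"]
    items_for_creation=TESTER_UUID,
    status=RUNNING,
)
# Expected rough shape of the result:
# {
#     "name": "A",
#     "meta_workflow": "link_to_mwf_A",
#     "dependencies": ["B", "C"],
#     "items_for_creation": "test_item_uuid",
#     "status": "running",
# }
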
+
+def mwfr_handler_dict_generator(meta_workflow_runs_array):
+    """
+    Given a meta_workflow_runs array, returns an input dict for
+    creation of a MetaWorkflow Run Handler object.
+
+    :param meta_workflow_runs_array: list of meta_workflow_run dicts
+    :type meta_workflow_runs_array: list[dict]
+    :return: dictionary to be used as input to instantiate a MetaWorkflow Run Handler object
+    """
+    return {
+        NAME: MWF_RUN_HANDLER_NAME,
+        PROJECT: MWF_RUN_PROJECT,
+        INSTITUTION: MWF_RUN_INSTITUTION,
+        UUID: MWF_RUN_HANDLER_UUID,
+        ASSOCIATED_META_WORKFLOW_HANDLER: TESTER_UUID,
+        META_WORKFLOW_RUNS: meta_workflow_runs_array,
+    }
+
+
+# handler without uuid -- fails validation of basic attributes
+full_handler_dict_0 = mwfr_handler_dict_generator([])
+full_handler_dict_0.pop(UUID)
+HANDLER_WITHOUT_UUID_DICT = full_handler_dict_0
+
+
+# handler without associated MetaWorkflow Handler uuid -- fails validation of basic attributes
+full_handler_dict_1 = mwfr_handler_dict_generator([])
+full_handler_dict_1.pop(ASSOCIATED_META_WORKFLOW_HANDLER)
+HANDLER_WITHOUT_ASSOC_MWFH_DICT = full_handler_dict_1
+
+# handler without meta_workflow_runs array -- fails validation of basic attributes
+HANDLER_WITHOUT_META_WORKFLOW_RUNS_ARRAY = mwfr_handler_dict_generator(None)
+
+# Constructing a Run Handler with the below step dependencies
+# B -----> D
+# |   ⋀    ⋀
+# |  /     |
+# ⋁ /      |
+# A <----- C
+
+# Pending MetaWorkflow Run dicts
+MWFR_A_PENDING = mwf_run_with_added_attrs(
+    MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, PENDING
+)
+MWFR_B_PENDING = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, PENDING)
+MWFR_C_PENDING = mwf_run_with_added_attrs(MWFR_C, [], TESTER_UUID, PENDING)
+MWFR_D_PENDING = mwf_run_with_added_attrs(
+    MWFR_D, DEP_ON_A + DEP_ON_B + DEP_ON_C, TESTER_UUID, PENDING
+)
+
+# Running MetaWorkflow Run dicts
+MWFR_A_RUNNING = mwf_run_with_added_attrs(
+    MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, RUNNING, TESTER_UUID
+)
+MWFR_B_RUNNING = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, RUNNING, TESTER_UUID)
+MWFR_C_RUNNING = mwf_run_with_added_attrs(MWFR_C, [], TESTER_UUID, RUNNING, TESTER_UUID)
+MWFR_D_RUNNING = mwf_run_with_added_attrs(
+    MWFR_D, DEP_ON_A + DEP_ON_B + DEP_ON_C, TESTER_UUID, RUNNING, TESTER_UUID
+)
+
+# Failed/stopped MetaWorkflow Run dicts
+MWFR_A_FAILED = mwf_run_with_added_attrs(
+    MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, FAILED, TESTER_UUID
+)
+MWFR_A_STOPPED = mwf_run_with_added_attrs(
+    MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, STOPPED, TESTER_UUID
+)
+
+# Completed MetaWorkflow Run dicts
+MWFR_A_COMPLETED = mwf_run_with_added_attrs(
+    MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, COMPLETED, TESTER_UUID
+)
+MWFR_B_COMPLETED = mwf_run_with_added_attrs(MWFR_B, [], TESTER_UUID, COMPLETED, TESTER_UUID)
+MWFR_C_COMPLETED = mwf_run_with_added_attrs(MWFR_C, [], TESTER_UUID, COMPLETED, TESTER_UUID)
+MWFR_D_COMPLETED = mwf_run_with_added_attrs(
+    MWFR_D, DEP_ON_A + DEP_ON_B + DEP_ON_C, TESTER_UUID, COMPLETED, TESTER_UUID
+)
+
+
+# Magma FF-specific attributes handled here (for the meta_workflow_runs array update method)
+MWFR_B_COMPLETED_W_LINKTO = mwf_run_with_added_attrs(
+    MWFR_B, [], TESTER_UUID, COMPLETED, "a_link_to"
+)
+MWFR_A_FAILED_W_ERROR = mwf_run_with_added_attrs(
+    MWFR_A, DEP_ON_B + DEP_ON_C, TESTER_UUID, FAILED, None, "error_message"
+)
+MWFR_A_STOPPED_W_LINKTO_AND_ERROR = mwf_run_with_added_attrs(
+    MWFR_A,
+    DEP_ON_B + DEP_ON_C,
+    TESTER_UUID,
+    STOPPED,
+    "another_link_to",
+    "and_another_error_message",
+)
+
+# Note: these MetaWorkflowRuns above will be mixed and matched for testing purposes
+# See meta_workflow_runs arrays and Run Handler input dicts below
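
The arrays and handler dicts that follow mix these step dicts to exercise the handler's final_status logic. A hedged reconstruction of that rule, consistent with the statuses asserted in the tests but not the authoritative version (which lives in magma/metawflrun_handler.py):

def final_status_sketch(step_statuses):
    """Rough sketch of the handler-level status rule, for intuition only."""
    if all(status == "completed" for status in step_statuses):
        return "completed"
    if any(status == "failed" for status in step_statuses):
        return "failed"
    if any(status == "stopped" for status in step_statuses):
        return "stopped"
    if all(status == "pending" for status in step_statuses):
        return "pending"
    return "running"

# e.g. two completed steps, one failed, one pending -> handler is "failed"
assert final_status_sketch(["completed", "completed", "failed", "pending"]) == "failed"
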
+
+# All steps pending
+PENDING_ARRAY = [MWFR_B_PENDING, MWFR_C_PENDING, MWFR_A_PENDING, MWFR_D_PENDING]
+HANDLER_PENDING = mwfr_handler_dict_generator(PENDING_ARRAY)
+HANDLER_PENDING_COPY = deepcopy(HANDLER_PENDING) #TODO: avoid needing this defensive copy
+
+# Handlers currently running
+FIRST_STEP_RUNNING_ARRAY = [MWFR_B_RUNNING, MWFR_C_PENDING, MWFR_A_PENDING, MWFR_D_PENDING]
+RUNNING_MWFR_ARRAY = [MWFR_B_RUNNING, MWFR_C_RUNNING, MWFR_A_PENDING, MWFR_D_PENDING]
+RUNNING_MWFR_ARRAY_2 = [
+    MWFR_B_COMPLETED_W_LINKTO,
+    MWFR_C_RUNNING,
+    MWFR_A_PENDING,
+    MWFR_D_PENDING,
+]
+# this wouldn't happen with THIS dag in particular,
+# but could in other cases (kept for the handler final_status test)
+# RUNNING_MWFR_ARRAY_3 = [MWFR_B_COMPLETED, MWFR_C_PENDING, MWFR_A_RUNNING, MWFR_D_PENDING]
+HANDLER_STEPS_RUNNING = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY)
+HANDLER_STEPS_RUNNING_2 = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY_2)
+# HANDLER_STEPS_RUNNING_3 = mwfr_handler_dict_generator(RUNNING_MWFR_ARRAY_3)
+
+# Handlers that have failed
+HALFWAY_DONE_N_FAIL_ARRAY = [
+    MWFR_B_COMPLETED,
+    MWFR_C_COMPLETED,
+    MWFR_A_FAILED,
+    MWFR_D_PENDING,
+]
+HALFWAY_DONE_N_FAIL_ARRAY_2 = [
+    MWFR_B_COMPLETED,
+    MWFR_C_COMPLETED,
+    MWFR_A_FAILED_W_ERROR,
+    MWFR_D_RUNNING,
+]
+HANDLER_FAILED = mwfr_handler_dict_generator(HALFWAY_DONE_N_FAIL_ARRAY)
+HANDLER_FAILED_2 = mwfr_handler_dict_generator(HALFWAY_DONE_N_FAIL_ARRAY_2)
+
+# Handler that has been stopped
+HALFWAY_DONE_N_STOPPED_ARRAY = [
+    MWFR_B_COMPLETED,
+    MWFR_C_COMPLETED,
+    MWFR_A_STOPPED,
+    MWFR_D_PENDING,
+]
+HALFWAY_DONE_N_STOPPED_ARRAY_2 = [
+    MWFR_B_COMPLETED,
+    MWFR_C_COMPLETED,
+    MWFR_A_STOPPED_W_LINKTO_AND_ERROR,
+    MWFR_D_PENDING,
+]
+HANDLER_STOPPED = mwfr_handler_dict_generator(HALFWAY_DONE_N_STOPPED_ARRAY)
+
+# Handler that is completed
+COMPLETED_ARRAY = [
+    MWFR_B_COMPLETED,
+    MWFR_C_COMPLETED,
+    MWFR_A_COMPLETED,
+    MWFR_D_COMPLETED,
+]
+HANDLER_COMPLETED = mwfr_handler_dict_generator(COMPLETED_ARRAY)
diff --git a/test/test_run_metawflrun_handler.py b/test/test_run_metawflrun_handler.py
new file mode 100644
index 0000000..d1ae8e8
--- /dev/null
+++ b/test/test_run_metawflrun_handler.py
@@ -0,0 +1,235 @@
+from contextlib import contextmanager
+from test.utils import patch_context
+from typing import Iterator, List, Any, Optional
+
+import mock
+import pytest
+
+
+from magma_ff.utils import JsonObject
+
+import magma_ff.run_metawflrun_handler as run_metaworkflow_run_handler_module
+from magma_ff.run_metawflrun_handler import (
+    ExecuteMetaWorkflowRunHandler,
+    execute_metawflrun_handler,
+)
+
+from magma_ff.create_metawfr import (
+    MetaWorkflowRunCreationError,
+)
+
+from test.meta_workflow_run_handler_constants import *
+
+
+META_WORKFLOW_RUN_HANDLER_UUID = "meta_workflow_run_handler_tester_uuid"
+AUTH_KEY = {"server": "some_server"}
+
+
+@contextmanager
+def patch_patch_metadata(**kwargs) -> Iterator[mock.MagicMock]:
+    """Patch ff_utils.patch_metadata call within execute_metawflrun_handler function."""
+    with patch_context(
+        run_metaworkflow_run_handler_module.ff_utils, "patch_metadata", **kwargs
+    ) as mock_item:
+        yield mock_item
+
+
+@contextmanager
+def patch_check_status(**kwargs) -> Iterator[mock.MagicMock]:
+    """Patch utils.check_status call within execute_metawflrun_handler function."""
+    with patch_context(
+        run_metaworkflow_run_handler_module, "check_status", **kwargs
+    ) as mock_item:
+        yield mock_item
+
+
+@contextmanager
+def patch_make_embed_request(**kwargs) -> Iterator[mock.MagicMock]:
+    """Patch utils.make_embed_request call within execute_metawflrun_handler function."""
+    with patch_context(
+        
run_metaworkflow_run_handler_module, "make_embed_request", **kwargs + ) as mock_item: + yield mock_item + + +@contextmanager +def patch_create_meta_workflow_run(**kwargs) -> Iterator[mock.MagicMock]: + """ + Patch magma_ff.create_metawfr.create_meta_workflow_run call + within ExecuteMetaWorkflowRunHandler class. + """ + with patch_context( + run_metaworkflow_run_handler_module, "create_meta_workflow_run", **kwargs + ) as mock_item: + yield mock_item + + +class TestExecuteMetaWorkflowRunHandler: + """Tests for methods/properties for ExecuteMetaWorkflowRunHandler class.""" + + @pytest.mark.parametrize( + "run_handler, pending_step_name, expected_result", + [ + (HANDLER_PENDING, "B", True), + (HANDLER_PENDING, "A", False), + (HANDLER_STEPS_RUNNING, "A", False), + (HANDLER_STEPS_RUNNING, "D", False), + (HANDLER_STEPS_RUNNING_2, "A", False), + (HANDLER_FAILED, "D", False), + (HANDLER_STOPPED, "D", False), + ], + ) + def test_check_pending_step_dependencies( + self, run_handler: JsonObject, pending_step_name: str, expected_result: bool + ) -> None: + """ + Tests the check of a mwfr step's dependencies, + and whether they are completed or not (checking status). + """ + execution_generator = ExecuteMetaWorkflowRunHandler(run_handler, AUTH_KEY) + result = execution_generator._check_pending_step_dependencies(pending_step_name) + assert result == expected_result + + @pytest.mark.parametrize( + "run_handler, pending_step_name, exception_expected", + [(HANDLER_PENDING, "B", False), (HANDLER_PENDING, "B", True)], + ) + def test_create_and_update_meta_workflow_run_step( + self, run_handler: JsonObject, pending_step_name: str, exception_expected: bool + ) -> None: + """Tests creation (and updates) of new metaworkflow run steps""" + with patch_create_meta_workflow_run() as mock_create_mwfr: + execution_generator = ExecuteMetaWorkflowRunHandler(run_handler, AUTH_KEY) + if not exception_expected: + mock_create_mwfr.return_value = TEST_MWFR_SIMPLE_GET_OUTPUT + execution_generator._create_and_update_meta_workflow_run_step( + pending_step_name + ) + assert ( + execution_generator.handler.get_meta_workflow_run_step_attr( + pending_step_name, META_WORKFLOW_RUN + ) + == TEST_MWFR_SIMPLE_GET_OUTPUT[UUID] + ) + assert ( + execution_generator.handler.get_meta_workflow_run_step_attr( + pending_step_name, STATUS + ) + == RUNNING + ) + else: + mock_create_mwfr.side_effect = MetaWorkflowRunCreationError("oops") + execution_generator._create_and_update_meta_workflow_run_step( + pending_step_name + ) + assert ( + execution_generator.handler.get_meta_workflow_run_step_attr( + pending_step_name, ERROR + ) + == "oops" + ) + assert ( + execution_generator.handler.get_meta_workflow_run_step_attr( + pending_step_name, STATUS + ) + == FAILED + ) + + @pytest.mark.parametrize( + "run_handler, orig_final_status, yielded_statuses, yielded_mwf_run_arrays", + [ + ( + HANDLER_PENDING, + PENDING, + [RUNNING, RUNNING, RUNNING, RUNNING], + [ + FIRST_STEP_RUNNING_ARRAY, + RUNNING_MWFR_ARRAY, + RUNNING_MWFR_ARRAY, + RUNNING_MWFR_ARRAY, + ], + ), + ( + HANDLER_STEPS_RUNNING, + RUNNING, + [RUNNING, RUNNING], + [RUNNING_MWFR_ARRAY, RUNNING_MWFR_ARRAY], + ), + ( + HANDLER_STEPS_RUNNING_2, + RUNNING, + [RUNNING, RUNNING], + [RUNNING_MWFR_ARRAY, RUNNING_MWFR_ARRAY], + ), + (HANDLER_FAILED, FAILED, [FAILED], [HALFWAY_DONE_N_FAIL_ARRAY]), + (HANDLER_FAILED_2, FAILED, [], []), + (HANDLER_STOPPED, STOPPED, [STOPPED], [HALFWAY_DONE_N_STOPPED_ARRAY]), + (HANDLER_COMPLETED, COMPLETED, [], []), + ], + ) + def 
test_generator_of_created_meta_workflow_run_steps( + self, + run_handler: JsonObject, + orig_final_status: str, + yielded_statuses: List[str], + yielded_mwf_run_arrays: List[List[Any]], + ) -> None: + """ + Tests generator of dictionaries used to PATCH created MetaWorkflow Runs + and the final status of the overall MetaWorkflow Run Handler. + """ + with patch_create_meta_workflow_run(return_value=TEST_MWFR_SIMPLE_GET_OUTPUT): + execution_generator = ExecuteMetaWorkflowRunHandler(run_handler, AUTH_KEY) + assert ( + getattr(execution_generator.handler, FINAL_STATUS) == orig_final_status + ) + patch_dict_generator = ( + execution_generator.generator_of_created_meta_workflow_run_steps() + ) + assert len(yielded_statuses) == len(list(patch_dict_generator)) + for idx, step in enumerate(patch_dict_generator): + assert step[FINAL_STATUS] == yielded_statuses[idx] + assert step[META_WORKFLOW_RUNS] == yielded_mwf_run_arrays[idx] + + +@pytest.mark.parametrize( + "run_handler_json, value_err_expected, status_valid, patch_metadata_calls", + [ + (None, True, True, 0), + (HANDLER_PENDING_COPY, False, False, 0), + (HANDLER_PENDING_COPY, False, True, 4), + ], +) +def test_execute_metawflrun_handler( + run_handler_json: Optional[JsonObject], + value_err_expected: bool, + status_valid: bool, + patch_metadata_calls: int, +) -> None: + """ + Tests wrapper function of generator of dictionaries used to PATCH + the Run Handler final status and created MetaWorkflow Runs. + Includes additional CGAP portal status checks. + """ + with patch_make_embed_request() as mock_embed_request: + with patch_check_status() as mock_check_status: + with patch_patch_metadata() as mock_patch_metadata: + with patch_create_meta_workflow_run( + return_value=TEST_MWFR_SIMPLE_GET_OUTPUT + ): + if value_err_expected: + mock_embed_request.return_value = None + with pytest.raises(ValueError) as val_err: + execute_metawflrun_handler(TESTER_UUID, AUTH_KEY) + assert TESTER_UUID in val_err + assert ( + mock_patch_metadata.call_count == patch_metadata_calls + ) + else: + mock_embed_request.return_value = run_handler_json + if not status_valid: + mock_check_status.return_value = False + else: + mock_check_status.return_value = True + execute_metawflrun_handler(TESTER_UUID, AUTH_KEY) + assert mock_patch_metadata.call_count == patch_metadata_calls From 82c8d5596f41b957f60386f9d21c124485988f39 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Thu, 1 Jun 2023 10:12:54 -0400 Subject: [PATCH 36/38] renamed test file --- test/test_run_metawflrun_handler.py | 235 ---------------------------- 1 file changed, 235 deletions(-) delete mode 100644 test/test_run_metawflrun_handler.py diff --git a/test/test_run_metawflrun_handler.py b/test/test_run_metawflrun_handler.py deleted file mode 100644 index d1ae8e8..0000000 --- a/test/test_run_metawflrun_handler.py +++ /dev/null @@ -1,235 +0,0 @@ -from contextlib import contextmanager -from test.utils import patch_context -from typing import Iterator, List, Any, Optional - -import mock -import pytest - - -from magma_ff.utils import JsonObject - -import magma_ff.run_metawflrun_handler as run_metaworkflow_run_handler_module -from magma_ff.run_metawflrun_handler import ( - ExecuteMetaWorkflowRunHandler, - execute_metawflrun_handler, -) - -from magma_ff.create_metawfr import ( - MetaWorkflowRunCreationError, -) - -from test.meta_workflow_run_handler_constants import * - - -META_WORKFLOW_RUN_HANDLER_UUID = "meta_workflow_run_handler_tester_uuid" -AUTH_KEY = {"server": "some_server"} - - -@contextmanager -def 
patch_patch_metadata(**kwargs) -> Iterator[mock.MagicMock]: - """Patch ff_utils.patch_metadata call within execute_metawflrun_handler function.""" - with patch_context( - run_metaworkflow_run_handler_module.ff_utils, "patch_metadata", **kwargs - ) as mock_item: - yield mock_item - - -@contextmanager -def patch_check_status(**kwargs) -> Iterator[mock.MagicMock]: - """Patch utils.check_status call within execute_metawflrun_handler function.""" - with patch_context( - run_metaworkflow_run_handler_module, "check_status", **kwargs - ) as mock_item: - yield mock_item - - -@contextmanager -def patch_make_embed_request(**kwargs) -> Iterator[mock.MagicMock]: - """Patch utils.make_embed_request call within make_embed_request function.""" - with patch_context( - run_metaworkflow_run_handler_module, "make_embed_request", **kwargs - ) as mock_item: - yield mock_item - - -@contextmanager -def patch_create_meta_workflow_run(**kwargs) -> Iterator[mock.MagicMock]: - """ - Patch magma_ff.create_metawfr.create_meta_workflow_run call - within ExecuteMetaWorkflowRunHandler class. - """ - with patch_context( - run_metaworkflow_run_handler_module, "create_meta_workflow_run", **kwargs - ) as mock_item: - yield mock_item - - -class TestExecuteMetaWorkflowRunHandler: - """Tests for methods/properties for ExecuteMetaWorkflowRunHandler class.""" - - @pytest.mark.parametrize( - "run_handler, pending_step_name, expected_result", - [ - (HANDLER_PENDING, "B", True), - (HANDLER_PENDING, "A", False), - (HANDLER_STEPS_RUNNING, "A", False), - (HANDLER_STEPS_RUNNING, "D", False), - (HANDLER_STEPS_RUNNING_2, "A", False), - (HANDLER_FAILED, "D", False), - (HANDLER_STOPPED, "D", False), - ], - ) - def test_check_pending_step_dependencies( - self, run_handler: JsonObject, pending_step_name: str, expected_result: bool - ) -> None: - """ - Tests the check of a mwfr step's dependencies, - and whether they are completed or not (checking status). 
- """ - execution_generator = ExecuteMetaWorkflowRunHandler(run_handler, AUTH_KEY) - result = execution_generator._check_pending_step_dependencies(pending_step_name) - assert result == expected_result - - @pytest.mark.parametrize( - "run_handler, pending_step_name, exception_expected", - [(HANDLER_PENDING, "B", False), (HANDLER_PENDING, "B", True)], - ) - def test_create_and_update_meta_workflow_run_step( - self, run_handler: JsonObject, pending_step_name: str, exception_expected: bool - ) -> None: - """Tests creation (and updates) of new metaworkflow run steps""" - with patch_create_meta_workflow_run() as mock_create_mwfr: - execution_generator = ExecuteMetaWorkflowRunHandler(run_handler, AUTH_KEY) - if not exception_expected: - mock_create_mwfr.return_value = TEST_MWFR_SIMPLE_GET_OUTPUT - execution_generator._create_and_update_meta_workflow_run_step( - pending_step_name - ) - assert ( - execution_generator.handler.get_meta_workflow_run_step_attr( - pending_step_name, META_WORKFLOW_RUN - ) - == TEST_MWFR_SIMPLE_GET_OUTPUT[UUID] - ) - assert ( - execution_generator.handler.get_meta_workflow_run_step_attr( - pending_step_name, STATUS - ) - == RUNNING - ) - else: - mock_create_mwfr.side_effect = MetaWorkflowRunCreationError("oops") - execution_generator._create_and_update_meta_workflow_run_step( - pending_step_name - ) - assert ( - execution_generator.handler.get_meta_workflow_run_step_attr( - pending_step_name, ERROR - ) - == "oops" - ) - assert ( - execution_generator.handler.get_meta_workflow_run_step_attr( - pending_step_name, STATUS - ) - == FAILED - ) - - @pytest.mark.parametrize( - "run_handler, orig_final_status, yielded_statuses, yielded_mwf_run_arrays", - [ - ( - HANDLER_PENDING, - PENDING, - [RUNNING, RUNNING, RUNNING, RUNNING], - [ - FIRST_STEP_RUNNING_ARRAY, - RUNNING_MWFR_ARRAY, - RUNNING_MWFR_ARRAY, - RUNNING_MWFR_ARRAY, - ], - ), - ( - HANDLER_STEPS_RUNNING, - RUNNING, - [RUNNING, RUNNING], - [RUNNING_MWFR_ARRAY, RUNNING_MWFR_ARRAY], - ), - ( - HANDLER_STEPS_RUNNING_2, - RUNNING, - [RUNNING, RUNNING], - [RUNNING_MWFR_ARRAY, RUNNING_MWFR_ARRAY], - ), - (HANDLER_FAILED, FAILED, [FAILED], [HALFWAY_DONE_N_FAIL_ARRAY]), - (HANDLER_FAILED_2, FAILED, [], []), - (HANDLER_STOPPED, STOPPED, [STOPPED], [HALFWAY_DONE_N_STOPPED_ARRAY]), - (HANDLER_COMPLETED, COMPLETED, [], []), - ], - ) - def test_generator_of_created_meta_workflow_run_steps( - self, - run_handler: JsonObject, - orig_final_status: str, - yielded_statuses: List[str], - yielded_mwf_run_arrays: List[List[Any]], - ) -> None: - """ - Tests generator of dictionaries used to PATCH created MetaWorkflow Runs - and the final status of the overall MetaWorkflow Run Handler. 
- """ - with patch_create_meta_workflow_run(return_value=TEST_MWFR_SIMPLE_GET_OUTPUT): - execution_generator = ExecuteMetaWorkflowRunHandler(run_handler, AUTH_KEY) - assert ( - getattr(execution_generator.handler, FINAL_STATUS) == orig_final_status - ) - patch_dict_generator = ( - execution_generator.generator_of_created_meta_workflow_run_steps() - ) - assert len(yielded_statuses) == len(list(patch_dict_generator)) - for idx, step in enumerate(patch_dict_generator): - assert step[FINAL_STATUS] == yielded_statuses[idx] - assert step[META_WORKFLOW_RUNS] == yielded_mwf_run_arrays[idx] - - -@pytest.mark.parametrize( - "run_handler_json, value_err_expected, status_valid, patch_metadata_calls", - [ - (None, True, True, 0), - (HANDLER_PENDING_COPY, False, False, 0), - (HANDLER_PENDING_COPY, False, True, 4), - ], -) -def test_execute_metawflrun_handler( - run_handler_json: Optional[JsonObject], - value_err_expected: bool, - status_valid: bool, - patch_metadata_calls: int, -) -> None: - """ - Tests wrapper function of generator of dictionaries used to PATCH - the Run Handler final status and created MetaWorkflow Runs. - Includes additional CGAP portal status checks. - """ - with patch_make_embed_request() as mock_embed_request: - with patch_check_status() as mock_check_status: - with patch_patch_metadata() as mock_patch_metadata: - with patch_create_meta_workflow_run( - return_value=TEST_MWFR_SIMPLE_GET_OUTPUT - ): - if value_err_expected: - mock_embed_request.return_value = None - with pytest.raises(ValueError) as val_err: - execute_metawflrun_handler(TESTER_UUID, AUTH_KEY) - assert TESTER_UUID in val_err - assert ( - mock_patch_metadata.call_count == patch_metadata_calls - ) - else: - mock_embed_request.return_value = run_handler_json - if not status_valid: - mock_check_status.return_value = False - else: - mock_check_status.return_value = True - execute_metawflrun_handler(TESTER_UUID, AUTH_KEY) - assert mock_patch_metadata.call_count == patch_metadata_calls From 1d5e77b6dcca4f669300b61c2465d9bf42a0c00d Mon Sep 17 00:00:00 2001 From: vstevensf Date: Thu, 1 Jun 2023 18:07:43 -0400 Subject: [PATCH 37/38] modified FFMetaWfrUtils class and pytests --- magma/magma_constants.py | 1 + magma_ff/wfrutils.py | 134 +++++++------- test/test_run_metawflrun_handler_ff.py | 235 +++++++++++++++++++++++++ test/test_wfrutils_ff.py | 185 +++++++++++++++++++ 4 files changed, 493 insertions(+), 62 deletions(-) create mode 100644 test/test_run_metawflrun_handler_ff.py create mode 100644 test/test_wfrutils_ff.py diff --git a/magma/magma_constants.py b/magma/magma_constants.py index 13b4d79..db31cc8 100644 --- a/magma/magma_constants.py +++ b/magma/magma_constants.py @@ -18,6 +18,7 @@ ITEMS_FOR_CREATION_UUID = "items_for_creation_uuid" # MetaWorkflow Run Handler attributes +COST = "cost" STATUS = "status" FINAL_STATUS = "final_status" ASSOCIATED_META_WORKFLOW_HANDLER = "meta_workflow_handler" diff --git a/magma_ff/wfrutils.py b/magma_ff/wfrutils.py index 09cfc54..a321674 100644 --- a/magma_ff/wfrutils.py +++ b/magma_ff/wfrutils.py @@ -1,25 +1,15 @@ -#!/usr/bin/env python3 - -################################################ -# -# -# -################################################ - -################################################ -# Libraries -################################################ import sys, os -# dcicutils from dcicutils import ff_utils from dcicutils.s3_utils import s3Utils from tibanna.job import Job +from functools import cached_property +from magma.magma_constants import * +from magma_ff.utils import 
JsonObject +from typing import Optional +from requests.exceptions import HTTPError -################################################ -# FFWfrUtils -################################################ class FFWfrUtils(object): def __init__(self, env): """ @@ -122,65 +112,85 @@ def filter_wfr_output_minimal_processed(wfr_output): #end class +class FdnConnectionException(Exception): + pass -################################################ -# FFMetaWfrUtils -################################################ -#TODO: make pytests -class FFMetaWfrUtils(object): - def __init__(self, env): - """ - :param env: e.g. 'fourfront-cgap', 'fourfront-cgap-wolf' - :type env: str - """ - self.env = env +#end class - # Cache for metadata - # can save several mwfr's metadata dicts at a time - self._metadata = dict() - def get_meta_wfr_current_status(self, mwfr_uuid): +class FFMetaWfrUtils(object): + """Class for accessing status and cost metadata of a MetaWorkflow Run from CGAP portal.""" + + def __init__(self, auth_key: JsonObject) -> None: + """ + Initialize FFMetaWfrUtils object, setting basic attributes. + + :param auth_key: Authorization keys for C4 account """ - Return the status of the mwfr associated with specified uuid. - If run associated with uuid is not found, return None. + self._auth_key = auth_key + + def get_meta_workflow_run_status(self, meta_workflow_run_identifier: str) -> str: """ - mwfr_meta = self._mwfr_metadata(mwfr_uuid) - if not mwfr_meta: # if an empty list is result - return None - else: - return mwfr_meta['final_status'] + Return the status of the MetaWorkflow Run associated with specified ID. - def get_meta_wfr_cost(self, mwfr_uuid): + :param meta_workflow_run_identifier: Identifier (e.g. UUID, @id) for + MetaWorkflow Run to be searched + :return: the status of the specified MetaWorkflow Run """ - Return the status of the mwfr associated with specified uuid. - If run associated with uuid is not found, return None. + meta_workflow_run_portal_output = self._retrieve_meta_workflow_run(meta_workflow_run_identifier) + + # TODO: for now, just assuming it will have this attribute + # check this in integrated testing + return meta_workflow_run_portal_output[FINAL_STATUS] + + def get_meta_workflow_run_cost(self, meta_workflow_run_identifier: str) -> float: """ - mwfr_meta = self._mwfr_metadata(mwfr_uuid) - if not mwfr_meta: # if an empty list is result - return None - else: - return mwfr_meta['cost'] + Return the cost of the MetaWorkflow Run associated with specified ID. + If no cost attribute found, return cost as 0. - def _mwfr_metadata(self, mwfr_uuid): - """Get portal metawfrun metadata from uuid. - Return [] if a run associated with uuid isn't found. + :param meta_workflow_run_identifier: Identifier (e.g. 
UUID, @id) for
+            MetaWorkflow Run to be searched
+        :return: the cost of the specified MetaWorkflow Run
+        """
+        meta_workflow_run_portal_output = self._retrieve_meta_workflow_run(meta_workflow_run_identifier)
 
-    def _mwfr_metadata(self, mwfr_uuid):
-        """Get portal metawfrun metadata from uuid.
-        Return [] if a run associated with uuid isn't found.
+        if COST in meta_workflow_run_portal_output:
+            return meta_workflow_run_portal_output[COST]
 
-        # Use cache
-        if mwfr_uuid in self._metadata:
-            return self._metadata[mwfr_uuid]
+        return float(0)
 
-        # Search by uuid
-        query='/search/?type=MetaWorkflowRun&frame=object&uuid=%s' % mwfr_uuid
-        try:
-            search_result_list = ff_utils.search_metadata(query, key=self.ff_key)
-        except Exception as e:
-            raise FdnConnectionException(e)
+    def _retrieve_meta_workflow_run(self, meta_workflow_run_identifier: str) -> JsonObject:
+        """
+        Get portal MetaWorkflow Run metadata JSON using its identifier.
+        Raises an HTTPError if the GET request is unsuccessful.
 
-        self._metadata[mwfr_uuid] = search_result_list[0]
-        return self._metadata[mwfr_uuid]
+        :param meta_workflow_run_identifier: Identifier (e.g. UUID, @id) for
+            MetaWorkflow Run to be searched
+        :return: Portal JSON object representing this MetaWorkflow Run and its metadata
+        """
+        # Return the cached metadata if this ID has already been retrieved
+        if meta_workflow_run_identifier in self._meta_workflow_runs_cache:
+            return self._meta_workflow_runs_cache[meta_workflow_run_identifier]
 
-class FdnConnectionException(Exception):
-    pass
+        # Otherwise retrieve this metadata from the portal
+        try:
+            result = ff_utils.get_metadata(
+                meta_workflow_run_identifier, key=self._auth_key
+            )
+        except Exception as err:
+            raise HTTPError(
+                "GET request unsuccessful for MetaWorkflow Run using the following ID: "
+                f"{meta_workflow_run_identifier}"
+            ) from err
+
+        # Add GET request result to cache
+        self._meta_workflow_runs_cache[meta_workflow_run_identifier] = result
+        return result
+
+    @cached_property
+    def _meta_workflow_runs_cache(self) -> dict:
+        """
+        Cache for MetaWorkflow Run metadata retrieved from the CGAP portal.
+        Can hold several MetaWorkflow Run metadata dicts at a time.
+        Initially empty, filled in as MetaWorkflow Runs are retrieved.
+        Keys are MetaWorkflow Run identifiers, values are their metadata dicts.
+        """
+        return {}
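A minimal usage sketch of the FFMetaWfrUtils class added above (the auth key and UUID below are placeholders, not real credentials or items): the first lookup GETs the MetaWorkflow Run from the portal and stores it in _meta_workflow_runs_cache, so the follow-up cost lookup on the same identifier is served from the cache without a second request.

    from magma_ff.wfrutils import FFMetaWfrUtils

    # Placeholder credentials and identifier -- not a real key or item
    auth_key = {"key": "...", "secret": "...", "server": "https://cgap.example.org"}
    run_utils = FFMetaWfrUtils(auth_key)

    status = run_utils.get_meta_workflow_run_status("some-mwfr-uuid")  # GET + cache fill
    cost = run_utils.get_meta_workflow_run_cost("some-mwfr-uuid")      # cache hit, no second GET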
diff --git a/test/test_run_metawflrun_handler_ff.py b/test/test_run_metawflrun_handler_ff.py
new file mode 100644
index 0000000..d1ae8e8
--- /dev/null
+++ b/test/test_run_metawflrun_handler_ff.py
@@ -0,0 +1,235 @@
+from contextlib import contextmanager
+from test.utils import patch_context
+from typing import Iterator, List, Any, Optional
+
+import mock
+import pytest
+
+
+from magma_ff.utils import JsonObject
+
+import magma_ff.run_metawflrun_handler as run_metaworkflow_run_handler_module
+from magma_ff.run_metawflrun_handler import (
+    ExecuteMetaWorkflowRunHandler,
+    execute_metawflrun_handler,
+)
+
+from magma_ff.create_metawfr import (
+    MetaWorkflowRunCreationError,
+)
+
+from test.meta_workflow_run_handler_constants import *
+
+
+META_WORKFLOW_RUN_HANDLER_UUID = "meta_workflow_run_handler_tester_uuid"
+AUTH_KEY = {"server": "some_server"}
+
+
+@contextmanager
+def patch_patch_metadata(**kwargs) -> Iterator[mock.MagicMock]:
+    """Patch ff_utils.patch_metadata call within execute_metawflrun_handler function."""
+    with patch_context(
+        run_metaworkflow_run_handler_module.ff_utils, "patch_metadata", **kwargs
+    ) as mock_item:
+        yield mock_item
+
+
+@contextmanager
+def patch_check_status(**kwargs) -> Iterator[mock.MagicMock]:
+    """Patch utils.check_status call within execute_metawflrun_handler function."""
+    with patch_context(
+        run_metaworkflow_run_handler_module, "check_status", **kwargs
+    ) as mock_item:
+        yield mock_item
+
+
+@contextmanager
+def patch_make_embed_request(**kwargs) -> Iterator[mock.MagicMock]:
+    """Patch utils.make_embed_request call within execute_metawflrun_handler function."""
+    with patch_context(
+        run_metaworkflow_run_handler_module, "make_embed_request", **kwargs
+    ) as mock_item:
+        yield mock_item
+
+
+@contextmanager
+def patch_create_meta_workflow_run(**kwargs) -> Iterator[mock.MagicMock]:
+    """
+    Patch magma_ff.create_metawfr.create_meta_workflow_run call
+    within ExecuteMetaWorkflowRunHandler class.
+    """
+    with patch_context(
+        run_metaworkflow_run_handler_module, "create_meta_workflow_run", **kwargs
+    ) as mock_item:
+        yield mock_item
+
+
+class TestExecuteMetaWorkflowRunHandler:
+    """Tests for methods/properties for ExecuteMetaWorkflowRunHandler class."""
+
+    @pytest.mark.parametrize(
+        "run_handler, pending_step_name, expected_result",
+        [
+            (HANDLER_PENDING, "B", True),
+            (HANDLER_PENDING, "A", False),
+            (HANDLER_STEPS_RUNNING, "A", False),
+            (HANDLER_STEPS_RUNNING, "D", False),
+            (HANDLER_STEPS_RUNNING_2, "A", False),
+            (HANDLER_FAILED, "D", False),
+            (HANDLER_STOPPED, "D", False),
+        ],
+    )
+    def test_check_pending_step_dependencies(
+        self, run_handler: JsonObject, pending_step_name: str, expected_result: bool
+    ) -> None:
+        """
+        Tests the check of a MetaWorkflow Run step's dependencies,
+        verifying whether they have completed (by checking their statuses).
+ """ + execution_generator = ExecuteMetaWorkflowRunHandler(run_handler, AUTH_KEY) + result = execution_generator._check_pending_step_dependencies(pending_step_name) + assert result == expected_result + + @pytest.mark.parametrize( + "run_handler, pending_step_name, exception_expected", + [(HANDLER_PENDING, "B", False), (HANDLER_PENDING, "B", True)], + ) + def test_create_and_update_meta_workflow_run_step( + self, run_handler: JsonObject, pending_step_name: str, exception_expected: bool + ) -> None: + """Tests creation (and updates) of new metaworkflow run steps""" + with patch_create_meta_workflow_run() as mock_create_mwfr: + execution_generator = ExecuteMetaWorkflowRunHandler(run_handler, AUTH_KEY) + if not exception_expected: + mock_create_mwfr.return_value = TEST_MWFR_SIMPLE_GET_OUTPUT + execution_generator._create_and_update_meta_workflow_run_step( + pending_step_name + ) + assert ( + execution_generator.handler.get_meta_workflow_run_step_attr( + pending_step_name, META_WORKFLOW_RUN + ) + == TEST_MWFR_SIMPLE_GET_OUTPUT[UUID] + ) + assert ( + execution_generator.handler.get_meta_workflow_run_step_attr( + pending_step_name, STATUS + ) + == RUNNING + ) + else: + mock_create_mwfr.side_effect = MetaWorkflowRunCreationError("oops") + execution_generator._create_and_update_meta_workflow_run_step( + pending_step_name + ) + assert ( + execution_generator.handler.get_meta_workflow_run_step_attr( + pending_step_name, ERROR + ) + == "oops" + ) + assert ( + execution_generator.handler.get_meta_workflow_run_step_attr( + pending_step_name, STATUS + ) + == FAILED + ) + + @pytest.mark.parametrize( + "run_handler, orig_final_status, yielded_statuses, yielded_mwf_run_arrays", + [ + ( + HANDLER_PENDING, + PENDING, + [RUNNING, RUNNING, RUNNING, RUNNING], + [ + FIRST_STEP_RUNNING_ARRAY, + RUNNING_MWFR_ARRAY, + RUNNING_MWFR_ARRAY, + RUNNING_MWFR_ARRAY, + ], + ), + ( + HANDLER_STEPS_RUNNING, + RUNNING, + [RUNNING, RUNNING], + [RUNNING_MWFR_ARRAY, RUNNING_MWFR_ARRAY], + ), + ( + HANDLER_STEPS_RUNNING_2, + RUNNING, + [RUNNING, RUNNING], + [RUNNING_MWFR_ARRAY, RUNNING_MWFR_ARRAY], + ), + (HANDLER_FAILED, FAILED, [FAILED], [HALFWAY_DONE_N_FAIL_ARRAY]), + (HANDLER_FAILED_2, FAILED, [], []), + (HANDLER_STOPPED, STOPPED, [STOPPED], [HALFWAY_DONE_N_STOPPED_ARRAY]), + (HANDLER_COMPLETED, COMPLETED, [], []), + ], + ) + def test_generator_of_created_meta_workflow_run_steps( + self, + run_handler: JsonObject, + orig_final_status: str, + yielded_statuses: List[str], + yielded_mwf_run_arrays: List[List[Any]], + ) -> None: + """ + Tests generator of dictionaries used to PATCH created MetaWorkflow Runs + and the final status of the overall MetaWorkflow Run Handler. 
+        """
+        with patch_create_meta_workflow_run(return_value=TEST_MWFR_SIMPLE_GET_OUTPUT):
+            execution_generator = ExecuteMetaWorkflowRunHandler(run_handler, AUTH_KEY)
+            assert (
+                getattr(execution_generator.handler, FINAL_STATUS) == orig_final_status
+            )
+            patch_dict_generator = (
+                execution_generator.generator_of_created_meta_workflow_run_steps()
+            )
+            # Iterate lazily: materializing with list() first would exhaust
+            # the generator before the per-step assertions run
+            num_yielded_steps = 0
+            for idx, step in enumerate(patch_dict_generator):
+                assert step[FINAL_STATUS] == yielded_statuses[idx]
+                assert step[META_WORKFLOW_RUNS] == yielded_mwf_run_arrays[idx]
+                num_yielded_steps += 1
+            assert num_yielded_steps == len(yielded_statuses)
+
+
+@pytest.mark.parametrize(
+    "run_handler_json, value_err_expected, status_valid, patch_metadata_calls",
+    [
+        (None, True, True, 0),
+        (HANDLER_PENDING_COPY, False, False, 0),
+        (HANDLER_PENDING_COPY, False, True, 4),
+    ],
+)
+def test_execute_metawflrun_handler(
+    run_handler_json: Optional[JsonObject],
+    value_err_expected: bool,
+    status_valid: bool,
+    patch_metadata_calls: int,
+) -> None:
+    """
+    Tests wrapper function of generator of dictionaries used to PATCH
+    the Run Handler final status and created MetaWorkflow Runs.
+    Includes additional CGAP portal status checks.
+    """
+    with patch_make_embed_request() as mock_embed_request:
+        with patch_check_status() as mock_check_status:
+            with patch_patch_metadata() as mock_patch_metadata:
+                with patch_create_meta_workflow_run(
+                    return_value=TEST_MWFR_SIMPLE_GET_OUTPUT
+                ):
+                    if value_err_expected:
+                        mock_embed_request.return_value = None
+                        with pytest.raises(ValueError) as val_err:
+                            execute_metawflrun_handler(TESTER_UUID, AUTH_KEY)
+                        # Assert outside the raises block; inside it, nothing
+                        # after the raising call would execute
+                        assert TESTER_UUID in str(val_err.value)
+                        assert (
+                            mock_patch_metadata.call_count == patch_metadata_calls
+                        )
+                    else:
+                        mock_embed_request.return_value = run_handler_json
+                        if not status_valid:
+                            mock_check_status.return_value = False
+                        else:
+                            mock_check_status.return_value = True
+                        execute_metawflrun_handler(TESTER_UUID, AUTH_KEY)
+                        assert mock_patch_metadata.call_count == patch_metadata_calls
diff --git a/test/test_wfrutils_ff.py b/test/test_wfrutils_ff.py
new file mode 100644
index 0000000..d690017
--- /dev/null
+++ b/test/test_wfrutils_ff.py
@@ -0,0 +1,185 @@
+from contextlib import contextmanager
+from test.utils import patch_context
+from typing import Iterator  # , List, Any, Optional
+from requests.exceptions import HTTPError
+
+import mock
+import pytest
+
+import magma_ff.wfrutils as wfrutils_module
+from magma_ff.wfrutils import (
+    # FFWfrUtils,
+    FFMetaWfrUtils,
+)
+
+from magma.magma_constants import *
+from magma_ff.utils import JsonObject
+
+# TODO: add to constants file?
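The patch_context helper imported at the top of these test modules lives in test/utils.py, which is not part of this patch series. Judging from how it is called throughout (a module or class, an attribute name, and passthrough kwargs such as return_value, side_effect, and new_callable), a minimal sketch of the assumed implementation is a thin wrapper over mock.patch.object:

    from contextlib import contextmanager
    from typing import Iterator

    import mock

    @contextmanager
    def patch_context(target, attribute: str, **kwargs) -> Iterator[mock.MagicMock]:
        # Replace `attribute` on `target` for the duration of the block and
        # hand the resulting MagicMock back to the caller; kwargs pass
        # straight through to mock.patch.object
        with mock.patch.object(target, attribute, **kwargs) as mock_item:
            yield mock_item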
+TEST_MWFR_ID_A = "test_uuid_a" +TEST_MWFR_ID_B = "test_uuid_b" +AUTH_KEY = {"server": "some_server"} +RANDOM_COST = 34.56 + +MWFR_A_PORTAL_OBJ = {UUID: TEST_MWFR_ID_A, FINAL_STATUS: PENDING, COST: RANDOM_COST} + +MWFR_B_PORTAL_OBJ = {UUID: TEST_MWFR_ID_B, FINAL_STATUS: RUNNING} + +CACHE_WITH_MWFR = {TEST_MWFR_ID_B: MWFR_B_PORTAL_OBJ} + + +@contextmanager +def patch_get_metadata(**kwargs) -> Iterator[mock.MagicMock]: + """Patch ff_utils.get_metadata call within FFMetaWfrUtils class.""" + with patch_context(wfrutils_module.ff_utils, "get_metadata", **kwargs) as mock_item: + yield mock_item + + +@contextmanager +def patch_meta_workflow_runs_cache(**kwargs) -> Iterator[mock.MagicMock]: + """Patch _meta_workflow_runs_cache property within FFMetaWfrUtils class.""" + with patch_context( + wfrutils_module.FFMetaWfrUtils, + "_meta_workflow_runs_cache", + new_callable=mock.PropertyMock, + **kwargs + ) as mock_item: + yield mock_item + + +@contextmanager +def patch_retrieve_meta_workflow_run(**kwargs) -> Iterator[mock.MagicMock]: + """Patch _retrieve_meta_workflow_run method within FFMetaWfrUtils class.""" + with patch_context( + wfrutils_module.FFMetaWfrUtils, "_retrieve_meta_workflow_run", **kwargs + ) as mock_item: + yield mock_item + + +class TestFFMetaWfrUtils: + """Tests for methods/properties for FFMetaWfrUtils class.""" + + def test_meta_workflow_runs_cache(self) -> None: + """ + Tests updates to _meta_workflow_runs_cache property. + """ + meta_workflow_runs_retriever = FFMetaWfrUtils(AUTH_KEY) + assert meta_workflow_runs_retriever._meta_workflow_runs_cache == {} + meta_workflow_runs_retriever._meta_workflow_runs_cache[ + TEST_MWFR_ID_B + ] = MWFR_B_PORTAL_OBJ + assert meta_workflow_runs_retriever._meta_workflow_runs_cache == CACHE_WITH_MWFR + meta_workflow_runs_retriever._meta_workflow_runs_cache[ + TEST_MWFR_ID_A + ] = MWFR_A_PORTAL_OBJ + assert len(meta_workflow_runs_retriever._meta_workflow_runs_cache) == 2 + + @pytest.mark.parametrize( + "meta_workflow_run_identifier, meta_workflow_run, in_cache, get_request_exception, cache_calls", + [ + ( + TEST_MWFR_ID_A, + MWFR_A_PORTAL_OBJ, + False, + False, + 2, + ), # successful GET from portal + ( + TEST_MWFR_ID_A, + MWFR_A_PORTAL_OBJ, + False, + True, + 1, + ), # unsuccessful GET from portal + ( + TEST_MWFR_ID_B, + MWFR_B_PORTAL_OBJ, + True, + False, + 2, + ), # MWFR already in the cache + ], + ) + def test_retrieve_meta_workflow_run( + self, + meta_workflow_run_identifier: str, + meta_workflow_run: JsonObject, + in_cache: bool, + get_request_exception: bool, + cache_calls: int, + ) -> None: + """ + Tests retrieval of MetaWorkflow Runs from portal, and addition to cache. 
+ """ + with patch_get_metadata() as mock_get_metadata: + with patch_meta_workflow_runs_cache() as mock_cache: + meta_workflow_runs_retriever = FFMetaWfrUtils(AUTH_KEY) + if in_cache: + mock_cache.return_value = CACHE_WITH_MWFR + result = meta_workflow_runs_retriever._retrieve_meta_workflow_run( + meta_workflow_run_identifier + ) + assert result == meta_workflow_run + mock_get_metadata.assert_not_called() + assert mock_cache.call_count == cache_calls + else: + if get_request_exception: + mock_get_metadata.side_effect = Exception("oops") + with pytest.raises(HTTPError): + meta_workflow_runs_retriever._retrieve_meta_workflow_run( + meta_workflow_run_identifier + ) + assert mock_cache.call_count == cache_calls + else: + mock_get_metadata.return_value = meta_workflow_run + result = ( + meta_workflow_runs_retriever._retrieve_meta_workflow_run( + meta_workflow_run_identifier + ) + ) + assert mock_cache.call_count == cache_calls + assert result == meta_workflow_run + + @pytest.mark.parametrize( + "meta_workflow_run_identifier, meta_workflow_run, expected_status", + [ + (TEST_MWFR_ID_A, MWFR_A_PORTAL_OBJ, PENDING), + (TEST_MWFR_ID_B, MWFR_B_PORTAL_OBJ, RUNNING) + ], + ) + def test_get_meta_workflow_run_status( + self, + meta_workflow_run_identifier: str, + meta_workflow_run: JsonObject, + expected_status: str + ) -> None: + """ + Tests retrieval of a MetaWorkflow Run's status attribute from portal. + """ + with patch_retrieve_meta_workflow_run(return_value=meta_workflow_run): + meta_workflow_runs_retriever = FFMetaWfrUtils(AUTH_KEY) + result = meta_workflow_runs_retriever.get_meta_workflow_run_status(meta_workflow_run_identifier) + assert result == expected_status + + @pytest.mark.parametrize( + "meta_workflow_run_identifier, meta_workflow_run, expected_cost", + [ + (TEST_MWFR_ID_A, MWFR_A_PORTAL_OBJ, RANDOM_COST), + (TEST_MWFR_ID_B, MWFR_B_PORTAL_OBJ, float(0)) + ], + ) + def test_get_meta_workflow_run_cost( + self, + meta_workflow_run_identifier: str, + meta_workflow_run: JsonObject, + expected_cost: float + ) -> None: + """ + Tests retrieval of a MetaWorkflow Run's cost attribute from portal. 
+ """ + with patch_retrieve_meta_workflow_run(return_value=meta_workflow_run): + meta_workflow_runs_retriever = FFMetaWfrUtils(AUTH_KEY) + result = meta_workflow_runs_retriever.get_meta_workflow_run_cost(meta_workflow_run_identifier) + assert result == expected_cost + assert isinstance(result, float) + \ No newline at end of file From 4f17490d1e2a9cf692331305950318f33dfa6967 Mon Sep 17 00:00:00 2001 From: vstevensf Date: Sat, 24 Jun 2023 19:39:51 -0400 Subject: [PATCH 38/38] Draft of checkstatus tests --- magma/magma_constants.py | 4 + magma/validated_dictionary.py | 6 +- magma_ff/checkstatus.py | 173 ++++++------ test/meta_workflow_run_handler_constants.py | 1 + test/test_checkstatus_ff.py | 284 +++++++++++++------- test/tester.py | 78 ++++++ 6 files changed, 364 insertions(+), 182 deletions(-) create mode 100644 test/tester.py diff --git a/magma/magma_constants.py b/magma/magma_constants.py index db31cc8..d178223 100644 --- a/magma/magma_constants.py +++ b/magma/magma_constants.py @@ -34,8 +34,12 @@ FAILED = "failed" STOPPED = "stopped" +INACTIVE = "inactive" +QC_FAIL = "quality metric failed" + #TODO: the following is here in case dup flag is added in the future +#TODO: add back in # MWFR_TO_HANDLER_STEP_STATUS_DICT = { # "pending": "pending", # "running": "running", diff --git a/magma/validated_dictionary.py b/magma/validated_dictionary.py index 5422f52..7021be8 100644 --- a/magma/validated_dictionary.py +++ b/magma/validated_dictionary.py @@ -32,9 +32,9 @@ def _validate_basic_attributes(self, *attributes_to_check): """ for attribute in attributes_to_check: try: - retrieved_attr = getattr(self, attribute) - if retrieved_attr is None: - raise AttributeError("attribute %s cannot have value 'None'." % attribute) + # retrieved_attr = getattr(self, attribute) + getattr(self, attribute) + # if retrieved_attr is None: # if not retrieved_attr --> for falsy values raise AttributeError("attribute %s cannot have value 'None'." % attribute) # TODO: add this to the pytests except AttributeError as e: raise AttributeError("Object validation error, {0}\n" diff --git a/magma_ff/checkstatus.py b/magma_ff/checkstatus.py index d7fb341..bcf7257 100644 --- a/magma_ff/checkstatus.py +++ b/magma_ff/checkstatus.py @@ -1,23 +1,14 @@ -#!/usr/bin/env python3 +from magma_ff.wfrutils import FFWfrUtils, FFMetaWfrUtils +from magma_ff.utils import JsonObject +from magma_ff.metawflrun_handler import MetaWorkflowRunHandler +from magma.checkstatus import AbstractCheckStatus -################################################ -# -# dcicutils wrapper -# -################################################ +from typing import List, Dict, Union, Any, Optional +from functools import cached_property -################################################ -# Libraries -################################################ +from magma.magma_constants import * -# magma -from magma.checkstatus import AbstractCheckStatus -from magma_ff.wfrutils import FFWfrUtils, FFMetaWfrUtils -from magma_ff.metawflrun_handler import MetaWorkflowRunHandler -################################################ -# CheckStatusFF -################################################ class CheckStatusFF(AbstractCheckStatus): """Customized CheckStatus class for the portal. 
""" @@ -65,17 +56,17 @@ def check_running(self): def get_uuid(self, jobid): """ """ - return self._ff.wfr_run_uuid(jobid) + return self.ff.wfr_run_uuid(jobid) def get_status(self, jobid): """ """ - return self._ff.wfr_run_status(jobid) + return self.ff.wfr_run_status(jobid) def get_output(self, jobid): """ """ - return self._ff.get_minimal_processed_output(jobid) + return self.ff.get_minimal_processed_output(jobid) @property def ff(self): @@ -88,95 +79,115 @@ def ff(self): #end class -################################################ -# CheckStatusRunHandlerFF -################################################ -#TODO: not using an abstract class -- will check on this later + class CheckStatusRunHandlerFF(object): """ - Customized CheckStatus class for MetaWorkflow Run Handler for the CGAP portal. + Customized CheckStatus class for MetaWorkflow Run Handler from the CGAP portal. """ - def __init__(self, mwfr_handler_input_dict, env=None): + def __init__(self, meta_workflow_run_handler: JsonObject, auth_key: JsonObject) -> None: """ - Initialize object and attributes. - - :param mwfr_handler_input_dict: MetaWorkflowRunHandler input dict - :type mwfr_handler_input_dict: dict - :param env: Name of the environment to use (e.g. fourfront-cgap) - :type env: str - """ - # Basic attributes - #TODO: may do this outside of this class for consistency - self.mwfr_handler_obj = MetaWorkflowRunHandler(mwfr_handler_input_dict) - - # Used for searching CGAP portal-related attributes - self._env = env - - # For FFMetaWfrUtils object - self._ff = FFMetaWfrUtils(self._env) + Initialize CheckStatusRunHandlerFF object. - @property - def status_map(self): + :param meta_workflow_run_handler: MetaWorkflowRunHandler input dict + :param auth_key: Authorization keys for C4 account """ - Mapping from MWFR portal final_status output to magma final_status. - """ - return { - "pending": "pending", - "running": "running", - "completed": "completed", - "failed": "failed", - "inactive": "pending", - "stopped": "stopped", - "quality metric failed": "failed" - } + self.meta_workflow_run_handler = meta_workflow_run_handler + self.auth_key = auth_key - def check_running_mwfr_steps(self): + def update_running_steps(self) -> Optional[Dict[str, Union[str, List[Any]]]]: """ - Check the currently running MetaWorkflowRun steps and update - statuses accordingly. - Returns a generator. clever. + For each running MetaWorkflow Run Step: + - updates status of that MetaWorkflow Run Step to its current portal output + - generates updated meta_workflow_runs array and final_status (of handler) + for MetaWorkflowRunHandler instance, yielded as + {final_status, meta_workflow_runs} for PATCHing """ # Iterate through list of running MetaWorkflow Run steps (array of objects) - for running_mwfr_step_name in self.mwfr_handler_obj.running_steps(): + for running_step_name in self.handler.running_steps(): # Get run uuid - run_uuid = self.mwfr_handler_obj.get_step_attr(running_mwfr_step_name, uuid) + run_step_uuid = self.handler.get_meta_workflow_run_step_attr(running_step_name, UUID) - # Check current status from MWF run name - status = self.status_map[self.get_mwfr_status(run_uuid)] + # Check current status of this MetaWorkflow Run step + curr_status = self.get_meta_workflow_run_step_status(run_step_uuid) - # Update run status no matter what - self.mwfr_handler_obj.update_meta_workflow_run_step(running_mwfr_step_name, "status", status) + # TODO: is there any case where a uuid of a "running" step doesn't exist? 
+            # I don't think so but check with Doug
 
-        # Update run uuid regardless of the status
-        # if run_uuid: # some failed runs don't have run uuid
-        #     self.wflrun_obj.update_attribute(run_obj.shard_name, 'workflow_run', run_uuid)
-        # TODO: what's good w a mwfr that failed and may not have uuid??
+            # TODO: is there any way to catch traceback from Tibanna of a failed job?
+            # if so, can add attr to run handler schema to save these, otherwise it is
+            # manually searched/inspected (I imagine it is the latter)
 
+            # TODO: worry about other attrs at all (like uuid?)
 
-        if status == 'running':
-            yield None # yield None so that it doesn't terminate iteration
+            if curr_status == RUNNING:
+                yield None # yield None so iteration isn't terminated
                continue
 
-            # TODO: what about when failed? add to error attr (ik originally for just creation error but still)
-            # TODO: add part cost check/calculation here? tbd -- rn no, only checks running
-            # but actually that may work
+            # Update run status
+            self.handler.update_meta_workflow_run_step_obj(running_step_name, STATUS, curr_status)
 
             # Return the json to PATCH meta_workflow_runs and final_status in handler
-            yield {'final_status': self.mwfr_handler_obj.update_final_status(),
-                   'meta_workflow_runs': self.mwfr_handler_obj.update_meta_workflows_array()}
+            yield {FINAL_STATUS: self.handler.update_final_status(),
+                   META_WORKFLOW_RUNS: self.handler.update_meta_workflow_runs_array()}
 
+    def updated_run_handler_cost(self) -> Dict[str, float]:
+        """
+        For each MetaWorkflow Run Step (running or not):
+        - retrieve its Tibanna cost from the CGAP portal (0 if it doesn't have this attribute)
+        - add this step's cost to the overall Run Handler cost
+        Once the loop completes, the total cost for the MetaWorkflowRunHandler instance
+        is returned as a dict for PATCHing on the CGAP portal.
+        """
+        curr_cost = float(0)
+        for run_step_name in self.handler.meta_workflow_run_steps_dict:
+            # Get run uuid
+            run_step_uuid = self.handler.get_meta_workflow_run_step_attr(run_step_name, UUID)
+            # Get its cost and add to overall handler cost
+            run_step_cost = self.portal_run_attr_getter.get_meta_workflow_run_cost(run_step_uuid)
+            curr_cost += run_step_cost
+        # Return the json to PATCH cost attribute in handler
+        return {COST: curr_cost}
+        # TODO: is there actually any case where we don't need to check non-running
+        # steps for cost? other than when initializing cost of a newly created handler to 0...
 
+    def get_meta_workflow_run_step_status(self, meta_workflow_run_identifier: str) -> str:
+        """
+        Using the CGAP portal, gets the current status of the given MetaWorkflow Run step.
+
+        :param meta_workflow_run_identifier: Identifier (e.g. UUID, @id) for
+            MetaWorkflow Run to be searched
+        :return: the status of the specified MetaWorkflow Run, mapped to a
+            Run Handler step status via run_status_mapping
+        """
+        current_status = self.portal_run_attr_getter.get_meta_workflow_run_status(meta_workflow_run_identifier)
+        return self.run_status_mapping[current_status]
+
+    @property
+    def run_status_mapping(self) -> dict:
+        """
+        Mapping from possible CGAP portal final_status value for a MetaWorkflow Run,
+        to possible status values for a MetaWorkflow Run step within a Run Handler,
+        according to the CGAP schema for a Run Handler.
""" - return self._ff.get_meta_wfr_cost(mwfr_uuid) \ No newline at end of file + #TODO: add this to constants + return { + PENDING: PENDING, + RUNNING: RUNNING, + COMPLETED: COMPLETED, + FAILED: FAILED, + INACTIVE: PENDING, + STOPPED: STOPPED, + QC_FAIL: FAILED + } + + @cached_property + def portal_run_attr_getter(self): + """Used for accessing status and cost attributes of MetaWorkflow Runs from CGAP portal.""" + return FFMetaWfrUtils(self.auth_key) + + @cached_property + def handler(self): + """Using JSON object of Run Handler from CGAP portal, create magma_ff MetaWorkflowRunHandler instance.""" + return MetaWorkflowRunHandler(self.meta_workflow_run_handler) \ No newline at end of file diff --git a/test/meta_workflow_run_handler_constants.py b/test/meta_workflow_run_handler_constants.py index 754c845..94ef6ba 100644 --- a/test/meta_workflow_run_handler_constants.py +++ b/test/meta_workflow_run_handler_constants.py @@ -189,6 +189,7 @@ def mwfr_handler_dict_generator(meta_workflow_runs_array): # Handlers currently running FIRST_STEP_RUNNING_ARRAY = [MWFR_B_RUNNING, MWFR_C_PENDING, MWFR_A_PENDING, MWFR_D_PENDING] +FIRST_STEP_COMPLETED_ARRAY = [MWFR_B_COMPLETED, MWFR_C_PENDING, MWFR_A_PENDING, MWFR_D_PENDING] RUNNING_MWFR_ARRAY = [MWFR_B_RUNNING, MWFR_C_RUNNING, MWFR_A_PENDING, MWFR_D_PENDING] RUNNING_MWFR_ARRAY_2 = [ MWFR_B_COMPLETED_W_LINKTO, diff --git a/test/test_checkstatus_ff.py b/test/test_checkstatus_ff.py index 4a65e07..6c58947 100644 --- a/test/test_checkstatus_ff.py +++ b/test/test_checkstatus_ff.py @@ -1,103 +1,191 @@ +from contextlib import contextmanager +from test.utils import patch_context +from typing import Iterator, List, Any + import json import mock +import pytest -from magma_ff import checkstatus +import magma_ff.checkstatus as checkstatus_module +from magma_ff.checkstatus import CheckStatusFF, CheckStatusRunHandlerFF from magma_ff import metawflrun as run_ff - -def test_CheckStatusFF(): - """This check does not actually connect to the portal. - It uses mocks for get_status and get_output - """ - with open('test/files/CGAP_WGS_trio_scatter_ff.run.json') as json_file: - data_wflrun = json.load(json_file) - - # fake that the first one is running - data_wflrun['workflow_runs'][0]['status'] = 'running' - data_wflrun['workflow_runs'][0]['jobid'] = 'somejobid' - - # Create MetaWorkflowRun object and check_running generator - wflrun_obj = run_ff.MetaWorkflowRun(data_wflrun) - cs = checkstatus.CheckStatusFF(wflrun_obj) - cr = cs.check_running() - - # mock get_status and get_output - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_status', return_value='complete'): - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_output', - return_value=[{'argument_name': 'raw_bam', 'files': 'abc'}]): - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_uuid', return_value='run_uuid'): - res = next(cr) - - # check yielded result - assert len(res['workflow_runs']) == len(data_wflrun['workflow_runs']) # same as original - assert res['workflow_runs'][0] == {'name': 'workflow_bwa-mem_no_unzip-check', - 'workflow_run': 'run_uuid', - 'shard': '0:0', - 'jobid': 'somejobid', - 'status': 'completed', # changed from running to completed - 'output': [{'argument_name': 'raw_bam', 'files': 'abc'}]} # output is filled in - assert 'failed_jobs' not in res # if nothing failed, '' failed_jobs should not be in the patch dict - - -def test_CheckStatusFF_failed(): - """This check does not actually connect to the portal. 
- It uses mocks for get_status and get_output - """ - with open('test/files/CGAP_WGS_trio_scatter_ff.run.json') as json_file: - data_wflrun = json.load(json_file) - - # fake that the first one is running - data_wflrun['workflow_runs'][0]['status'] = 'running' - data_wflrun['workflow_runs'][0]['jobid'] = 'somejobid' - - # Create MetaWorkflowRun object and check_running generator - wflrun_obj = run_ff.MetaWorkflowRun(data_wflrun) - cs = checkstatus.CheckStatusFF(wflrun_obj) - cr = cs.check_running() - - # mock get_status and get_output - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_status', return_value='error'): - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_output', - return_value=[{'argument_name': 'raw_bam', 'files': 'abc'}]): - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_uuid', return_value='run_uuid'): - res = next(cr) - - # check yielded result - assert len(res['workflow_runs']) == len(data_wflrun['workflow_runs']) # same as original - assert res['workflow_runs'][0] == {'name': 'workflow_bwa-mem_no_unzip-check', - 'workflow_run': 'run_uuid', - 'shard': '0:0', - 'jobid': 'somejobid', - 'status': 'failed'} # changed from running to failed, no output - assert res['failed_jobs'] == ['somejobid'] - - -def test_CheckStatusFF_running(): - """This check does not actually connect to the portal. - It uses mocks for get_status and get_output - """ - with open('test/files/CGAP_WGS_trio_scatter_ff.run.json') as json_file: - data_wflrun = json.load(json_file) - # fake that the first one is running - data_wflrun['workflow_runs'][0]['status'] = 'running' - data_wflrun['workflow_runs'][0]['jobid'] = 'somejobid' - # Create MetaWorkflowRun object and check_running generator - wflrun_obj = run_ff.MetaWorkflowRun(data_wflrun) - cs = checkstatus.CheckStatusFF(wflrun_obj) - cr = cs.check_running() - # Mock WorkflowRun with "started" status - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_status', return_value='started'): - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_output', - return_value=[{'argument_name': 'raw_bam', 'files': 'abc'}]): - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_uuid', return_value='run_uuid'): - result = list(cr) - assert result == [] - - cr = cs.check_running() - # Mock WorkflowRun with "complete" status - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_status', return_value='complete'): - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_output', - return_value=[{'argument_name': 'raw_bam', 'files': 'abc'}]): - with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_uuid', return_value='run_uuid'): - result = list(cr) - assert len(result) == 1 +from magma_ff.utils import JsonObject + +from test.meta_workflow_run_handler_constants import * + +class TestCheckStatusFF: + def test_CheckStatusFF(self): + """This check does not actually connect to the portal. 
+ It uses mocks for get_status and get_output + """ + with open('test/files/CGAP_WGS_trio_scatter_ff.run.json') as json_file: + data_wflrun = json.load(json_file) + + # fake that the first one is running + data_wflrun['workflow_runs'][0]['status'] = 'running' + data_wflrun['workflow_runs'][0]['jobid'] = 'somejobid' + + # Create MetaWorkflowRun object and check_running generator + wflrun_obj = run_ff.MetaWorkflowRun(data_wflrun) + cs = CheckStatusFF(wflrun_obj) + cr = cs.check_running() + + # mock get_status and get_output + with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_status', return_value='complete'): + with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_output', + return_value=[{'argument_name': 'raw_bam', 'files': 'abc'}]): + with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_uuid', return_value='run_uuid'): + res = next(cr) + + # check yielded result + assert len(res['workflow_runs']) == len(data_wflrun['workflow_runs']) # same as original + assert res['workflow_runs'][0] == {'name': 'workflow_bwa-mem_no_unzip-check', + 'workflow_run': 'run_uuid', + 'shard': '0:0', + 'jobid': 'somejobid', + 'status': 'completed', # changed from running to completed + 'output': [{'argument_name': 'raw_bam', 'files': 'abc'}]} # output is filled in + assert 'failed_jobs' not in res # if nothing failed, '' failed_jobs should not be in the patch dict + + + def test_CheckStatusFF_failed(self): + """This check does not actually connect to the portal. + It uses mocks for get_status and get_output + """ + with open('test/files/CGAP_WGS_trio_scatter_ff.run.json') as json_file: + data_wflrun = json.load(json_file) + + # fake that the first one is running + data_wflrun['workflow_runs'][0]['status'] = 'running' + data_wflrun['workflow_runs'][0]['jobid'] = 'somejobid' + + # Create MetaWorkflowRun object and check_running generator + wflrun_obj = run_ff.MetaWorkflowRun(data_wflrun) + cs = CheckStatusFF(wflrun_obj) + cr = cs.check_running() + + # mock get_status and get_output + with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_status', return_value='error'): + with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_output', + return_value=[{'argument_name': 'raw_bam', 'files': 'abc'}]): + with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_uuid', return_value='run_uuid'): + res = next(cr) + + # check yielded result + assert len(res['workflow_runs']) == len(data_wflrun['workflow_runs']) # same as original + assert res['workflow_runs'][0] == {'name': 'workflow_bwa-mem_no_unzip-check', + 'workflow_run': 'run_uuid', + 'shard': '0:0', + 'jobid': 'somejobid', + 'status': 'failed'} # changed from running to failed, no output + assert res['failed_jobs'] == ['somejobid'] + + + def test_CheckStatusFF_running(self): + """This check does not actually connect to the portal. 
+        It uses mocks for get_status and get_output
+        """
+        with open('test/files/CGAP_WGS_trio_scatter_ff.run.json') as json_file:
+            data_wflrun = json.load(json_file)
+        # fake that the first one is running
+        data_wflrun['workflow_runs'][0]['status'] = 'running'
+        data_wflrun['workflow_runs'][0]['jobid'] = 'somejobid'
+        # Create MetaWorkflowRun object and check_running generator
+        wflrun_obj = run_ff.MetaWorkflowRun(data_wflrun)
+        cs = CheckStatusFF(wflrun_obj)
+        cr = cs.check_running()
+        # Mock WorkflowRun with "started" status
+        with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_status', return_value='started'):
+            with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_output',
+                            return_value=[{'argument_name': 'raw_bam', 'files': 'abc'}]):
+                with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_uuid', return_value='run_uuid'):
+                    result = list(cr)
+                    assert result == []
+
+        cr = cs.check_running()
+        # Mock WorkflowRun with "complete" status
+        with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_status', return_value='complete'):
+            with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_output',
+                            return_value=[{'argument_name': 'raw_bam', 'files': 'abc'}]):
+                with mock.patch('magma_ff.checkstatus.CheckStatusFF.get_uuid', return_value='run_uuid'):
+                    result = list(cr)
+                    assert len(result) == 1
+
+##################################################################
+AUTH_KEY = {"server": "some_server"}
+
+@contextmanager
+def patch_get_meta_workflow_run_status(**kwargs) -> Iterator[mock.MagicMock]:
+    """Patch get_meta_workflow_run_status method within FFMetaWfrUtils class."""
+    with patch_context(
+        checkstatus_module.FFMetaWfrUtils,
+        "get_meta_workflow_run_status",
+        **kwargs
+    ) as mock_item:
+        yield mock_item
+
+class TestCheckStatusRunHandlerFF:
+    """Tests for the customized CheckStatus class for the MetaWorkflow Run Handler (CGAP portal)."""
+
+    @pytest.mark.parametrize(
+        "portal_run_status, expected_value",
+        [
+            (PENDING, PENDING),
+            (RUNNING, RUNNING),
+            (COMPLETED, COMPLETED),
+            (FAILED, FAILED),
+            (INACTIVE, PENDING),
+            (STOPPED, STOPPED),
+            (QC_FAIL, FAILED),
+        ],
+    )
+    def test_get_meta_workflow_run_step_status(
+        self, portal_run_status: str, expected_value: str
+    ) -> None:
+        """
+        Tests retrieval of MetaWorkflow Run status from portal, and status mapping to magma.
+        """
+        with patch_get_meta_workflow_run_status() as mock_get_status:
+            mock_get_status.return_value = portal_run_status
+            returned_step_status = CheckStatusRunHandlerFF(HANDLER_PENDING, AUTH_KEY).get_meta_workflow_run_step_status("tester")
+            assert returned_step_status == expected_value
+
+    @pytest.mark.parametrize(
+        "run_handler, orig_final_status, yielded_statuses, yielded_mwf_run_arrays",
+        [
+            (
+                HANDLER_STEPS_RUNNING,
+                RUNNING,
+                [COMPLETED],
+                [FIRST_STEP_COMPLETED_ARRAY],
+            )
+        ],
+    )
+    def test_update_running_steps(
+        self,
+        run_handler: JsonObject,
+        orig_final_status: str,
+        yielded_statuses: List[str],
+        yielded_mwf_run_arrays: List[List[Any]],
+    ) -> None:
+        """
+        Tests generator of dictionaries used to PATCH running MetaWorkflow Runs
+        and the final status of the overall MetaWorkflow Run Handler.
+        """
+        status_checker = CheckStatusRunHandlerFF(run_handler, AUTH_KEY)
+        assert (
+            getattr(status_checker.handler, FINAL_STATUS) == orig_final_status
+        )
+
+        with patch_get_meta_workflow_run_status() as mock_get_status:
+            mock_get_status.side_effect = yielded_statuses
+            patch_dict_generator = (
+                status_checker.update_running_steps()
+            )
+            # Iterate lazily: materializing with list() first would exhaust
+            # the generator before the per-step assertions run
+            num_yielded_steps = 0
+            for idx, step in enumerate(patch_dict_generator):
+                assert step[FINAL_STATUS] == yielded_statuses[idx]
+                assert step[META_WORKFLOW_RUNS] == yielded_mwf_run_arrays[idx]
+                num_yielded_steps += 1
+            assert num_yielded_steps == len(yielded_statuses)
diff --git a/test/tester.py b/test/tester.py
new file mode 100644
index 0000000..eae4eb1
--- /dev/null
+++ b/test/tester.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+### python3 -m test.tester
+
+#################################################################
+#   Libraries
+#################################################################
+
+import mock
+from contextlib import contextmanager
+
+from typing import Iterator
+
+from test.utils import patch_context
+# from magma.magma_constants import *
+import magma_ff.run_metawflrun_handler as run_metaworkflow_run_handler_module
+from magma_ff.run_metawflrun_handler import (
+    ExecuteMetaWorkflowRunHandler,
+    execute_metawflrun_handler,
+)
+
+import magma_ff.create_metawfr as create_metaworkflow_run_module
+from magma_ff.create_metawfr import (
+    create_meta_workflow_run,
+    MetaWorkflowRunCreationError,
+)
+
+from test.meta_workflow_run_handler_constants import *
+
+from magma_ff.metawflrun_handler import MetaWorkflowRunHandler
+from magma.metawflrun_handler import MetaWorkflowRunStep
+
+META_WORKFLOW_RUN_HANDLER_UUID = "meta_workflow_run_handler_tester_uuid"
+AUTH_KEY = {"server": "some_server"}
+
+@contextmanager
+def patch_get_metadata(**kwargs) -> Iterator[mock.MagicMock]:
+    """Patch ff_utils.get_metadata call within the create_metawfr module."""
+    with patch_context(
+        create_metaworkflow_run_module.ff_utils, "get_metadata", **kwargs
+    ) as mock_item:
+        yield mock_item
+
+
+@contextmanager
+def patch_create_meta_workflow_run(**kwargs) -> Iterator[mock.MagicMock]:
+    """Patch create_metawfr.create_meta_workflow_run call within the run_metawflrun_handler module."""
+    with patch_context(
+        run_metaworkflow_run_handler_module, "create_meta_workflow_run", **kwargs
+    ) as mock_item:
+        yield mock_item
+
+
+@contextmanager
+def patch_create_and_update_meta_workflow_run_step(
+    **kwargs,
+) -> Iterator[mock.MagicMock]:
+    """Patch the _create_and_update_meta_workflow_run_step method within the run_metawflrun_handler module."""
+    with patch_context(
+        run_metaworkflow_run_handler_module,
+        "_create_and_update_meta_workflow_run_step",
+        **kwargs,
+    ) as mock_item:
+        yield mock_item
+
+############################
+if __name__ == "__main__":
+
+    with patch_create_meta_workflow_run(return_value=TEST_MWFR_SIMPLE_GET_OUTPUT) as mock_create_mwfr:
+        execution_generator = ExecuteMetaWorkflowRunHandler(
+            HANDLER_PENDING, AUTH_KEY
+        )
+        generator = execution_generator.generator_of_created_meta_workflow_run_steps()
+        # import pdb; pdb.set_trace()
+        for idx, step in enumerate(generator):
+            print(idx)
+            print(step)
+            print()
\ No newline at end of file
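The series stops short of a wrapper that drives CheckStatusRunHandlerFF the way execute_metawflrun_handler drives ExecuteMetaWorkflowRunHandler. A sketch of one plausible caller, assuming a Run Handler JSON, its UUID, and an auth key are in hand (the function name and control flow are illustrative, not part of these patches):

    from dcicutils import ff_utils

    from magma_ff.checkstatus import CheckStatusRunHandlerFF

    def check_and_patch_run_handler(run_handler_json, run_handler_uuid, auth_key):
        # Illustrative driver: PATCH step statuses, then the rolled-up cost
        status_checker = CheckStatusRunHandlerFF(run_handler_json, auth_key)
        # update_running_steps() yields None while a step is still running, or a
        # {final_status, meta_workflow_runs} dict ready to be PATCHed
        for patch_body in status_checker.update_running_steps():
            if patch_body is None:
                continue
            ff_utils.patch_metadata(patch_body, run_handler_uuid, key=auth_key)
        # updated_run_handler_cost() returns a {cost: total} dict for PATCHing
        ff_utils.patch_metadata(
            status_checker.updated_run_handler_cost(), run_handler_uuid, key=auth_key
        )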