diff --git a/src/sasctl/_services/model_repository.py b/src/sasctl/_services/model_repository.py
index 0971a125..37a4534f 100644
--- a/src/sasctl/_services/model_repository.py
+++ b/src/sasctl/_services/model_repository.py
@@ -359,9 +359,10 @@ def create_model(
         model["scoreCodeType"] = score_code_type or model.get("scoreCodeType")
         model["trainTable"] = training_table or model.get("trainTable")
-        model["classificationEventProbabilityVariableName"] = (
-            event_prob_variable
-            or model.get("classificationEventProbabilityVariableName")
+        model[
+            "classificationEventProbabilityVariableName"
+        ] = event_prob_variable or model.get(
+            "classificationEventProbabilityVariableName"
         )
         model["classificationTargetEventValue"] = event_target_value or model.get(
             "classificationTargetEventValue"
         )
diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py
index 6995df63..c6280006 100644
--- a/src/sasctl/pzmm/write_json_files.py
+++ b/src/sasctl/pzmm/write_json_files.py
@@ -57,8 +57,8 @@ class NpEncoder(json.JSONEncoder):
 LIFT = "dmcas_lift.json"
 MAXDIFFERENCES = "maxDifferences.json"
 GROUPMETRICS = "groupMetrics.json"
-VARIMPORTANCES = 'dmcas_relativeimportance.json'
-MISC = 'dmcas_misc.json'
+VARIMPORTANCES = "dmcas_relativeimportance.json"
+MISC = "dmcas_misc.json"
 
 
 def _flatten(nested_list: Iterable) -> Generator[Any, None, None]:
@@ -1175,8 +1175,8 @@
         train_data: Union[DataFrame, List[list], Type["numpy.array"]] = None,
         test_data: Union[DataFrame, List[list], Type["numpy.array"]] = None,
         json_path: Union[str, Path, None] = None,
-        target_type: str = "classification",
-        cutoff: Optional[float] = None
+        target_type: str = "classification",
+        cutoff: Optional[float] = None,
     ) -> Union[dict, None]:
         """
         Calculates fit statistics (including ROC and Lift curves) from datasets and then
@@ -1266,7 +1266,7 @@
                 data,
                 casout={"name": "assess_dataset", "replace": True, "caslib": "Public"},
             )
-            if target_type == 'classification':
+            if target_type == "classification":
                 conn.percentile.assess(
                     table={"name": "assess_dataset", "caslib": "Public"},
                     response="predict",
@@ -1284,7 +1284,7 @@
                     response="predict",
                     inputs="actual",
                     fitStatOut={"name": "FitStat", "replace": True, "caslib": "Public"},
-                    casout={"name": "Lift", "replace": True, "caslib": "Public"}
+                    casout={"name": "Lift", "replace": True, "caslib": "Public"},
                 )
 
             fitstat_dict = (
@@ -1295,21 +1295,21 @@
             )
             json_dict[0]["data"][i]["dataMap"].update(fitstat_dict)
 
-            if target_type == 'classification':
+            if target_type == "classification":
                 roc_df = pd.DataFrame(conn.CASTable("ROC", caslib="Public").to_frame())
                 roc_dict = cls.apply_dataframe_to_json(json_dict[1]["data"], i, roc_df)
                 for j in range(len(roc_dict)):
                     json_dict[1]["data"][j].update(roc_dict[j])
-                    if(roc_dict[j]["dataMap"]["_KS_"] == 1):
+                    if roc_dict[j]["dataMap"]["_KS_"] == 1:
                         fitstat_data = {
                             "_KS_": roc_dict[j]["dataMap"]["_KS_"],
                             "_KS2_": roc_dict[j]["dataMap"]["_KS2_"],
                             "_C_": roc_dict[j]["dataMap"]["_C_"],
                             "_Gini_": roc_dict[j]["dataMap"]["_Gini_"],
                             "_Gamma_": roc_dict[j]["dataMap"]["_Gamma_"],
-                            "_Tau_": roc_dict[j]["dataMap"]["_Tau_"]
+                            "_Tau_": roc_dict[j]["dataMap"]["_Tau_"],
                         }
-
+
                         json_dict[0]["data"][i]["dataMap"].update(fitstat_data)
 
                 lift_df = pd.DataFrame(conn.CASTable("Lift", caslib="Public").to_frame())
@@ -1321,14 +1321,16 @@
             for i, name in enumerate([FITSTAT, ROC, LIFT]):
                 if not (name == ROC and target_type == "prediction"):
                     with open(Path(json_path) / name, "w") as json_file:
-                        json_file.write(json.dumps(json_dict[i], indent=4, cls=NpEncoder))
+                        json_file.write(
+                            json.dumps(json_dict[i], indent=4, cls=NpEncoder)
+                        )
                     if cls.notebook_output:
                         print(
                             f"{name} was successfully written and saved to "
                             f"{Path(json_path) / name}"
                         )
         else:
-            if target_type == 'classification':
+            if target_type == "classification":
                 return {
                     FITSTAT: json.dumps(json_dict[0], indent=4, cls=NpEncoder),
                     ROC: json.dumps(json_dict[1], indent=4, cls=NpEncoder),
@@ -2251,8 +2253,8 @@ def generate_model_card(
     ):
         """
         Generates everything required for the model card feature within SAS Model Manager.
-
-        This includes uploading the training data to CAS, updating ModelProperties.json to have 
+
+        This includes uploading the training data to CAS, updating ModelProperties.json to have
         some extra properties, and generating dmcas_relativeimportance.json.
 
         Parameters
         ----------
@@ -2265,7 +2267,7 @@
             a dictionary containing the contents of all the model files.
         algorithm : str
             The name of the algorithm used to generate the model.
-        train_data: pandas.DataFrame 
+        train_data: pandas.DataFrame
            Training data that contains all input variables as well as the target variable.
         train_predictions : pandas.Series, list
             List of predictions made by the model on the training data.
@@ -2281,10 +2283,10 @@
             A list of classification variables. The default value is an empty list.
         selection_statistic: str, optional
             The selection statistic chosen to score the model against other models. Classification
-            models can take any of the following values: "_RASE_", "_GINI_", "_GAMMA_", "_MCE_", 
-            "_ASE_", "_MCLL_", "_KS_", "_KSPostCutoff_", "_DIV_", "_TAU_", "_KSCut_", or "_C_". 
-            Prediction models can take any of the following values: "_ASE_", "_DIV_", "_RASE_", "_MAE_", 
-            "_RMAE_", "_MSLE_", "_RMSLE_" The default value is "_KS_" for classification models and 
+            models can take any of the following values: "_RASE_", "_GINI_", "_GAMMA_", "_MCE_",
+            "_ASE_", "_MCLL_", "_KS_", "_KSPostCutoff_", "_DIV_", "_TAU_", "_KSCut_", or "_C_".
+            Prediction models can take any of the following values: "_ASE_", "_DIV_", "_RASE_", "_MAE_",
+            "_RMAE_", "_MSLE_", "_RMSLE_". The default value is "_KS_" for classification models and
             "_ASE_" for prediction models.
         server: str, optional
             The CAS server the training data will be stored on. The default value is "cas-shared-default"
@@ -2301,9 +2303,9 @@
                 "Only classification and prediction target types are currently accepted."
             )
         if selection_statistic is None:
-            if target_type is 'classification':
-                selection_statistic = '_KS_'
-            elif target_type is 'prediction':
+            if target_type == "classification":
+                selection_statistic = "_KS_"
+            elif target_type == "prediction":
                 selection_statistic = "_ASE_"
         if selection_statistic not in cls.valid_params:
             raise RuntimeError(
@@ -2322,14 +2324,10 @@
                 "The `swat` package is required to generate fit statistics, ROC, and "
                 "Lift charts with the calculate_model_statistics function."
             )
-
+
         # Upload training table to CAS. The location of the training table is returned.
         training_table = cls.upload_training_data(
-            conn,
-            model_prefix,
-            train_data,
-            server,
-            caslib
+            conn, model_prefix, train_data, server, caslib
         )
 
         # Generates the event percentage for Classification targets, and the event average
@@ -2338,14 +2336,16 @@
             train_data=train_data,
             input_variables=interval_vars + class_vars,
             target_type=target_type,
-            target_value=target_value
+            target_value=target_value,
         )
-
+
         # Formats all new ModelProperties information into one dictionary that can be used to update the json file
-        update_dict['trainTable'] = training_table
-        update_dict['selectionStatistic'] = selection_statistic
-        update_dict['algorithm'] = algorithm
-        update_dict['selectionStatisticValue'] = cls.get_selection_statistic_value(model_files, selection_statistic)
+        update_dict["trainTable"] = training_table
+        update_dict["selectionStatistic"] = selection_statistic
+        update_dict["algorithm"] = algorithm
+        update_dict["selectionStatisticValue"] = cls.get_selection_statistic_value(
+            model_files, selection_statistic
+        )
         cls.update_model_properties(model_files, update_dict)
 
         # Generates dmcas_relativeimportance.json file
@@ -2357,23 +2357,20 @@
             target_type,
             interval_vars,
             class_vars,
-            caslib
+            caslib,
         )
 
         # Generates dmcas_misc.json file
-        cls.generate_misc(
-            conn,
-            model_files
-        )
-
+        cls.generate_misc(conn, model_files)
+
     @staticmethod
     def upload_training_data(
         conn,
        model_prefix: str,
        train_data: pd.DataFrame,
        server: str = "cas-shared-default",
-        caslib: str = 'Public'
-    ):
+        caslib: str = "Public",
+    ):
         """
         Uploads training data to CAS server.
@@ -2384,7 +2381,7 @@
         model_prefix : string
             The prefix used to name files relating to the model. This is used to provide a unique name
             to the training data table when it is uploaded to CAS.
-        train_data: pandas.DataFrame 
+        train_data: pandas.DataFrame
             Training data that contains all input variables as well as the target variable.
         server: str, optional
             The CAS server the training data will be stored on. The default value is "cas-shared-default"
@@ -2395,29 +2392,27 @@
         -------
         string
             Returns a string that represents the location of the training table within CAS.
-        """ 
+        """
         # Upload raw training data to caslib so that data can be analyzed
         train_data_name = model_prefix + "_train_data"
         upload_train_data = conn.upload(
-            train_data,
-            casout={"name": train_data_name, "caslib": caslib},
-            promote=True
+            train_data, casout={"name": train_data_name, "caslib": caslib}, promote=True
         )
 
         if upload_train_data.status is not None:
             raise RuntimeError(
-                f'A table with the name {train_data_name} already exists in the specified caslib. Please '
-                'either delete/rename the old table or give a new name to the current table.'
+                f"A table with the name {train_data_name} already exists in the specified caslib. Please "
+                "either delete/rename the old table or give a new name to the current table."
             )
-
-        return server + '/' + caslib + '/' + train_data_name
-
+
+        return server + "/" + caslib + "/" + train_data_name
+
     @staticmethod
     def generate_outcome_average(
         train_data: pd.DataFrame,
         input_variables: list,
         target_type,
-        target_value: Union[str, int, float] = None
+        target_value: Union[str, int, float] = None,
     ):
         """
         Generates the outcome average of the training data. For prediction targets, the event average
@@ -2425,9 +2420,9 @@
 
         Parameters
         ----------
-        train_data: pandas.DataFrame 
-            Training data that contains all input variables as well as the target variable. If multiple 
-            non-input variables are included, the function will assume that the first non-input variable row 
+        train_data: pandas.DataFrame
+            Training data that contains all input variables as well as the target variable. If multiple
+            non-input variables are included, the function will assume that the first non-input variable row
             is the output.
         input_variables: list
             A list of all input variables used by the model. Used to isolate the output variable.
@@ -2443,22 +2438,28 @@
             Returns a dictionary with a key value pair that represents the outcome average.
         """
         import numbers
+
         output_var = train_data.drop(input_variables, axis=1)
         if target_type == "classification":
             value_counts = output_var[output_var.columns[0]].value_counts()
-            return {'eventPercentage': value_counts[target_value]/sum(value_counts)}
+            return {"eventPercentage": value_counts[target_value] / sum(value_counts)}
         elif target_type == "prediction":
-            if not isinstance(output_var[output_var.columns[0]].iloc[0], numbers.Number):
-                raise ValueError("Detected output column is not numeric. Please ensure that " +
-                                 "the correct output column is being passed, and that no extra columns " +
-                                 "are in front of the output column. This function assumes that the first " +
-                                 "non-input column is the output column.jf")
-            return {'eventAverage': sum(output_var[output_var.columns[0]]) / len(output_var)}
+            if not isinstance(
+                output_var[output_var.columns[0]].iloc[0], numbers.Number
+            ):
+                raise ValueError(
+                    "Detected output column is not numeric. Please ensure that "
+                    + "the correct output column is being passed, and that no extra columns "
+                    + "are in front of the output column. This function assumes that the first "
+                    + "non-input column is the output column."
+                )
+            return {
+                "eventAverage": sum(output_var[output_var.columns[0]]) / len(output_var)
+            }
 
     @staticmethod
     def get_selection_statistic_value(
-        model_files: Union[str, Path, dict],
-        selection_statistic: str = "_GINI_"
+        model_files: Union[str, Path, dict], selection_statistic: str = "_GINI_"
     ):
         """
         Finds the value of the chosen selection statistic in dmcas_fitstat.json, which should have been
@@ -2470,7 +2471,7 @@
             Either the directory location of the model files (string or Path object), or a dictionary
             containing the contents of all the model files.
         selection_statistic: str, optional
-            The selection statistic chosen to score the model against other models. Can be any of the 
+            The selection statistic chosen to score the model against other models. Can be any of the
             following values: "_RASE_", "_NObs_", "_GINI_", "_GAMMA_", "_MCE_", "_ASE_", "_MCLL_",
             "_KS_", "_KSPostCutoff_", "_DIV_", "_TAU_", "_KSCut_", or "_C_". The default value is "_GINI_".
@@ -2485,36 +2486,39 @@
                     "The dmcas_fitstat.json file must be generated before the model card data "
                     "can be generated."
                 )
-            for fitstat in model_files[FITSTAT]['data']:
-                if fitstat['dataMap']['_DataRole_'] == "TRAIN":
-                    if selection_statistic not in fitstat['dataMap'] or fitstat['dataMap'][selection_statistic] == None:
+            for fitstat in model_files[FITSTAT]["data"]:
+                if fitstat["dataMap"]["_DataRole_"] == "TRAIN":
+                    if (
+                        selection_statistic not in fitstat["dataMap"]
+                        or fitstat["dataMap"][selection_statistic] is None
+                    ):
                         raise RuntimeError(
                             "The chosen selection statistic was not generated properly. Please ensure the value has been "
                             "properly created then try again."
                         )
-                    return fitstat['dataMap'][selection_statistic]
+                    return fitstat["dataMap"][selection_statistic]
         else:
             if not Path.exists(Path(model_files) / FITSTAT):
                 raise RuntimeError(
                     "The dmcas_fitstat.json file must be generated before the model card data "
                     "can be generated."
                 )
-            with open(Path(model_files) / FITSTAT, 'r') as fitstat_json:
+            with open(Path(model_files) / FITSTAT, "r") as fitstat_json:
                 fitstat_dict = json.load(fitstat_json)
-                for fitstat in fitstat_dict['data']:
-                    if fitstat['dataMap']['_DataRole_'] == "TRAIN":
-                        if selection_statistic not in fitstat['dataMap'] or fitstat['dataMap'][selection_statistic] == None:
+                for fitstat in fitstat_dict["data"]:
+                    if fitstat["dataMap"]["_DataRole_"] == "TRAIN":
+                        if (
+                            selection_statistic not in fitstat["dataMap"]
+                            or fitstat["dataMap"][selection_statistic] is None
+                        ):
                             raise RuntimeError(
                                 "The chosen selection statistic was not generated properly. Please ensure the value has been "
                                 "properly created then try again."
                            )
-                        return fitstat['dataMap'][selection_statistic]
+                        return fitstat["dataMap"][selection_statistic]
 
     @staticmethod
-    def update_model_properties(
-        model_files,
-        update_dict
-    ):
+    def update_model_properties(model_files, update_dict):
         """
         Updates the ModelProperties.json file to include properties listed in the update_dict dictionary.
@@ -2532,7 +2536,7 @@
             raise RuntimeError(
                 "The ModelProperties.json file must be generated before the model card data "
                 "can be generated."
-            ) 
+            )
             for key in update_dict:
                 if not isinstance(update_dict[key], str):
                     model_files[PROP][key] = str(round(update_dict[key], 14))
@@ -2544,7 +2548,7 @@
                     "The ModelProperties.json file must be generated before the model card data "
                     "can be generated."
                 )
-            with open(Path(model_files) / PROP, 'r+') as properties_json:
+            with open(Path(model_files) / PROP, "r+") as properties_json:
                 model_properties = json.load(properties_json)
                 for key in update_dict:
                     if not isinstance(update_dict[key], str):
@@ -2552,7 +2556,9 @@
                     else:
                         model_properties[key] = update_dict[key]
                 properties_json.seek(0)
-                properties_json.write(json.dumps(model_properties, indent=4, cls=NpEncoder))
+                properties_json.write(
+                    json.dumps(model_properties, indent=4, cls=NpEncoder)
+                )
                 properties_json.truncate()
 
     @classmethod
@@ -2577,7 +2583,7 @@
         model_files : string, Path, or dict
             Either the directory location of the model files (string or Path object), or a dictionary
             containing the contents of all the model files.
-        train_data: pandas.DataFrame 
+        train_data: pandas.DataFrame
            Training data that contains all input variables as well as the target variable.
         train_predictions : pandas.Series, list
             List of predictions made by the model on the training data.
@@ -2596,112 +2602,128 @@
         # Upload scored training data to run variable importance on
         x_train_data.insert(0, "Prediction", train_predictions, True)
         conn.upload(
-            x_train_data,
-            casout={"name": "train_data", "replace": True, "caslib": caslib}
+            x_train_data,
+            casout={"name": "train_data", "replace": True, "caslib": caslib},
         )
 
         # Load actionset necessary to generate variable importance
-        conn.loadactionset('dataPreprocess')
+        conn.loadactionset("dataPreprocess")
 
         request_packages = list()
         if target_type == "classification":
             method = "DTREE"
             treeCrit = "Entropy"
         elif target_type == "interval":
             method = "RTREE"
-            treeCrit = 'RSS'
+            treeCrit = "RSS"
         else:
             raise RuntimeError(
                 "The selected model type is unsupported. Currently, only models that have prediction or classification target types are supported."
             )
 
         request_packages = list()
         if interval_vars:
-            request_packages.append({
-                "name": 'BIN',
-                "inputs": [{"name": var} for var in interval_vars],
-                "targets": [{"name": "Prediction"}],
-                "discretize": {
-                    "method": method,
-                    "arguments": {
-                        "minNBins": 1,
-                        "maxNBins": 8,
-                        "treeCrit": treeCrit,
-                        "contingencyTblOpts":{"inputsMethod": 'BUCKET', "inputsNLevels": 100},
-                        "overrides": {"minNObsInBin": 5, "binMissing": True, "noDataLowerUpperBound": True}
-                    }
+            request_packages.append(
+                {
+                    "name": "BIN",
+                    "inputs": [{"name": var} for var in interval_vars],
+                    "targets": [{"name": "Prediction"}],
+                    "discretize": {
+                        "method": method,
+                        "arguments": {
+                            "minNBins": 1,
+                            "maxNBins": 8,
+                            "treeCrit": treeCrit,
+                            "contingencyTblOpts": {
+                                "inputsMethod": "BUCKET",
+                                "inputsNLevels": 100,
+                            },
+                            "overrides": {
+                                "minNObsInBin": 5,
+                                "binMissing": True,
+                                "noDataLowerUpperBound": True,
+                            },
+                        },
+                    },
                 }
-            })
+            )
         if class_vars:
-            request_packages.append({
-                "name": 'BIN_NOM',
-                "inputs": [{"name": var} for var in class_vars],
-                "targets": [{"name": "Prediction"}],
-                "catTrans": {
-                    "method": method,
-                    "arguments": {
-                        "minNBins": 1,
-                        "maxNBins": 8,
-                        "treeCrit": treeCrit,
-                        "overrides": {"minNObsInBin": 5, "binMissing": True}
-                    }
+            request_packages.append(
+                {
+                    "name": "BIN_NOM",
+                    "inputs": [{"name": var} for var in class_vars],
+                    "targets": [{"name": "Prediction"}],
+                    "catTrans": {
+                        "method": method,
+                        "arguments": {
+                            "minNBins": 1,
+                            "maxNBins": 8,
+                            "treeCrit": treeCrit,
+                            "overrides": {"minNObsInBin": 5, "binMissing": True},
+                        },
+                    },
                 }
-            })
+            )
 
         var_data = conn.dataPreprocess.transform(
             table={"name": "train_data", "caslib": caslib},
             requestPackages=request_packages,
             evaluationStats=True,
-            percentileMaxIterations=10, 
-            percentileTolerance=0.00001, 
-            distinctCountLimit=5000, 
-            sasVarNameLength=True, 
+            percentileMaxIterations=10,
+            percentileTolerance=0.00001,
+            distinctCountLimit=5000,
+            sasVarNameLength=True,
             outputTableOptions={"inputVarPrintOrder": True},
-            sasProcClient=True
+            sasProcClient=True,
         )
-        var_importances = var_data['VarTransInfo'][['Variable', 'RelVarImportance']]
-        var_importances = var_importances.sort_values(by=['RelVarImportance'], ascending=False).reset_index(drop=True)
+        var_importances = var_data["VarTransInfo"][["Variable", "RelVarImportance"]]
+        var_importances = var_importances.sort_values(
+            by=["RelVarImportance"], ascending=False
+        ).reset_index(drop=True)
         relative_importances = list()
         for index, row in var_importances.iterrows():
-            if row['Variable'] in interval_vars:
+            if row["Variable"] in interval_vars:
                 level = "INTERVAL"
-            elif row['Variable'] in class_vars:
+            elif row["Variable"] in class_vars:
                 level = "NOMINAL"
-            relative_importances.append({
-                "dataMap" : {
-                    "LABEL": "",
-                    "LEVEL": level,
-                    "ROLE": "INPUT",
-                    "RelativeImportance": str(row['RelVarImportance']),
-                    "Variable": row['Variable']
-                },
-                "rowNumber": index+1
-            })
+            relative_importances.append(
+                {
+                    "dataMap": {
+                        "LABEL": "",
+                        "LEVEL": level,
+                        "ROLE": "INPUT",
+                        "RelativeImportance": str(row["RelVarImportance"]),
+                        "Variable": row["Variable"],
+                    },
+                    "rowNumber": index + 1,
+                }
+            )
 
         json_template_path = (
             Path(__file__).resolve().parent / f"template_files/{VARIMPORTANCES}"
         )
-        with open(json_template_path, 'r') as f:
+        with open(json_template_path, "r") as f:
             relative_importance_json = json.load(f)
-        relative_importance_json['data'] = relative_importances
+        relative_importance_json["data"] = relative_importances
 
         if isinstance(model_files, dict):
-            model_files[VARIMPORTANCES] = json.dumps(relative_importance_json, indent=4, cls=NpEncoder)
+            model_files[VARIMPORTANCES] = json.dumps(
+                relative_importance_json, indent=4, cls=NpEncoder
+            )
             if cls.notebook_output:
                 print(
                     f"{VARIMPORTANCES} was successfully written and saved to "
                     f"model files dictionary."
                 )
         else:
-            with open(Path(model_files) / VARIMPORTANCES, 'w') as json_file:
-                json_file.write(json.dumps(relative_importance_json, indent=4, cls=NpEncoder))
+            with open(Path(model_files) / VARIMPORTANCES, "w") as json_file:
+                json_file.write(
+                    json.dumps(relative_importance_json, indent=4, cls=NpEncoder)
+                )
             if cls.notebook_output:
                 print(
                     f"{VARIMPORTANCES} was successfully written and saved to "
                     f"{Path(model_files) / VARIMPORTANCES}"
                 )
-        
+
     @classmethod
-    def generate_misc(
-        cls,
-        model_files: Union[str, Path, dict]
-    ):
+    def generate_misc(cls, model_files: Union[str, Path, dict]):
         """
         Generates the dmcas_relativeimportance.json file, which is used to determine variable importance
@@ -2718,7 +2740,7 @@
             raise RuntimeError(
                 "The ModelProperties.json file must be generated before the model card data "
                 "can be generated."
-            ) 
+            )
             roc_table = model_files[ROC]
         else:
             if not Path.exists(Path(model_files) / ROC):
@@ -2726,35 +2748,41 @@
                 raise RuntimeError(
                     "The ModelProperties.json file must be generated before the model card data "
                    "can be generated."
                )
-            with open(Path(model_files) / ROC, 'r') as roc_file:
+            with open(Path(model_files) / ROC, "r") as roc_file:
                 roc_table = json.load(roc_file)
 
         correct_text = ["CORRECT", "INCORRECT", "CORRECT", "INCORRECT"]
-        outcome_values = ['1', '0', '0', '1']
+        outcome_values = ["1", "0", "0", "1"]
         misc_data = list()
         # Iterates through ROC table to get TRAIN, TEST, and VALIDATE data with a cutoff of .5
         for i in range(50, 300, 100):
-            roc_data = roc_table['data'][i]['dataMap']
-            correctness_values = [roc_data['_TP_'], roc_data['_FP_'], roc_data['_TN_'], roc_data['_FN_']]
-            for (c_text, c_val, o_val) in zip(correct_text, correctness_values, outcome_values):
+            roc_data = roc_table["data"][i]["dataMap"]
+            correctness_values = [
+                roc_data["_TP_"],
+                roc_data["_FP_"],
+                roc_data["_TN_"],
+                roc_data["_FN_"],
+            ]
+            for c_text, c_val, o_val in zip(
+                correct_text, correctness_values, outcome_values
+            ):
                 misc_data.append(
                     {
                         "dataMap": {
                             "CorrectText": c_text,
                             "Outcome": o_val,
                             "_Count_": c_val,
-                            "_DataRole_": roc_data['_DataRole_'],
+                            "_DataRole_": roc_data["_DataRole_"],
                             "_cutoffSource_": "Default",
-                            "_cutoff_": "0.5"
+                            "_cutoff_": "0.5",
                         },
-                        "rowNumber": len(misc_data) + 1
-                    })
-
-        json_template_path = (
-            Path(__file__).resolve().parent / f"template_files/{MISC}"
-        )
-        with open(json_template_path, 'r') as f:
+                        "rowNumber": len(misc_data) + 1,
+                    }
+                )
+
+        json_template_path = Path(__file__).resolve().parent / f"template_files/{MISC}"
+        with open(json_template_path, "r") as f:
             misc_json = json.load(f)
-        misc_json['data'] = misc_data
+        misc_json["data"] = misc_data
 
         if isinstance(model_files, dict):
             model_files[MISC] = json.dumps(misc_json, indent=4, cls=NpEncoder)
@@ -2764,10 +2792,10 @@
                     f"model files dictionary."
                 )
         else:
-            with open(Path(model_files) / MISC, 'w') as json_file:
+            with open(Path(model_files) / MISC, "w") as json_file:
                 json_file.write(json.dumps(misc_json, indent=4, cls=NpEncoder))
             if cls.notebook_output:
                 print(
                     f"{MISC} was successfully written and saved to "
                     f"{Path(model_files) / MISC}"
-                )
\ No newline at end of file
+                )
diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py
index 4a57fbe2..615b4dd4 100644
--- a/src/sasctl/pzmm/write_score_code.py
+++ b/src/sasctl/pzmm/write_score_code.py
@@ -476,7 +476,6 @@ def _write_imports(
 
         """
 
-
     @classmethod
     def _viya35_model_load(
         cls,
diff --git a/tests/unit/test_write_json_files.py b/tests/unit/test_write_json_files.py
index 19cbd4f9..898bfb58 100644
--- a/tests/unit/test_write_json_files.py
+++ b/tests/unit/test_write_json_files.py
@@ -46,9 +46,11 @@
     {"name": "REASON_HomeImp", "type": "integer"},
 ]
 
+
 class BadModel:
     attr = None
 
+
 @pytest.fixture
 def bad_model():
     return BadModel()
@@ -78,6 +80,7 @@
     model.fit(X, y)
     return model
 
+
 @pytest.fixture
 def change_dir():
     """Change working directory for the duration of the test."""
@@ -888,15 +891,16 @@ def test_errors(self):
 class TestModelCardGeneration(unittest.TestCase):
     def test_generate_outcome_average_interval(self):
         df = pd.DataFrame({"input": [3, 2, 1], "output": [1, 2, 3]})
-        assert (
-            jf.generate_outcome_average(df, ["input"], "interval") ==
-            {'eventAverage': 2.0}
-        )
+        assert jf.generate_outcome_average(df, ["input"], "interval") == {
+            "eventAverage": 2.0
+        }
 
     def test_generate_outcome_average_classification(self):
         df = pd.DataFrame({"input": [3, 2], "output": [0, 1]})
-        event_percentage = jf.generate_outcome_average(df, ["input"], "classification", 1)
-        assert('eventPercentage' in event_percentage)
+        event_percentage = jf.generate_outcome_average(
+            df, ["input"], "classification", 1
+        )
+        assert "eventPercentage" in event_percentage
 
     def test_generate_outcome_average_interval_non_numeric_output(self):
         df = pd.DataFrame({"input": [3, 2, 1], "output": ["one", "two", "three"]})
@@ -913,7 +917,7 @@ class TestGetSelectionStatisticValue(unittest.TestCase):
                     "_GINI_": 1,
                     "_C_": 2,
                     "_TAU_": None,
-                    "_DataRole_": "TRAIN"
+                    "_DataRole_": "TRAIN",
                 }
             }
         ]
@@ -921,15 +925,17 @@
     }
     tmp_dir = tempfile.TemporaryDirectory()
     with open(Path(tmp_dir.name) / "dmcas_fitstat.json", "w+") as f:
-        f.write(json.dumps(model_file_dict['dmcas_fitstat.json']))
+        f.write(json.dumps(model_file_dict["dmcas_fitstat.json"]))
 
     def test_get_statistic_dict_default(self):
         selection_statistic = jf.get_selection_statistic_value(self.model_file_dict)
-        assert(selection_statistic == 1)
+        assert selection_statistic == 1
 
     def test_get_statistic_dict_custom(self):
-        selection_statistic = jf.get_selection_statistic_value(self.model_file_dict, "_C_")
-        assert(selection_statistic == 2)
+        selection_statistic = jf.get_selection_statistic_value(
+            self.model_file_dict, "_C_"
+        )
+        assert selection_statistic == 2
 
     def test_get_blank_statistic_dict(self):
         with pytest.raises(RuntimeError):
@@ -937,11 +943,13 @@
     def test_get_statistics_path_default(self):
         selection_statistic = jf.get_selection_statistic_value(Path(self.tmp_dir.name))
-        assert(selection_statistic == 1)
+        assert selection_statistic == 1
 
     def test_get_statistics_path_custom(self):
-        selection_statistic = jf.get_selection_statistic_value(Path(self.tmp_dir.name), "_C_")
-        assert(selection_statistic == 2)
+        selection_statistic = jf.get_selection_statistic_value(
+            Path(self.tmp_dir.name), "_C_"
+        )
+        assert selection_statistic == 2
 
     def test_get_blank_statistic_path(self):
         with pytest.raises(RuntimeError):
@@ -949,11 +957,11 @@
     def test_get_statistics_str_default(self):
         selection_statistic = jf.get_selection_statistic_value(self.tmp_dir.name)
-        assert (selection_statistic == 1)
+        assert selection_statistic == 1
 
     def test_get_statistics_str_custom(self):
         selection_statistic = jf.get_selection_statistic_value(self.tmp_dir.name, "_C_")
-        assert (selection_statistic == 2)
+        assert selection_statistic == 2
 
     def test_get_blank_statistic_str(self):
         with pytest.raises(RuntimeError):
@@ -962,69 +970,66 @@
 class TestUpdateModelProperties(unittest.TestCase):
     def setUp(self):
-        self.model_file_dict = {
-            "ModelProperties.json":
-                {
-                    "example": "property"
-                }
-        }
+        self.model_file_dict = {"ModelProperties.json": {"example": "property"}}
         self.tmp_dir = tempfile.TemporaryDirectory()
         with open(Path(self.tmp_dir.name) / "ModelProperties.json", "w+") as f:
-            f.write(json.dumps(self.model_file_dict['ModelProperties.json']))
+            f.write(json.dumps(self.model_file_dict["ModelProperties.json"]))
 
     def tearDown(self):
         self.tmp_dir.cleanup()
 
     def test_update_model_properties_dict(self):
-        update_dict = {'new': 'arg', 'newer': 'thing'}
+        update_dict = {"new": "arg", "newer": "thing"}
         jf.update_model_properties(self.model_file_dict, update_dict)
-        assert(self.model_file_dict['ModelProperties.json']['example'] == 'property')
-        assert(self.model_file_dict['ModelProperties.json']['new'] == 'arg')
-        assert(self.model_file_dict['ModelProperties.json']['newer'] == 'thing')
+        assert self.model_file_dict["ModelProperties.json"]["example"] == "property"
+        assert self.model_file_dict["ModelProperties.json"]["new"] == "arg"
+        assert self.model_file_dict["ModelProperties.json"]["newer"] == "thing"
 
     def test_update_model_properties_dict_overwrite(self):
-        update_dict = {'new': 'arg', 'example': 'thing'}
+        update_dict = {"new": "arg", "example": "thing"}
         jf.update_model_properties(self.model_file_dict, update_dict)
-        assert (self.model_file_dict['ModelProperties.json']['example'] == 'thing')
-        assert (self.model_file_dict['ModelProperties.json']['new'] == 'arg')
+        assert self.model_file_dict["ModelProperties.json"]["example"] == "thing"
+        assert self.model_file_dict["ModelProperties.json"]["new"] == "arg"
 
     def test_update_model_properties_dict_number(self):
         update_dict = {"number": 1}
         jf.update_model_properties(self.model_file_dict, update_dict)
-        assert (self.model_file_dict['ModelProperties.json']['number'] == '1')
+        assert self.model_file_dict["ModelProperties.json"]["number"] == "1"
 
     def test_update_model_properties_dict_round_number(self):
-        update_dict = {'number': 0.123456789012345}
+        update_dict = {"number": 0.123456789012345}
         jf.update_model_properties(self.model_file_dict, update_dict)
-        assert (self.model_file_dict['ModelProperties.json']['number'] == '0.12345678901234')
+        assert (
+            self.model_file_dict["ModelProperties.json"]["number"] == "0.12345678901234"
+        )
 
     def test_update_model_properties_str(self):
-        update_dict = {'new': 'arg', 'newer': 'thing'}
+        update_dict = {"new": "arg", "newer": "thing"}
         jf.update_model_properties(self.tmp_dir.name, update_dict)
-        with open(Path(self.tmp_dir.name) / 'ModelProperties.json', 'r') as f:
+        with open(Path(self.tmp_dir.name) / "ModelProperties.json", "r") as f:
             model_properties = json.load(f)
-        assert(model_properties['example'] == 'property')
-        assert(model_properties['new'] == 'arg')
-        assert(model_properties['newer'] == 'thing')
+        assert model_properties["example"] == "property"
+        assert model_properties["new"] == "arg"
+        assert model_properties["newer"] == "thing"
 
     def test_update_model_properties_str_overwrite(self):
-        update_dict = {'new': 'arg', 'example': 'thing'}
+        update_dict = {"new": "arg", "example": "thing"}
         jf.update_model_properties(self.tmp_dir.name, update_dict)
-        with open(Path(self.tmp_dir.name) / 'ModelProperties.json', 'r') as f:
+        with open(Path(self.tmp_dir.name) / "ModelProperties.json", "r") as f:
             model_properties = json.load(f)
-        assert (model_properties['example'] == 'thing')
-        assert (model_properties['new'] == 'arg')
+        assert model_properties["example"] == "thing"
+        assert model_properties["new"] == "arg"
 
     def test_update_model_properties_str_number(self):
         update_dict = {"number": 1}
         jf.update_model_properties(self.tmp_dir.name, update_dict)
-        with open(Path(self.tmp_dir.name) / 'ModelProperties.json', 'r') as f:
+        with open(Path(self.tmp_dir.name) / "ModelProperties.json", "r") as f:
             model_properties = json.load(f)
-        assert (model_properties['number'] == '1')
+        assert model_properties["number"] == "1"
 
     def test_update_model_properties_str_round_number(self):
-        update_dict = {'number': 0.123456789012345}
+        update_dict = {"number": 0.123456789012345}
         jf.update_model_properties(self.tmp_dir.name, update_dict)
-        with open(Path(self.tmp_dir.name) / 'ModelProperties.json', 'r') as f:
+        with open(Path(self.tmp_dir.name) / "ModelProperties.json", "r") as f:
            model_properties = json.load(f)
-        assert (model_properties['number'] == '0.12345678901234')
\ No newline at end of file
+        assert model_properties["number"] == "0.12345678901234"