From 53cb8bb9a337fe9082ea04fb9c1ba731152d5004 Mon Sep 17 00:00:00 2001 From: djm21 Date: Wed, 6 Mar 2024 18:22:26 -0600 Subject: [PATCH 1/9] Added functionality for Model Cards in write_json_files. --- .../dmcas_relativeimportance.json | 58 +++ src/sasctl/pzmm/write_json_files.py | 445 ++++++++++++++++++ 2 files changed, 503 insertions(+) create mode 100644 src/sasctl/pzmm/template_files/dmcas_relativeimportance.json diff --git a/src/sasctl/pzmm/template_files/dmcas_relativeimportance.json b/src/sasctl/pzmm/template_files/dmcas_relativeimportance.json new file mode 100644 index 00000000..c64cae9f --- /dev/null +++ b/src/sasctl/pzmm/template_files/dmcas_relativeimportance.json @@ -0,0 +1,58 @@ +{ + "creationTimeStamp" : "0001-01-01T00:00:00Z", + "modifiedTimeStamp" : "0001-01-01T00:00:00Z", + "revision" : 0, + "name" : "dmcas_relativeimportance", + "version" : 0, + "order" : 0, + "parameterMap" : { + "LABEL" : { + "label" : "Variable Label", + "length" : 256, + "order" : 1, + "parameter" : "LABEL", + "preformatted" : false, + "type" : "char", + "values" : [ "LABEL" ] + }, + "LEVEL" : { + "label" : "Variable Level", + "length" : 10, + "order" : 5, + "parameter" : "LEVEL", + "preformatted" : false, + "type" : "char", + "values" : [ "LEVEL" ] + }, + "ROLE" : { + "label" : "Role", + "length" : 32, + "order" : 4, + "parameter" : "ROLE", + "preformatted" : false, + "type" : "char", + "values" : [ "ROLE" ] + }, + "RelativeImportance" : { + "label" : "Relative Importance", + "length" : 8, + "order" : 3, + "parameter" : "RelativeImportance", + "preformatted" : false, + "type" : "num", + "values" : [ "RelativeImportance" ] + }, + "Variable" : { + "label" : "Variable Name", + "length" : 255, + "order" : 2, + "parameter" : "Variable", + "preformatted" : false, + "type" : "char", + "values" : [ "Variable" ] + } + }, + "data" : [], + "xInteger" : false, + "yInteger" : false + } \ No newline at end of file diff --git a/src/sasctl/pzmm/write_json_files.py 
b/src/sasctl/pzmm/write_json_files.py index 2d2fc327..2618c209 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ b/src/sasctl/pzmm/write_json_files.py @@ -57,6 +57,7 @@ class NpEncoder(json.JSONEncoder): LIFT = "dmcas_lift.json" MAXDIFFERENCES = "maxDifferences.json" GROUPMETRICS = "groupMetrics.json" +VARIMPORTANCES = 'dmcas_relativeimportance.json' def _flatten(nested_list: Iterable) -> Generator[Any, None, None]: @@ -2198,3 +2199,447 @@ def remove_standard_library_packages(package_list: List[str]) -> List[str]: package for package in package_list if package not in py10stdlib ] return package_list + + @classmethod + def generate_model_card( + cls, + model_prefix: str, + model_files: Union[str, Path, dict], + algorithm: str, + train_data: pd.DataFrame, + train_predictions: Union[pd.Series, list], + target_type: str = "Interval", + target_value: Union[str, int, float, None] = None, + interval_vars: Optional[list] = [], + class_vars: Optional[list] = [], + selection_statistic: str = "_GINI_", + server: str = "cas-shared-default", + caslib: str = "Public", + ): + """ + Generates everything required for the model card feature within SAS Model Manager. + + This includes uploading the training data to CAS, updating ModelProperties.json to have + some extra properties, and generating dmcas_relativeimportance.json. + + Parameters + ---------- + model_prefix : string + The prefix used to name files relating to the model. This is used to provide a unique + name to the training data table when it is uploaded to CAS. + model_files : string, Path, or dict + Either the directory location of the model files (string or Path object), or + a dictionary containing the contents of all the model files. + algorithm : str + The name of the algorithm used to generate the model. + train_data: pandas.DataFrame + Training data that contains all input variables as well as the target variable. 
+ train_predictions : pandas.Series, list + List of predictions made by the model on the training data. + target_type : string + Type the model is targeting. Currently supports "Classification" and "Interval" types. + The default value is "Interval". + target_value : string, int, float, optional + Value the model is targeting for Classification models. This argument is not needed for + Interval models. The default value is None. + interval_vars : list, optional + A list of interval variables. The default value is an empty list. + class_vars : list, optional + A list of classification variables. The default value is an empty list. + selection_statistic: str, optional + The selection statistic chosen to score the model against other models. Can be any of the + following values: "_RASE_", "_NObs_", "_GINI_", "_GAMMA_", "_MCE_", "_ASE_", "_MCLL_", + "_KS_", "_KSPostCutoff_", "_DIV_", "_TAU_", "_KSCut_", or "_C_". The default value is "_GINI_". + server: str, optional + The CAS server the training data will be stored on. The default value is "cas-shared-default" + caslib: str, optional + The caslib the training data will be stored on. The default value is "Public" + """ + if not target_value and target_type == "Classification": + raise RuntimeError( + "For the model card data to be properly generated on a Classification " + "model, a target value is required." + ) + if target_type not in ["Classification", "Interval"]: + raise RuntimeError( + "Only Classification and Interval target types are currently accepted." + ) + if selection_statistic not in cls.valid_params: + raise RuntimeError( + "The selection statistic must be a value generated in dmcas_fitstat.json. See " + "the documentation for a list of valid selection statistic values." + ) + if not algorithm: + raise RuntimeError( + "Either a given algorithm or a model is required for the model card." 
+ ) + try: + sess = current_session() + conn = sess.as_swat() + except ImportError: + raise RuntimeError( + "The `swat` package is required to generate fit statistics, ROC, and " + "Lift charts with the calculate_model_statistics function." + ) + + # Upload training table to CAS. The location of the training table is returned. + training_table = cls.upload_training_data( + conn, + model_prefix, + train_data, + server, + caslib + ) + + # Generates the event percentage for Classification targets, and the event average + # for Interval targets + update_dict = cls.generate_outcome_average( + train_data=train_data, + input_variables=interval_vars + class_vars, + target_type=target_type, + target_value=target_value + ) + + # Formats all new ModelProperties information into one dictionary that can be used to update the json file + update_dict['trainTable'] = training_table + update_dict['selectionStatistic'] = selection_statistic + update_dict['algorithm'] = algorithm + update_dict['selectionStatisticValue'] = cls.get_selection_statistic_value(model_files, selection_statistic) + cls.update_model_properties(model_files, update_dict) + + # Generates dmcas_relativeimportance.json file + cls.generate_variable_importance( + conn, + model_files, + train_data, + train_predictions, + target_type, + interval_vars, + class_vars, + caslib + ) + + @staticmethod + def upload_training_data( + conn, + model_prefix: str, + train_data: pd.DataFrame, + server: str = "cas-shared-default", + caslib: str = 'Public' + ): + """ + Uploads training data to CAS server. + + Parameters + ---------- + conn + SWAT connection. Used to connect to CAS server. + model_prefix : string + The prefix used to name files relating to the model. This is used to provide a unique + name to the training data table when it is uploaded to CAS. + train_data: pandas.DataFrame + Training data that contains all input variables as well as the target variable. 
+ server: str, optional + The CAS server the training data will be stored on. The default value is "cas-shared-default" + caslib: str, optional + The caslib the training data will be stored on. The default value is "Public" + + Returns + ------- + string + Returns a string that represents the location of the training table within CAS. + """ + # Upload raw training data to caslib so that data can be analyzed + train_data_name = model_prefix + "_train_data" + upload_train_data = conn.upload( + train_data, + casout={"name": train_data_name, "caslib": caslib}, + promote=True + ) + + if upload_train_data.status is not None: + raise RuntimeError( + f'A table with the name {train_data_name} already exists in the specified caslib. Please ' + 'either delete/rename the old table or give a new name to the current table.' + ) + + return server + '/' + caslib + '/' + train_data_name + + @staticmethod + def generate_outcome_average( + train_data: pd.DataFrame, + input_variables: list, + target_type, + target_value: Union[str, int, float] = None + ): + """ + Generates the outcome average of the training data. For Interval targets, the event average + is generated. For Classification targets, the event average is returned. + + Parameters + ---------- + train_data: pandas.DataFrame + Training data that contains all input variables as well as the target variable. If multiple + non-input variables are included, the function will assume that the first non-input variable row + is the output. + input_variables: list + A list of all input variables used by the model. Used to isolate the output variable. + target_type : string + Type the model is targeting. Currently supports "Classification" and "Interval" types. + target_value : string, int, float, optional + Value the model is targeting for Classification models. This argument is not needed for + Interval models. The default value is None. 
+ + Returns + ------- + dict + Returns a dictionary with a key value pair that represents the outcome average. + """ + output_var = train_data.drop(input_variables, axis=1) + if target_type == "Classification": + value_counts = output_var[output_var.columns[0]].value_counts() + return {'eventPercentage': value_counts[target_value]/sum(value_counts)} + elif target_type == "Interval": + return {'eventAverage': sum(value_counts[value_counts.columns[0]]) / len(value_counts)} + + @staticmethod + def get_selection_statistic_value( + model_files, + selection_statistic + ): + """ + Finds the value of the chosen selection statistic in dmcas_fitstat.json, which should have been + generated before this function has been called. + + Parameters + ---------- + model_files : string, Path, or dict + Either the directory location of the model files (string or Path object), or + a dictionary containing the contents of all the model files. + selection_statistic: str, optional + The selection statistic chosen to score the model against other models. Can be any of the + following values: "_RASE_", "_NObs_", "_GINI_", "_GAMMA_", "_MCE_", "_ASE_", "_MCLL_", + "_KS_", "_KSPostCutoff_", "_DIV_", "_TAU_", "_KSCut_", or "_C_". The default value is "_GINI_". + + Returns + ------- + float + Returns the numerical value associated with the chosen selection statistic. + """ + if isinstance(model_files, dict): + if FITSTAT not in model_files: + raise RuntimeError( + "The dmcas_fitstat.json file must be generated before the model card data " + "can be generated." + ) + for fitstat in model_files[FITSTAT]['data']: + if fitstat['dataMap']['_DataRole_'] == "TRAIN": + if selection_statistic not in fitstat['dataMap'] or fitstat['dataMap'][selection_statistic] == None: + raise RuntimeError( + "The chosen selection statistic was not generated properly. Please ensure the value has been " + "properly created then try again."
+ ) + return fitstat['dataMap'][selection_statistic] + else: + if not Path.exists(Path(model_files) / FITSTAT): + raise RuntimeError( + "The dmcas_fitstat.json file must be generated before the model card data " + "can be generated." + ) + with open(Path(model_files) / FITSTAT, 'r') as fitstat_json: + fitstat_dict = json.load(fitstat_json) + for fitstat in fitstat_dict['data']: + if fitstat['dataMap']['_DataRole_'] == "TRAIN": + if selection_statistic not in fitstat['dataMap'] or fitstat['dataMap'][selection_statistic] == None: + raise RuntimeError( + "The chosen selection statistic was not generated properly. Please ensure the value has been " + "properly created then try again." + ) + return fitstat['dataMap'][selection_statistic] + + @staticmethod + def update_model_properties( + model_files, + update_dict + ): + """ + Updates the ModelProperties.json file to include properties listed in the update_dict dictionary. + + Parameters + ---------- + model_files : string, Path, or dict + Either the directory location of the model files (string or Path object), or + a dictionary containing the contents of all the model files. + update_dict : dictionary + A dictionary containing the key-value pairs that represent properties to be added + to the ModelProperties.json file. + """ + if isinstance(model_files, dict): + if PROP not in model_files: + raise RuntimeError( + "The ModelProperties.json file must be generated before the model card data " + "can be generated." + ) + for key, value in update_dict: + model_files[PROP][key] = value + else: + if not Path.exists(Path(model_files) / PROP): + raise RuntimeError( + "The ModelProperties.json file must be generated before the model card data " + "can be generated." 
+ ) + with open(Path(model_files) / PROP, 'r+') as properties_json: + model_properties = json.load(properties_json) + for key, value in update_dict: + model_properties[key] = value + properties_json.seek(0) + properties_json.write(json.dumps(model_properties, indent=4, cls=NpEncoder)) + properties_json.truncate() + + @classmethod + def generate_variable_importance( + cls, + conn, + model_files: Union[str, Path, dict], + train_data: pd.DataFrame, + train_predictions: Union[pd.Series, list], + target_type: str = "interval", + interval_vars: Optional[list] = [], + class_vars: Optional[list] = [], + caslib: str = "Public", + ): + """ + Generates the dmcas_relativeimportance.json file, which is used to determine variable importance + + Parameters + ---------- + conn + A SWAT connection used to connect to the user's CAS server + model_files : string, Path, or dict + Either the directory location of the model files (string or Path object), or + a dictionary containing the contents of all the model files. + train_data: pandas.DataFrame + Training data that contains all input variables as well as the target variable. + train_predictions : pandas.Series, list + List of predictions made by the model on the training data. + target_type : string, optional + Type the model is targeting. Currently supports "Classification" and "Interval" types. + The default value is "Interval". + interval_vars : list, optional + A list of interval variables. The default value is an empty list. + class_vars : list, optional + A list of classification variables. The default value is an empty list. + caslib: str, optional + The caslib the training data will be stored on. The default value is "Public" + """ + try: + sess = current_session() + conn = sess.as_swat() + except ImportError: + raise RuntimeError( + "The `swat` package is required to generate fit statistics, ROC, and " + "Lift charts with the calculate_model_statistics function." 
+ ) + # Remove target variable from training data by selecting only input variable columns + x_train_data = train_data[interval_vars + class_vars] + # Upload scored training data to run variable importance on + x_train_data.insert(0, "Prediction", train_predictions, True) + conn.upload( + x_train_data, + casout={"name": "train_data", "replace": True, "caslib": caslib} + ) + + # Load actionset necessary to generate variable importance + conn.loadactionset('dataPreprocess') + request_packages = list() + if target_type == "classification": + method = "DTREE" + treeCrit = "Entropy" + elif target_type == "interval": + method = "RTREE" + treeCrit = 'RSS' + else: + raise RuntimeError( + "The selected model type is unsupported. Currently, only models that have interval or classification target types are supported." + ) + request_packages = list() + if interval_vars: + request_packages.append({ + "name": 'BIN', + "inputs": [{"name": var} for var in interval_vars], + "targets": [{"name": "Prediction"}], + "discretize":{ + "method":method, + "arguments":{ + "minNBins":1, + "maxNBins":8, + "treeCrit":treeCrit, + "contingencyTblOpts":{"inputsMethod": 'BUCKET', "inputsNLevels": 100}, + "overrides": {"minNObsInBin": 5, "binMissing": True, "noDataLowerUpperBound": True} + } + } + }) + if class_vars: + request_packages.append({ + "name": 'BIN_NOM', + "inputs": [{"name": var} for var in class_vars], + "targets": [{"name": "Prediction"}], + "catTrans":{ + "method":method, + "arguments":{ + "minNBins":1, + "maxNBins":8, + "treeCrit":treeCrit, + "overrides": {"minNObsInBin": 5, "binMissing": True} + } + } + }) + var_data = conn.dataPreprocess.transform( + table={"name": "test_data", "caslib": caslib}, + requestPackages=request_packages, + evaluationStats=True, + percentileMaxIterations=10, + percentileTolerance=0.00001, + distinctCountLimit=5000, + sasVarNameLength=True, + outputTableOptions={"inputVarPrintOrder": True}, + sasProcClient=True + ) + var_importances = 
var_data['VarTransInfo'][['Variable', 'RelVarImportance']] + var_importances = var_importances.sort_values(by=['RelVarImportance'], ascending=False).reset_index(drop=True) + relative_importances = list() + for index, row in var_importances.iterrows(): + if row['Variable'] in interval_vars: + level = "INTERVAL" + elif row['Variable'] in class_vars: + level = "NOMINAL" + relative_importances.append({ + "dataMap" : { + "LABEL": "", + "LEVEL": level, + "ROLE": "INPUT", + "RelativeImportance": str(row['RelVarImportance']), + "Variable": row['Variable'] + }, + "rowNumber": index+1 + }) + with open('./dmcas_relativeimportance.json', 'r') as f: + relative_importance_json = json.load(f) + relative_importance_json['data'] = relative_importances + + if isinstance(model_files, dict): + model_files[VARIMPORTANCES] = json.dumps(relative_importance_json, indent=4, cls=NpEncoder) + if cls.notebook_output: + print( + f"{VARIMPORTANCES} was successfully written and saved to " + f"model files dictionary." + ) + else: + with open(Path(model_files) / VARIMPORTANCES, 'w') as json_file: + json_file.write(json.dumps(relative_importance_json, indent=4, cls=NpEncoder)) + if cls.notebook_output: + print( + f"{VARIMPORTANCES} was successfully written and saved to " + f"{Path(model_files) / VARIMPORTANCES}" + + ) \ No newline at end of file From 850f05498e0bed264715cccd9010b391f2752666 Mon Sep 17 00:00:00 2001 From: djm21 Date: Wed, 6 Mar 2024 19:53:14 -0600 Subject: [PATCH 2/9] fixed some bugs to allow for model card files to generate correctly. 
--- src/sasctl/pzmm/write_json_files.py | 34 +++++++++++++++-------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index 2618c209..d905acde 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ b/src/sasctl/pzmm/write_json_files.py @@ -2208,7 +2208,7 @@ def generate_model_card( algorithm: str, train_data: pd.DataFrame, train_predictions: Union[pd.Series, list], - target_type: str = "Interval", + target_type: str = "interval", target_value: Union[str, int, float, None] = None, interval_vars: Optional[list] = [], class_vars: Optional[list] = [], @@ -2237,10 +2237,10 @@ def generate_model_card( train_predictions : pandas.Series, list List of predictions made by the model on the training data. target_type : string - Type the model is targeting. Currently supports "Classification" and "Interval" types. + Type the model is targeting. Currently supports "classification" and "interval" types. The default value is "Interval". target_value : string, int, float, optional - Value the model is targeting for Classification models. This argument is not needed for + Value the model is targeting for classification models. This argument is not needed for Interval models. The default value is None. interval_vars : list, optional A list of interval variables. The default value is an empty list. @@ -2255,14 +2255,14 @@ def generate_model_card( caslib: str, optional The caslib the training data will be stored on. The default value is "Public" """ - if not target_value and target_type == "Classification": + if not target_value and target_type == "classification": raise RuntimeError( - "For the model card data to be properly generated on a Classification " + "For the model card data to be properly generated on a classification " "model, a target value is required." 
) - if target_type not in ["Classification", "Interval"]: + if target_type not in ["classification", "interval"]: raise RuntimeError( - "Only Classification and Interval target types are currently accepted." + "Only classification and interval target types are currently accepted." ) if selection_statistic not in cls.valid_params: raise RuntimeError( @@ -2396,10 +2396,10 @@ def generate_outcome_average( Returns a dictionary with a key value pair that represents the outcome average. """ output_var = train_data.drop(input_variables, axis=1) - if target_type == "Classification": + if target_type == "classification": value_counts = output_var[output_var.columns[0]].value_counts() return {'eventPercentage': value_counts[target_value]/sum(value_counts)} - elif target_type == "Interval": + elif target_type == "interval": return {'eventAverage': sum(value_counts[value_counts.columns[0]]) / len(value_counts)} @staticmethod @@ -2480,8 +2480,8 @@ def update_model_properties( "The ModelProperties.json file must be generated before the model card data " "can be generated." 
) - for key, value in update_dict: - model_files[PROP][key] = value + for key in update_dict: + model_files[PROP][key] = update_dict[key] else: if not Path.exists(Path(model_files) / PROP): raise RuntimeError( @@ -2490,8 +2490,8 @@ def update_model_properties( ) with open(Path(model_files) / PROP, 'r+') as properties_json: model_properties = json.load(properties_json) - for key, value in update_dict: - model_properties[key] = value + for key in update_dict: + model_properties[key] = update_dict[key] properties_json.seek(0) properties_json.write(json.dumps(model_properties, indent=4, cls=NpEncoder)) properties_json.truncate() @@ -2595,7 +2595,7 @@ def generate_variable_importance( } }) var_data = conn.dataPreprocess.transform( - table={"name": "test_data", "caslib": caslib}, + table={"name": "train_data", "caslib": caslib}, requestPackages=request_packages, evaluationStats=True, percentileMaxIterations=10, @@ -2623,7 +2623,10 @@ def generate_variable_importance( }, "rowNumber": index+1 }) - with open('./dmcas_relativeimportance.json', 'r') as f: + json_template_path = ( + Path(__file__).resolve().parent / f"template_files/{VARIMPORTANCES}" + ) + with open(json_template_path, 'r') as f: relative_importance_json = json.load(f) relative_importance_json['data'] = relative_importances @@ -2641,5 +2644,4 @@ def generate_variable_importance( print( f"{VARIMPORTANCES} was successfully written and saved to " f"{Path(model_files) / VARIMPORTANCES}" - ) \ No newline at end of file From c20c1629aef3aabd20765da4318f6fe1e33eb7dd Mon Sep 17 00:00:00 2001 From: djm21 Date: Sun, 10 Mar 2024 22:21:28 -0500 Subject: [PATCH 3/9] Updated update_model_properties to give proper type/length to model properties --- src/sasctl/pzmm/write_json_files.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index d905acde..4345b6eb 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ 
b/src/sasctl/pzmm/write_json_files.py @@ -2481,7 +2481,10 @@ def update_model_properties( "can be generated." ) for key in update_dict: - model_files[PROP][key] = update_dict[key] + if not isinstance(update_dict[key], str): + model_files[PROP][key] = str(round(update_dict[key], 14)) + else: + model_files[PROP][key] = update_dict[key] else: if not Path.exists(Path(model_files) / PROP): raise RuntimeError( @@ -2490,8 +2493,10 @@ def update_model_properties( ) with open(Path(model_files) / PROP, 'r+') as properties_json: model_properties = json.load(properties_json) - for key in update_dict: - model_properties[key] = update_dict[key] + if not isinstance(update_dict[key], str): + model_files[PROP][key] = str(round(update_dict[key], 14)) + else: + model_files[PROP][key] = update_dict[key] properties_json.seek(0) properties_json.write(json.dumps(model_properties, indent=4, cls=NpEncoder)) properties_json.truncate() From 3913c309d9a50c77563f2fbf234158d76d21092d Mon Sep 17 00:00:00 2001 From: djm21 Date: Tue, 12 Mar 2024 14:59:16 -0500 Subject: [PATCH 4/9] Added some tests for model cards + fixed some model card errors --- src/sasctl/pzmm/write_json_files.py | 55 +++++---- tests/unit/test_write_json_files.py | 179 ++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 28 deletions(-) diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index 4345b6eb..70a3ee9b 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ b/src/sasctl/pzmm/write_json_files.py @@ -2374,7 +2374,7 @@ def generate_outcome_average( ): """ Generates the outcome average of the training data. For Interval targets, the event average - is generated. For Classification targets, the event average is returned. + is generated. For Classification targets, the event percentage is returned. Parameters ---------- @@ -2395,17 +2395,23 @@ def generate_outcome_average( dict Returns a dictionary with a key value pair that represents the outcome average. 
""" + import numbers output_var = train_data.drop(input_variables, axis=1) if target_type == "classification": value_counts = output_var[output_var.columns[0]].value_counts() return {'eventPercentage': value_counts[target_value]/sum(value_counts)} elif target_type == "interval": - return {'eventAverage': sum(value_counts[value_counts.columns[0]]) / len(value_counts)} + if not isinstance(output_var[output_var.columns[0]].iloc[0], numbers.Number): + raise ValueError("Detected output column is not numeric. Please ensure that " + + "the correct output column is being passed, and that no extra columns " + + "are in front of the output column. This function assumes that the first " + + "non-input column is the output column.jf") + return {'eventAverage': sum(output_var[output_var.columns[0]]) / len(output_var)} @staticmethod def get_selection_statistic_value( - model_files, - selection_statistic + model_files: Union[str, Path, dict], + selection_statistic: str = "_GINI_" ): """ Finds the value of the chosen selection statistic in dmcas_fitstat.json, which should have been @@ -2493,10 +2499,11 @@ def update_model_properties( ) with open(Path(model_files) / PROP, 'r+') as properties_json: model_properties = json.load(properties_json) - if not isinstance(update_dict[key], str): - model_files[PROP][key] = str(round(update_dict[key], 14)) - else: - model_files[PROP][key] = update_dict[key] + for key in update_dict: + if not isinstance(update_dict[key], str): + model_properties[key] = str(round(update_dict[key], 14)) + else: + model_properties[key] = update_dict[key] properties_json.seek(0) properties_json.write(json.dumps(model_properties, indent=4, cls=NpEncoder)) properties_json.truncate() @@ -2537,14 +2544,6 @@ def generate_variable_importance( caslib: str, optional The caslib the training data will be stored on. 
The default value is "Public" """ - try: - sess = current_session() - conn = sess.as_swat() - except ImportError: - raise RuntimeError( - "The `swat` package is required to generate fit statistics, ROC, and " - "Lift charts with the calculate_model_statistics function." - ) # Remove target variable from training data by selecting only input variable columns x_train_data = train_data[interval_vars + class_vars] # Upload scored training data to run variable importance on @@ -2573,12 +2572,12 @@ def generate_variable_importance( "name": 'BIN', "inputs": [{"name": var} for var in interval_vars], "targets": [{"name": "Prediction"}], - "discretize":{ - "method":method, - "arguments":{ - "minNBins":1, - "maxNBins":8, - "treeCrit":treeCrit, + "discretize": { + "method": method, + "arguments": { + "minNBins": 1, + "maxNBins": 8, + "treeCrit": treeCrit, "contingencyTblOpts":{"inputsMethod": 'BUCKET', "inputsNLevels": 100}, "overrides": {"minNObsInBin": 5, "binMissing": True, "noDataLowerUpperBound": True} } @@ -2589,12 +2588,12 @@ def generate_variable_importance( "name": 'BIN_NOM', "inputs": [{"name": var} for var in class_vars], "targets": [{"name": "Prediction"}], - "catTrans":{ - "method":method, - "arguments":{ - "minNBins":1, - "maxNBins":8, - "treeCrit":treeCrit, + "catTrans": { + "method": method, + "arguments": { + "minNBins": 1, + "maxNBins": 8, + "treeCrit": treeCrit, "overrides": {"minNObsInBin": 5, "binMissing": True} } } diff --git a/tests/unit/test_write_json_files.py b/tests/unit/test_write_json_files.py index 412759f2..19cbd4f9 100644 --- a/tests/unit/test_write_json_files.py +++ b/tests/unit/test_write_json_files.py @@ -16,11 +16,14 @@ import warnings from pathlib import Path from unittest.mock import patch +import math import numpy as np import pandas as pd import pytest from sklearn.model_selection import train_test_split +from sklearn import datasets +from sklearn.linear_model import LogisticRegression from sklearn.tree import DecisionTreeClassifier 
import sasctl.pzmm as pzmm @@ -43,6 +46,37 @@ {"name": "REASON_HomeImp", "type": "integer"}, ] +class BadModel: + attr = None + +@pytest.fixture +def bad_model(): + return BadModel() + + +@pytest.fixture +def train_data(): + """Returns the Iris data set as (X, y)""" + raw = datasets.load_iris() + iris = pd.DataFrame(raw.data, columns=raw.feature_names) + iris = iris.join(pd.DataFrame(raw.target)) + iris.columns = ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth", "Species"] + iris["Species"] = iris["Species"].astype("category") + iris.Species.cat.categories = raw.target_names + return iris.iloc[:, 0:4], iris["Species"] + + +@pytest.fixture +def sklearn_model(train_data): + """Returns a simple Scikit-Learn model""" + X, y = train_data + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + model = LogisticRegression( + multi_class="multinomial", solver="lbfgs", max_iter=1000 + ) + model.fit(X, y) + return model @pytest.fixture def change_dir(): @@ -849,3 +883,148 @@ def test_errors(self): jf.assess_model_bias( score_table, sensitive_values, actual_values ) + + +class TestModelCardGeneration(unittest.TestCase): + def test_generate_outcome_average_interval(self): + df = pd.DataFrame({"input": [3, 2, 1], "output": [1, 2, 3]}) + assert ( + jf.generate_outcome_average(df, ["input"], "interval") == + {'eventAverage': 2.0} + ) + + def test_generate_outcome_average_classification(self): + df = pd.DataFrame({"input": [3, 2], "output": [0, 1]}) + event_percentage = jf.generate_outcome_average(df, ["input"], "classification", 1) + assert('eventPercentage' in event_percentage) + + def test_generate_outcome_average_interval_non_numeric_output(self): + df = pd.DataFrame({"input": [3, 2, 1], "output": ["one", "two", "three"]}) + with pytest.raises(ValueError): + jf.generate_outcome_average(df, ["input"], "interval") + + +class TestGetSelectionStatisticValue(unittest.TestCase): + model_file_dict = { + "dmcas_fitstat.json": { + "data": [ + { + "dataMap": { + 
"_GINI_": 1, + "_C_": 2, + "_TAU_": None, + "_DataRole_": "TRAIN" + } + } + ] + } + } + tmp_dir = tempfile.TemporaryDirectory() + with open(Path(tmp_dir.name) / "dmcas_fitstat.json", "w+") as f: + f.write(json.dumps(model_file_dict['dmcas_fitstat.json'])) + + def test_get_statistic_dict_default(self): + selection_statistic = jf.get_selection_statistic_value(self.model_file_dict) + assert(selection_statistic == 1) + + def test_get_statistic_dict_custom(self): + selection_statistic = jf.get_selection_statistic_value(self.model_file_dict, "_C_") + assert(selection_statistic == 2) + + def test_get_blank_statistic_dict(self): + with pytest.raises(RuntimeError): + jf.get_selection_statistic_value(self.model_file_dict, "_TAU_") + + def test_get_statistics_path_default(self): + selection_statistic = jf.get_selection_statistic_value(Path(self.tmp_dir.name)) + assert(selection_statistic == 1) + + def test_get_statistics_path_custom(self): + selection_statistic = jf.get_selection_statistic_value(Path(self.tmp_dir.name), "_C_") + assert(selection_statistic == 2) + + def test_get_blank_statistic_path(self): + with pytest.raises(RuntimeError): + jf.get_selection_statistic_value(Path(self.tmp_dir.name), "_TAU_") + + def test_get_statistics_str_default(self): + selection_statistic = jf.get_selection_statistic_value(self.tmp_dir.name) + assert (selection_statistic == 1) + + def test_get_statistics_str_custom(self): + selection_statistic = jf.get_selection_statistic_value(self.tmp_dir.name, "_C_") + assert (selection_statistic == 2) + + def test_get_blank_statistic_str(self): + with pytest.raises(RuntimeError): + jf.get_selection_statistic_value(self.tmp_dir.name, "_TAU_") + + +class TestUpdateModelProperties(unittest.TestCase): + def setUp(self): + self.model_file_dict = { + "ModelProperties.json": + { + "example": "property" + } + } + self.tmp_dir = tempfile.TemporaryDirectory() + with open(Path(self.tmp_dir.name) / "ModelProperties.json", "w+") as f: + 
f.write(json.dumps(self.model_file_dict['ModelProperties.json'])) + + def tearDown(self): + self.tmp_dir.cleanup() + + def test_update_model_properties_dict(self): + update_dict = {'new': 'arg', 'newer': 'thing'} + jf.update_model_properties(self.model_file_dict, update_dict) + assert(self.model_file_dict['ModelProperties.json']['example'] == 'property') + assert(self.model_file_dict['ModelProperties.json']['new'] == 'arg') + assert(self.model_file_dict['ModelProperties.json']['newer'] == 'thing') + + def test_update_model_properties_dict_overwrite(self): + update_dict = {'new': 'arg', 'example': 'thing'} + jf.update_model_properties(self.model_file_dict, update_dict) + assert (self.model_file_dict['ModelProperties.json']['example'] == 'thing') + assert (self.model_file_dict['ModelProperties.json']['new'] == 'arg') + + def test_update_model_properties_dict_number(self): + update_dict = {"number": 1} + jf.update_model_properties(self.model_file_dict, update_dict) + assert (self.model_file_dict['ModelProperties.json']['number'] == '1') + + def test_update_model_properties_dict_round_number(self): + update_dict = {'number': 0.123456789012345} + jf.update_model_properties(self.model_file_dict, update_dict) + assert (self.model_file_dict['ModelProperties.json']['number'] == '0.12345678901234') + + def test_update_model_properties_str(self): + update_dict = {'new': 'arg', 'newer': 'thing'} + jf.update_model_properties(self.tmp_dir.name, update_dict) + with open(Path(self.tmp_dir.name) / 'ModelProperties.json', 'r') as f: + model_properties = json.load(f) + assert(model_properties['example'] == 'property') + assert(model_properties['new'] == 'arg') + assert(model_properties['newer'] == 'thing') + + def test_update_model_properties_str_overwrite(self): + update_dict = {'new': 'arg', 'example': 'thing'} + jf.update_model_properties(self.tmp_dir.name, update_dict) + with open(Path(self.tmp_dir.name) / 'ModelProperties.json', 'r') as f: + model_properties = json.load(f) + 
assert (model_properties['example'] == 'thing') + assert (model_properties['new'] == 'arg') + + def test_update_model_properties_str_number(self): + update_dict = {"number": 1} + jf.update_model_properties(self.tmp_dir.name, update_dict) + with open(Path(self.tmp_dir.name) / 'ModelProperties.json', 'r') as f: + model_properties = json.load(f) + assert (model_properties['number'] == '1') + + def test_update_model_properties_str_round_number(self): + update_dict = {'number': 0.123456789012345} + jf.update_model_properties(self.tmp_dir.name, update_dict) + with open(Path(self.tmp_dir.name) / 'ModelProperties.json', 'r') as f: + model_properties = json.load(f) + assert (model_properties['number'] == '0.12345678901234') \ No newline at end of file From fc85adce542add5fd44d41c0da61f43bd7eb054a Mon Sep 17 00:00:00 2001 From: djm21 Date: Mon, 18 Mar 2024 11:55:37 -0500 Subject: [PATCH 5/9] changed "interval" to "prediction" + changed calculate_model_statistics to allow for prediction models --- src/sasctl/pzmm/write_json_files.py | 123 +++++++++++++++++----------- 1 file changed, 75 insertions(+), 48 deletions(-) diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index 70a3ee9b..1d00bd0e 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ b/src/sasctl/pzmm/write_json_files.py @@ -1174,6 +1174,7 @@ def calculate_model_statistics( train_data: Union[DataFrame, List[list], Type["numpy.array"]] = None, test_data: Union[DataFrame, List[list], Type["numpy.array"]] = None, json_path: Union[str, Path, None] = None, + target_type: str = "classification" ) -> Union[dict, None]: """ Calculates fit statistics (including ROC and Lift curves) from datasets and then @@ -1214,6 +1215,9 @@ def calculate_model_statistics( Dataset pertaining to the test data. The default value is None. json_path : str or Path, optional Location for the output JSON files. The default value is None. + target_type: str, optional + Type of target the model is trying to find. 
Currently supports "classification" + and "prediction" types. The default value is "classification". Returns ------- @@ -1260,18 +1264,26 @@ def calculate_model_statistics( data, casout={"name": "assess_dataset", "replace": True, "caslib": "Public"}, ) - - conn.percentile.assess( - table={"name": "assess_dataset", "caslib": "Public"}, - response="predict", - pVar="predict_proba", - event=str(target_value), - pEvent=str(prob_value) if prob_value else str(0.5), - inputs="actual", - fitStatOut={"name": "FitStat", "replace": True, "caslib": "Public"}, - rocOut={"name": "ROC", "replace": True, "caslib": "Public"}, - casout={"name": "Lift", "replace": True, "caslib": "Public"}, - ) + if target_type == 'classification': + conn.percentile.assess( + table={"name": "assess_dataset", "caslib": "Public"}, + response="predict", + pVar="predict_proba", + event=str(target_value), + pEvent=str(prob_value) if prob_value else str(0.5), + inputs="actual", + fitStatOut={"name": "FitStat", "replace": True, "caslib": "Public"}, + rocOut={"name": "ROC", "replace": True, "caslib": "Public"}, + casout={"name": "Lift", "replace": True, "caslib": "Public"}, + ) + else: + conn.percentile.assess( + table={"name": "assess_dataset", "caslib": "Public"}, + response="predict", + inputs="actual", + fitStatOut={"name": "FitStat", "replace": True, "caslib": "Public"}, + casout={"name": "Lift", "replace": True, "caslib": "Public"} + ) fitstat_dict = ( pd.DataFrame(conn.CASTable("FitStat", caslib="Public").to_frame()) @@ -1280,11 +1292,11 @@ def calculate_model_statistics( .to_dict() ) json_dict[0]["data"][i]["dataMap"].update(fitstat_dict) - - roc_df = pd.DataFrame(conn.CASTable("ROC", caslib="Public").to_frame()) - roc_dict = cls.apply_dataframe_to_json(json_dict[1]["data"], i, roc_df) - for j in range(len(roc_dict)): - json_dict[1]["data"][j].update(roc_dict[j]) + if target_type == 'classification': + roc_df = pd.DataFrame(conn.CASTable("ROC", caslib="Public").to_frame()) + roc_dict = 
cls.apply_dataframe_to_json(json_dict[1]["data"], i, roc_df) + for j in range(len(roc_dict)): + json_dict[1]["data"][j].update(roc_dict[j]) lift_df = pd.DataFrame(conn.CASTable("Lift", caslib="Public").to_frame()) lift_dict = cls.apply_dataframe_to_json(json_dict[2]["data"], i, lift_df, 1) @@ -1293,19 +1305,26 @@ if json_path: for i, name in enumerate([FITSTAT, ROC, LIFT]): - with open(Path(json_path) / name, "w") as json_file: - json_file.write(json.dumps(json_dict[i], indent=4, cls=NpEncoder)) - if cls.notebook_output: - print( - f"{name} was successfully written and saved to " - f"{Path(json_path) / name}" - ) + if not (name == ROC and target_type == "prediction"): + with open(Path(json_path) / name, "w") as json_file: + json_file.write(json.dumps(json_dict[i], indent=4, cls=NpEncoder)) + if cls.notebook_output: + print( + f"{name} was successfully written and saved to " + f"{Path(json_path) / name}" + ) else: - return { - FITSTAT: json.dumps(json_dict[0], indent=4, cls=NpEncoder), - ROC: json.dumps(json_dict[1], indent=4, cls=NpEncoder), - LIFT: json.dumps(json_dict[2], indent=4, cls=NpEncoder), - } + if target_type == 'classification': + return { + FITSTAT: json.dumps(json_dict[0], indent=4, cls=NpEncoder), + ROC: json.dumps(json_dict[1], indent=4, cls=NpEncoder), + LIFT: json.dumps(json_dict[2], indent=4, cls=NpEncoder), + } + else: + return { + FITSTAT: json.dumps(json_dict[0], indent=4, cls=NpEncoder), + LIFT: json.dumps(json_dict[2], indent=4, cls=NpEncoder), + } @staticmethod def check_for_data( @@ -2208,11 +2227,11 @@ def generate_model_card( algorithm: str, train_data: pd.DataFrame, train_predictions: Union[pd.Series, list], - target_type: str = "interval", + target_type: str = "classification", target_value: Union[str, int, float, None] = None, interval_vars: Optional[list] = [], class_vars: Optional[list] = [], - selection_statistic: str = "_GINI_", + selection_statistic: Optional[str] = None, server: str = "cas-shared-default", 
caslib: str = "Public", ): @@ -2237,19 +2256,22 @@ def generate_model_card( train_predictions : pandas.Series, list List of predictions made by the model on the training data. target_type : string - Type the model is targeting. Currently supports "classification" and "interval" types. - The default value is "Interval". + Type of target the model is trying to find. Currently supports "classification" and "prediction" types. + The default value is "classification". target_value : string, int, float, optional Value the model is targeting for classification models. This argument is not needed for - Interval models. The default value is None. + prediction models. The default value is None. interval_vars : list, optional A list of interval variables. The default value is an empty list. class_vars : list, optional A list of classification variables. The default value is an empty list. selection_statistic: str, optional - The selection statistic chosen to score the model against other models. Can be any of the - following values: "_RASE_", "_NObs_", "_GINI_", "_GAMMA_", "_MCE_", "_ASE_", "_MCLL_", - "_KS_", "_KSPostCutoff_", "_DIV_", "_TAU_", "_KSCut_", or "_C_". The default value is "_GINI_". + The selection statistic chosen to score the model against other models. Classification + models can take any of the following values: "_RASE_", "_GINI_", "_GAMMA_", "_MCE_", + "_ASE_", "_MCLL_", "_KS_", "_KSPostCutoff_", "_DIV_", "_TAU_", "_KSCut_", or "_C_". + Prediction models can take any of the following values: "_ASE_", "_DIV_", "_RASE_", "_MAE_", + "_RMAE_", "_MSLE_", "_RMSLE_" The default value is "_KS_" for classification models and + "_ASE_" for prediction models. server: str, optional The CAS server the training data will be stored on. The default value is "cas-shared-default" caslib: str, optional @@ -2260,10 +2282,15 @@ def generate_model_card( "For the model card data to be properly generated on a classification " "model, a target value is required." 
) - if target_type not in ["classification", "interval"]: + if target_type not in ["classification", "prediction"]: raise RuntimeError( - "Only classification and interval target types are currently accepted." + "Only classification and prediction target types are currently accepted." ) + if selection_statistic is None: + if target_type == 'classification': + selection_statistic = '_KS_' + elif target_type == 'prediction': + selection_statistic = "_ASE_" if selection_statistic not in cls.valid_params: raise RuntimeError( "The selection statistic must be a value generated in dmcas_fitstat.json. See " @@ -2292,7 +2319,7 @@ def generate_model_card( ) # Generates the event percentage for Classification targets, and the event average - # for Interval targets + # for prediction targets update_dict = cls.generate_outcome_average( train_data=train_data, input_variables=interval_vars + class_vars, @@ -2373,7 +2400,7 @@ def generate_outcome_average( target_value: Union[str, int, float] = None ): """ - Generates the outcome average of the training data. For Interval targets, the event average + Generates the outcome average of the training data. For prediction targets, the event average is generated. For Classification targets, the event percentage is returned. Parameters @@ -2385,10 +2412,10 @@ input_variables: list A list of all input variables used by the model. Used to isolate the output variable. target_type : string - Type the model is targeting. Currently supports "Classification" and "Interval" types. + Type the model is targeting. Currently supports "classification" and "prediction" types. target_value : string, int, float, optional Value the model is targeting for Classification models. This argument is not needed for - Interval models. The default value is None. + prediction models. The default value is None. 
Returns ------- @@ -2400,7 +2427,7 @@ def generate_outcome_average( if target_type == "classification": value_counts = output_var[output_var.columns[0]].value_counts() return {'eventPercentage': value_counts[target_value]/sum(value_counts)} - elif target_type == "interval": + elif target_type == "prediction": if not isinstance(output_var[output_var.columns[0]].iloc[0], numbers.Number): raise ValueError("Detected output column is not numeric. Please ensure that " + "the correct output column is being passed, and that no extra columns " + @@ -2515,7 +2542,7 @@ def generate_variable_importance( model_files: Union[str, Path, dict], train_data: pd.DataFrame, train_predictions: Union[pd.Series, list], - target_type: str = "interval", + target_type: str = "classification", interval_vars: Optional[list] = [], class_vars: Optional[list] = [], caslib: str = "Public", @@ -2535,8 +2562,8 @@ def generate_variable_importance( train_predictions : pandas.Series, list List of predictions made by the model on the training data. target_type : string, optional - Type the model is targeting. Currently supports "Classification" and "Interval" types. - The default value is "Interval". + Type the model is targeting. Currently supports "classification" and "prediction" types. + The default value is "classification". interval_vars : list, optional A list of interval variables. The default value is an empty list. class_vars : list, optional @@ -2564,7 +2591,7 @@ def generate_variable_importance( treeCrit = 'RSS' else: raise RuntimeError( - "The selected model type is unsupported. Currently, only models that have interval or classification target types are supported." + "The selected model type is unsupported. Currently, only models that have prediction or classification target types are supported." 
) request_packages = list() if interval_vars: From 7b14935a7545e0a84aca143c8222d46ef50ead94 Mon Sep 17 00:00:00 2001 From: djm21 Date: Tue, 26 Mar 2024 01:06:20 -0400 Subject: [PATCH 6/9] Update pzmm_binary_classification_model_import notebook to include model card generation --- ...m_binary_classification_model_import.ipynb | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/examples/pzmm_binary_classification_model_import.ipynb b/examples/pzmm_binary_classification_model_import.ipynb index e540b703..458b5014 100644 --- a/examples/pzmm_binary_classification_model_import.ipynb +++ b/examples/pzmm_binary_classification_model_import.ipynb @@ -740,7 +740,7 @@ ], "source": [ "import getpass\n", - "def write_model_stats(x_train, y_train, test_predict, test_proba, y_test, model, path):\n", + "def write_model_stats(x_train, y_train, test_predict, test_proba, y_test, model, path, prefix):\n", " # Calculate train predictions\n", " train_predict = model.predict(x_train)\n", " train_proba = model.predict_proba(x_train)\n", @@ -757,6 +757,20 @@ " test_data=test_data, \n", " json_path=path\n", " )\n", + "\n", + " full_training_data = pd.concat([y_train.reset_index(drop=True), x_train.reset_index(drop=True)], axis=1)\n", + "\n", + " pzmm.JSONFiles.generate_model_card(\n", + " model_prefix=prefix,\n", + " model_files = path,\n", + " algorithm = str(type(model).__name__),\n", + " train_data = full_training_data,\n", + " train_predictions=train_predict,\n", + " target_type='classification',\n", + " target_value=1,\n", + " interval_vars=predictor_columns,\n", + " selection_statistic='_RASE_',\n", + " )\n", " \n", "username = getpass.getpass()\n", "password = getpass.getpass()\n", @@ -766,8 +780,8 @@ "\n", "test_predict = [y_dtc_predict, y_rfc_predict, y_gbc_predict]\n", "test_proba = [y_dtc_proba, y_rfc_proba, y_gbc_proba]\n", - "for (mod, pred, proba, path) in zip(model, test_predict, test_proba, zip_folder):\n", - " write_model_stats(x_train, 
y_train, pred, proba, y_test, mod, path)" + "for (mod, pred, proba, path, prefix) in zip(model, test_predict, test_proba, zip_folder, model_prefix):\n", + " write_model_stats(x_train, y_train, pred, proba, y_test, mod, path, prefix)" ] }, { From e7c37918169909dc2b6819534462730fa6a1613f Mon Sep 17 00:00:00 2001 From: djm21 Date: Mon, 8 Apr 2024 16:34:25 -0500 Subject: [PATCH 7/9] added dmcas_misc file generation capability --- src/sasctl/pzmm/write_json_files.py | 82 ++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index 1d00bd0e..04da0260 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ b/src/sasctl/pzmm/write_json_files.py @@ -58,6 +58,7 @@ class NpEncoder(json.JSONEncoder): MAXDIFFERENCES = "maxDifferences.json" GROUPMETRICS = "groupMetrics.json" VARIMPORTANCES = 'dmcas_relativeimportance.json' +MISC = 'dmcas_misc.json' def _flatten(nested_list: Iterable) -> Generator[Any, None, None]: @@ -1174,7 +1175,8 @@ def calculate_model_statistics( train_data: Union[DataFrame, List[list], Type["numpy.array"]] = None, test_data: Union[DataFrame, List[list], Type["numpy.array"]] = None, json_path: Union[str, Path, None] = None, - target_type: str = "classification" + target_type: str = "classification", + cutoff: Optional[float] = None ) -> Union[dict, None]: """ Calculates fit statistics (including ROC and Lift curves) from datasets and then @@ -2345,6 +2347,12 @@ def generate_model_card( class_vars, caslib ) + + # Generates dmcas_misc.json file + cls.generate_misc( + conn, + model_files + ) @staticmethod def upload_training_data( @@ -2675,4 +2683,76 @@ def generate_variable_importance( print( f"{VARIMPORTANCES} was successfully written and saved to " f"{Path(model_files) / VARIMPORTANCES}" + ) + + @classmethod + def generate_misc( + cls, + conn, + model_files: Union[str, Path, dict] + ): + """ + Generates the dmcas_misc.json file, which is used to 
record correct and incorrect classification counts at a cutoff of 0.5 + + Parameters + ---------- + conn + A SWAT connection used to connect to the user's CAS server + model_files : string, Path, or dict + Either the directory location of the model files (string or Path object), or + a dictionary containing the contents of all the model files. + """ + if isinstance(model_files, dict): + if ROC not in model_files: + raise RuntimeError( + "The dmcas_roc.json file must be generated before the model card data " + "can be generated." + ) + roc_table = model_files[ROC] + else: + if not Path.exists(Path(model_files) / ROC): + raise RuntimeError( + "The dmcas_roc.json file must be generated before the model card data " + "can be generated." + ) + with open(Path(model_files) / ROC, 'r') as roc_file: + roc_table = json.load(roc_file) + correct_text = ["CORRECT", "INCORRECT", "CORRECT", "INCORRECT"] + outcome_values = ['1', '0', '0', '1'] + misc_data = list() + # Iterates through ROC table to get TRAIN, TEST, and VALIDATE data with a cutoff of .5 + for i in range(50, 300, 100): + roc_data = roc_table['data'][i]['dataMap'] + correctness_values = [roc_data['_TP_'], roc_data['_FP_'], roc_data['_TN_'], roc_data['_FN_']] + for (c_text, c_val, o_val) in zip(correct_text, correctness_values, outcome_values): + misc_data.append({ + "CorrectText": c_text, + "Outcome": o_val, + "_Count_": c_val, + "_DataRole_": roc_data['_DataRole_'], + "_cutoffSource_": "Default", + "_cutoff_": "0.5" + }) + + json_template_path = ( + Path(__file__).resolve().parent / f"template_files/{MISC}" + ) + with open(json_template_path, 'r') as f: + misc_json = json.load(f) + misc_json['data'] = misc_data + + if isinstance(model_files, dict): + model_files[MISC] = json.dumps(misc_json, indent=4, cls=NpEncoder) + if cls.notebook_output: + print( + f"{MISC} was successfully written and saved to " + f"model files dictionary." 
+ ) + else: + with open(Path(model_files) / MISC, 'w') as json_file: + json_file.write(json.dumps(misc_json, indent=4, cls=NpEncoder)) + if cls.notebook_output: + print( + f"{MISC} was successfully written and saved to " + f"{Path(model_files) / MISC}" ) \ No newline at end of file From 4b7ebca1ff87babd624a0dfbad8638153d1d0936 Mon Sep 17 00:00:00 2001 From: djm21 Date: Tue, 9 Apr 2024 14:32:04 -0500 Subject: [PATCH 8/9] fixed formatting for misc json --- src/sasctl/pzmm/write_json_files.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index 04da0260..15f9700a 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ b/src/sasctl/pzmm/write_json_files.py @@ -2724,14 +2724,18 @@ def generate_misc( roc_data = roc_table['data'][i]['dataMap'] correctness_values = [roc_data['_TP_'], roc_data['_FP_'], roc_data['_TN_'], roc_data['_FN_']] for (c_text, c_val, o_val) in zip(correct_text, correctness_values, outcome_values): - misc_data.append({ - "CorrectText": c_text, - "Outcome": o_val, - "_Count_": c_val, - "_DataRole_": roc_data['_DataRole_'], - "_cutoffSource_": "Default", - "_cutoff_": "0.5" - }) + misc_data.append( + { + "dataMap": { + "CorrectText": c_text, + "Outcome": o_val, + "_Count_": c_val, + "_DataRole_": roc_data['_DataRole_'], + "_cutoffSource_": "Default", + "_cutoff_": "0.5" + }, + "rowNumber": len(misc_data) + 1 + }) json_template_path = ( Path(__file__).resolve().parent / f"template_files/{MISC}" From 48d2ab5df54d2e94c2e7de62df51b0ece823baa8 Mon Sep 17 00:00:00 2001 From: djm21 Date: Tue, 9 Apr 2024 14:47:22 -0500 Subject: [PATCH 9/9] Added changes to fitstat for classification models --- src/sasctl/pzmm/write_json_files.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index 15f9700a..fdcd7334 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ 
b/src/sasctl/pzmm/write_json_files.py @@ -1299,6 +1299,16 @@ def calculate_model_statistics( roc_dict = cls.apply_dataframe_to_json(json_dict[1]["data"], i, roc_df) for j in range(len(roc_dict)): json_dict[1]["data"][j].update(roc_dict[j]) + if(roc_dict[j]["dataMap"]["_KS_"] == 1): + fitstat_data = { + "_KS_": roc_dict[j]["dataMap"]["_KS_"], + "_KS2_": roc_dict[j]["dataMap"]["_KS2_"], + "_C_": roc_dict[j]["dataMap"]["_C_"], + "_Gini_": roc_dict[j]["dataMap"]["_Gini_"], + "_Gamma_": roc_dict[j]["dataMap"]["_Gamma_"], + "_Tau_": roc_dict[j]["dataMap"]["_Tau_"] + } + json_dict[0]["data"][i]["dataMap"].update(fitstat_data) lift_df = pd.DataFrame(conn.CASTable("Lift", caslib="Public").to_frame()) lift_dict = cls.apply_dataframe_to_json(json_dict[2]["data"], i, lift_df, 1)