diff --git a/doc/conf.py b/doc/conf.py index 80ec7e7b..5fef0d6f 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -44,23 +44,32 @@ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.viewcode", - "numpydoc", # "sphinx.ext.napoleon", + "sphinx.ext.napoleon", "sphinx.ext.intersphinx", "sphinx.ext.todo", ] intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), + "python": ("https://docs.python.org/3/", None), + "pandas": ("https://pandas.pydata.org/docs/", None), + "sklearn": ("https://scikit-learn.org/stable/", None), + "numpy": ("https://numpy.org/doc/stable/", None), "swat": ("https://sassoftware.github.io/python-swat/", None), "pytest": ("https://docs.pytest.org/en/latest/", None), "betamax": ("https://betamax.readthedocs.io/en/latest/", None), - "requests": ("https://2.python-requests.org/en/master/", None), - "tox": ("https://tox.wiki/en/latest/objects.inv", None), - "flake8": ("https://flake8.pycqa.org/en/latest/objects.inv", None), + "requests": ("https://requests.readthedocs.io/en/latest/", None), + "tox": ("https://tox.wiki/en/latest/", None), + "flake8": ("https://flake8.pycqa.org/en/latest/", None), } autosummary_generate = True +# Napoleon settings +napoleon_google_docstring = False +napoleon_numpy_docstring = True +napoleon_use_ivar = True +napoleon_use_rtype = False + todo_include_todos = True todo_emit_warnings = True @@ -92,6 +101,17 @@ # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" +# Suppress warnings 'py:class reference target not found: ' +nitpicky = True +nitpick_ignore = [ + ('py:class','optional'), + ('py:class','json.encoder.JSONEncoder'), # I don't understand why it can't find it + ('py:class','Response'), + ('py:class','Request'), + ('py:class','_io.BytesIO'), + ('py:class','sasctl.utils.pymas.ds2.Ds2Variable'), # not sure what is wrong + ('py:class','sasctl._services.service.Service') # should the Service class be documented? 
+] # -- Options for HTML output ------------------------------------------------- diff --git a/doc/index.rst b/doc/index.rst index c51bb081..32e81555 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -4,11 +4,6 @@ sasctl Version |version| -.. toctree:: - :maxdepth: 3 - :hidden: true - - index Introduction ------------ @@ -313,7 +308,7 @@ Logging +++++++ All logging is handled through the built-in :mod:`logging` module with standard module-level loggers. The one exception -to this is :class:`.Session` request/response logging. Sessions contain a :attr:`~sasctl.core.Session.message_log` which is exclusively used +to this is :class:`.Session` request/response logging. Sessions contain a `message_log` which is exclusively used to record requests and responses made through the session. Message recording can be configured on a per-session basis by updating this logger, or the ``sasctl.core.session`` logger can be configured to control all message recording by all sessions. @@ -497,7 +492,7 @@ Python versions. Useful Tox Commands +++++++++++++++++++ -:mod:`tox` is used to automate common development tasks such as testing, linting, and building documentation. +:mod:`tox` is used to automate common development tasks such as testing, linting, and building documentation. Running :program:`tox` from the project root directory will automatically build virtual environments for all Python interpreters found on the system and then install the required packages necessary to perform a given task. The simplest way to run Tox is: diff --git a/src/sasctl/_services/cas_management.py b/src/sasctl/_services/cas_management.py index b8e44e91..90d4108f 100644 --- a/src/sasctl/_services/cas_management.py +++ b/src/sasctl/_services/cas_management.py @@ -29,9 +29,10 @@ def check_keys(valid_keys: list, input_keys: list, parameters: str): String describing the type of parameters that are being tested. 
- Returns - ------- - raises ValueError if input_keys are not valid + Raises + ------ + ValueError + if input_keys are not valid """ if not all(key in valid_keys for key in input_keys): raise ValueError( @@ -55,10 +56,12 @@ def check_required_key( String describing the type of parameters that are being tested. - Returns - ------- - raises ValueError if required_key is not present. - raises TypeError if required_key is neither a list or a string. + Raises + ------ + ValueError + if required_key is not present. + TypeError + if required_key is neither a list or a string. """ if isinstance(required_key, str): if required_key not in input_keys: @@ -225,7 +228,7 @@ def get_caslib(cls, name: str, server: str = None): Returns ------- - RestObj + RestObj or None """ server = server or DEFAULT_SERVER @@ -278,7 +281,7 @@ def get_table(cls, name: str, caslib: Union[str, dict] = None, server: str = Non Returns ------- - RestObj + RestObj or None """ caslib = caslib or DEFAULT_CASLIB @@ -309,7 +312,7 @@ def upload_file( Parameters ---------- - file : str or file-like object + file : str or typing.TextIO File containing data to upload or path to the file. name : str Name of the table to create @@ -321,7 +324,8 @@ def upload_file( header : bool, optional Whether the first row of data contains column headers. Defaults to True. - format_ : {"csv", "xls", "xlsx", "sas7bdat", "sashdat"}, optional + format_ : str, optional + Choose from {"csv", "xls", "xlsx", "sas7bdat", "sashdat"}. File of input `file`. Not required if format can be discerned from the file path. 
detail : dict, optional @@ -637,6 +641,11 @@ def del_table( Returns ------- RestObj + + Raises + ------ + ValueError + If `query_params` is empty """ server = server or DEFAULT_SERVER diff --git a/src/sasctl/_services/concepts.py b/src/sasctl/_services/concepts.py index f5ed2685..a74d3ef0 100644 --- a/src/sasctl/_services/concepts.py +++ b/src/sasctl/_services/concepts.py @@ -37,7 +37,7 @@ def assign_concepts( Parameters ---------- - documents : str or dict or list_like: + documents : str or dict or list Documents to analyze. May be either the URI to a CAS table where the documents are currently stored, or an iterable of strings containing the documents' text. @@ -55,8 +55,9 @@ def assign_concepts( model output_postfix : str, optional Text to be added to the end of all output table names. - match_type : {'all', 'longest', 'best'}, optional - Type of matches to return. Defaults to 'all'. + match_type : str, optional + Choose from ``{'all', 'longest', 'best'}``. + Type of matches to return. Defaults to 'all'. enable_facts : bool, optional Whether to enable facts in the results. Defaults to False. 
language : str, optional @@ -71,8 +72,8 @@ def assign_concepts( See Also -------- - cas_management.get_caslib - cas_management.get_table + .cas_management.CASManagement.get_caslib + .cas_management.CASManagement.get_table """ if documents is None: diff --git a/src/sasctl/_services/data_sources.py b/src/sasctl/_services/data_sources.py index 71789bff..92140952 100644 --- a/src/sasctl/_services/data_sources.py +++ b/src/sasctl/_services/data_sources.py @@ -227,7 +227,7 @@ def table_uri(cls, table): Parameters ---------- - table : dict or CASTable + table : dict or swat.cas.table.CASTable Returns ------- diff --git a/src/sasctl/_services/files.py b/src/sasctl/_services/files.py index fcce2183..f439d9d5 100644 --- a/src/sasctl/_services/files.py +++ b/src/sasctl/_services/files.py @@ -41,13 +41,13 @@ def create_file(cls, file, folder=None, filename=None, expiration=None): Parameters ---------- - file : str, pathlib.Path, or file_like + file : str, pathlib.Path, or typing.BinaryIO Path to the file to upload or a file-like object. folder : str or dict, optional - Name, or, or folder information as returned by :func:`.get_folder`. + Name or folder information as returned by :meth:`.get_folder`. filename : str, optional Name to assign to the uploaded file. Defaults to the filename if `file` is a path, otherwise required. - expiration : datetime, optional + expiration : datetime.datetime, optional A timestamp that indicates when to expire the file. Defaults to no expiration. 
Returns @@ -97,7 +97,8 @@ def get_file_content(cls, file): Returns ------- - content + str + content """ file = cls.get_file(file) diff --git a/src/sasctl/_services/folders.py b/src/sasctl/_services/folders.py index eb33cc63..03cf7150 100644 --- a/src/sasctl/_services/folders.py +++ b/src/sasctl/_services/folders.py @@ -71,11 +71,12 @@ def get_folder(cls, folder, refresh=False): ---------- folder : str or dict May be one of: - - folder name - - folder ID - - folder path - - folder delegate string - - dictionary representation of the folder + + - folder name + - folder ID + - folder path + - folder delegate string + - dictionary representation of the folder refresh : bool, optional Obtain an updated copy of the folder. diff --git a/src/sasctl/_services/microanalytic_score.py b/src/sasctl/_services/microanalytic_score.py index 6ce2b6b9..08d04a35 100644 --- a/src/sasctl/_services/microanalytic_score.py +++ b/src/sasctl/_services/microanalytic_score.py @@ -102,12 +102,12 @@ def execute_module_step(cls, module, step, return_dict=True, **kwargs): return_dict : bool, optional Whether the results should be returned as a dictionary instead of a tuple - kwargs : any + **kwargs Passed as arguments to the module step Returns ------- - any + Any Results of the step execution. Returned as a dictionary if `return_dict` is True, otherwise returned as a tuple if more than one value is returned, otherwise the single value. 
@@ -180,8 +180,10 @@ def create_module( name : str description : str source : str - language : str { 'python', 'ds2' } - scope : str { 'public', 'private' } + language : str + Choose from ``{'python','ds2'}`` + scope : str + Choose from ``{'public','private'}`` Returns ------- diff --git a/src/sasctl/_services/model_management.py b/src/sasctl/_services/model_management.py index c90a2660..e91b1aa6 100644 --- a/src/sasctl/_services/model_management.py +++ b/src/sasctl/_services/model_management.py @@ -448,16 +448,16 @@ def create_custom_kpi( model : str or dict The name or id of the model, or a dictionary representation of the model. project : str or dict - The name or id of the project, or a dictionary representation of - the project. + The name or id of the project, or a dictionary representation of + the project. timeLabel : str or list Label associated with the dataset used within the performance definition. kpiName : str or list Name of the custom KPI. kpiValue : int or float or list Value of the custom KPI. - timeSK : int or list, by default None - Indicator for the MM_STD_KPI table to denote performance task order. + timeSK : int or list + Default is None. Indicator for the MM_STD_KPI table to denote performance task order. """ from .model_repository import ModelRepository diff --git a/src/sasctl/_services/model_publish.py b/src/sasctl/_services/model_publish.py index 5ad3cae2..c3fa225f 100644 --- a/src/sasctl/_services/model_publish.py +++ b/src/sasctl/_services/model_publish.py @@ -230,7 +230,8 @@ def create_destination( ---------- name : str Name of the publishing destination. - type_ : {'cas', 'mas', 'hadoop', 'teradata'} + type_ : str + Choose from ``{'cas', 'mas', 'hadoop', 'teradata'}`` Type of publishing definition being created cas_server : str, optional Name of the CAS server. Defaults to 'cas-shared-default'. 
diff --git a/src/sasctl/_services/model_repository.py b/src/sasctl/_services/model_repository.py index 4788a706..dfbbb95d 100644 --- a/src/sasctl/_services/model_repository.py +++ b/src/sasctl/_services/model_repository.py @@ -65,7 +65,7 @@ def get_astore(cls, model): Returns ---------- - binary? + BinaryIO """ # TODO: Download binary object? @@ -296,10 +296,10 @@ def create_model( properties : dict, optional Custom model properties provided as name: value pairs. Allowed types are int, float, string, datetime.date, and datetime.datetime - input_variables : array_like, optional + input_variables : Iterable, optional Model input variables. By default, these are the same as the model project. - output_variables : array_like, optional + output_variables : Iterable, optional Model output variables. By default, these are the same as the model project. project_version : str @@ -817,10 +817,10 @@ def list_project_versions(cls, project): list of dicts List of dicts representing different project versions. Dict key/value pairs are as follows. - name : str - id : str - number : str - modified : datetime + - name : str + - id : str + - number : str + - modified : datetime """ project_info = cls.get_project(project) diff --git a/src/sasctl/_services/saslogon.py b/src/sasctl/_services/saslogon.py index 807ffdda..64dad1f5 100644 --- a/src/sasctl/_services/saslogon.py +++ b/src/sasctl/_services/saslogon.py @@ -46,7 +46,7 @@ def create_client( The ID to be assigned to the client. client_secret : str The client secret used for authentication. - scopes : list of string, optional + scopes : list of str, optional Specifies the levels of access that the client will be able to obtain on behalf of users when not using client credential authentication. If `allow_password` or `allow_auth_code` are @@ -160,7 +160,7 @@ def list_clients(cls, start_index=None, count=None, descending=False): ---------- start_index : int, optional Index of first client to return. Defaults to 1. 
- count : int, optiona; + count : int, optional Number of clients to retrieve. Defaults to 100. descending : bool, optional Whether to clients should be returned in descending order. diff --git a/src/sasctl/_services/sentiment_analysis.py b/src/sasctl/_services/sentiment_analysis.py index a8d72a8b..e228aab3 100644 --- a/src/sasctl/_services/sentiment_analysis.py +++ b/src/sasctl/_services/sentiment_analysis.py @@ -37,7 +37,7 @@ def analyze_sentiment( Parameters ---------- - documents : str or dict or list_like: + documents : str or dict or Iterable Documents to analyze. May be either the URI to a CAS table where the documents are currently stored, or an iterable of strings containing the documents' text. @@ -67,8 +67,8 @@ def analyze_sentiment( See Also -------- - cas_management.get_caslib - cas_management.get_table + .cas_management.CASManagement.get_caslib + .cas_management.CASManagement.get_table """ if current_session().version_info() >= 4: diff --git a/src/sasctl/_services/service.py b/src/sasctl/_services/service.py index 7958f201..b1fd1497 100644 --- a/src/sasctl/_services/service.py +++ b/src/sasctl/_services/service.py @@ -78,7 +78,7 @@ def request(cls, verb, path, session=None, format_="auto", **kwargs): text: Response.text auto: `RestObj` constructed from JSON if possible, otherwise same as `text`. - kwargs : any + **kwargs Additional arguments are passed to the session `request` method. 
Returns @@ -369,7 +369,7 @@ def _get_rel(cls, item, rel, *args, func=None, filter_=None): ---------- item : str or dict rel : str - args : any + *args Passed to `func` func : function, optional Callable that takes (item, *args) and returns a RestObj of `item` @@ -408,7 +408,7 @@ def _monitor_job(cls, job, max_retries=60): Returns ------- - job + job : dict Raises ------ diff --git a/src/sasctl/_services/text_categorization.py b/src/sasctl/_services/text_categorization.py index de4cd9cf..80cc2330 100644 --- a/src/sasctl/_services/text_categorization.py +++ b/src/sasctl/_services/text_categorization.py @@ -31,7 +31,7 @@ def categorize( Parameters ---------- - documents : str or dict or list_like: + documents : str or dict or Iterable Documents to parse. May be either the URI to a CAS table where the documents are currently stored, or an iterable of strings containing the documents' text. @@ -60,8 +60,8 @@ def categorize( See Also -------- - cas_management.get_caslib - cas_management.get_table + .cas_management.CASManagement.get_caslib + .cas_management.CASManagement.get_table """ if current_session().version_info() >= 4: diff --git a/src/sasctl/_services/text_parsing.py b/src/sasctl/_services/text_parsing.py index 1bb21776..07b4fb26 100644 --- a/src/sasctl/_services/text_parsing.py +++ b/src/sasctl/_services/text_parsing.py @@ -16,10 +16,10 @@ class TextParsing(Service): Parsing is a key operation in understanding your data. 
Parsing a document involves the following analyses: - - Identifying terms used in the document - - Recognizing parts of speech for each term - - Identifying which terms are entities (person, country, and so on) - - Resolving synonyms, misspellings, and so on + - Identifying terms used in the document + - Recognizing parts of speech for each term + - Identifying which terms are entities (person, country, and so on) + - Resolving synonyms, misspellings, and so on The output tables that are generated during parsing can also be used in downstream analyses such as topic generation. @@ -56,7 +56,7 @@ def parse_documents( Parameters ---------- - documents : str or dict or list_like: + documents : str or dict or Iterable Documents to parse. May be either the URI to a CAS table where the documents are currently stored, or an iterable of strings containing the documents' text. @@ -97,8 +97,8 @@ def parse_documents( See Also -------- - cas_management.get_caslib - cas_management.get_table + .cas_management.CASManagement.get_caslib + .cas_management.CASManagement.get_table """ if current_session().version_info() >= 4: diff --git a/src/sasctl/core.py b/src/sasctl/core.py index b5b69435..c8fc3f7a 100644 --- a/src/sasctl/core.py +++ b/src/sasctl/core.py @@ -113,13 +113,13 @@ def current_session(*args, **kwargs): If call with no arguments, the current session instance is returned, or None if no session has been created yet. If called with an existing session instance, that session will be set as the default. Otherwise, a new - `Session` is instantiated using the provided arguments and set as the + :class:`Session` is instantiated using the provided arguments and set as the default. Parameters ---------- - args : any - kwargs : any + *args + **kwargs Returns ------- @@ -237,19 +237,20 @@ class Session(requests.Session): Parameters ---------- - hostname : str or swat.CAS - Name of the server to connect to or an established swat.CAS session. 
+ hostname : str or swat.cas.connection.CAS + Name of the server to connect to or an established CAS session. username : str, optional Username for authentication. Not required if `host` is a CAS connection, if Kerberos is used, or if `token` is provided. If using Kerberos and an explicit - username is desired, maybe be a string in 'user@REALM' format. + username is desired, maybe be a string in `'user@REALM'` format. password : str, optional Password for authentication. Not required when `host` is a CAS connection, `authinfo` is provided, `token` is provided, or Kerberos is used. authinfo : str, optional Path to a .authinfo or .netrc file from which credentials should be pulled. - protocol : {'http', 'https'} + protocol : str + Choose from ``{'http', 'https'}``. Whether to use HTTP or HTTPS connections. Defaults to `https`. port : int, optional Port number for the connection if a non-standard port is used. @@ -261,7 +262,7 @@ class Session(requests.Session): OAuth token to use for authorization. client_id : str, optional Client ID requesting access. Use if connection to Viya should be - made using `client_credentials` method. + made using "client_credentials" method. client_secret : str, optional Client secret for client requesting access. Required if `client_id` is provided. @@ -271,9 +272,9 @@ class Session(requests.Session): ---------- message_log : logging.Logger A log to which all REST request and response messages will be sent. Attach a handler using - `add_logger()` to capture these messages. + :meth:`add_logger()` to capture these messages. - filters : list of callable + filters : list of Callable A collection of functions that will be called with each request and response object *prior* to logging the messages, allowing any sensitive information to be removed first. @@ -450,11 +451,11 @@ def add_logger(self, handler, level=None): A Handler instance to use for logging the requests and responses. 
level : int, optional The logging level to assign to the handler. Ignored if handler's - logging level is already set. Defaults to DEBUG. + logging level is already set. Defaults to :data:`logging.DEBUG`. Returns ------- - handler + logging.Handler .. versionadded:: 1.2.0 @@ -478,7 +479,7 @@ def add_stderr_logger(self, level=None): Parameters ---------- level : int, optional - The logging level of the handler. Defaults to logging.DEBUG + The logging level of the handler. Defaults to :data:`logging.DEBUG` Returns ------- @@ -497,19 +498,19 @@ def as_swat(self, server=None, **kwargs): ---------- server : str, optional The logical name of the CAS server, not the hostname. Defaults to "cas-shared-default". - kwargs : any - Additional arguments to pass to the `swat.CAS` constructor. Can be used to override this method's - default behavior or customize the CAS session. + **kwargs + Additional arguments to pass to the :class:`swat.CAS <swat.cas.connection.CAS>` constructor. + Can be used to override this method's default behavior or customize the CAS session. Returns ------- - swat.CAS + swat.cas.connection.CAS An active SWAT connection Raises ------ RuntimeError - If `swat` package is not available. + If :mod:`swat` package is not available. Examples -------- @@ -896,12 +897,12 @@ def _get_authorization_token( This method supports multiple authentication methods: - - an existing OAuth2 token - - password authentication - - client credentials - - Kerberos - - cached tokens (from previous authorization codes) - - authorization code + - an existing OAuth2 token + - password authentication + - client credentials + - Kerberos + - cached tokens (from previous authorization codes) + - authorization code If authentication using client credentials fails because the client_credentials grant type is not allowed the token cache will @@ -1253,10 +1254,11 @@ def _request_token_with_oauth( """Request a token from the SAS SASLogon service. 
Supports four different flows: - - authenticate with a username & password and receive a token - - authenticate with a client id & secret and receive a token - - provide an authorization code and receive a token - - provide a refresh token and receive a new token + + - authenticate with a username & password and receive a token + - authenticate with a client id & secret and receive a token + - provide an authorization code and receive a token + - provide a refresh token and receive a new token Parameters ---------- @@ -1472,17 +1474,17 @@ class PageIterator: Parameters ---------- obj : RestObj - An instance of `RestObj` containing any initial items and a link to + An instance of :class:`RestObj` containing any initial items and a link to retrieve additional items. session : Session - The `Session` instance to use for requesting additional items. Defaults - to current_session() + The :class:`Session` instance to use for requesting additional items. Defaults + to :meth:`current_session()` threads : int Number of threads allocated to downloading additional pages. Yields ------ - List[RestObj] + list of RestObj Items contained in the current page """ @@ -1586,17 +1588,17 @@ def _request_async(self, start): class PagedItemIterator: """Iterates through a collection that must be "paged" from the server. - Uses `PageIterator` to transparently download pages of items from the server + Uses :class:`PageIterator` to transparently download pages of items from the server as needed. Parameters ---------- obj : RestObj - An instance of `RestObj` containing any initial items and a link to + An instance of :class:`RestObj` containing any initial items and a link to retrieve additional items. session : Session - The `Session` instance to use for requesting additional items. Defaults - to current_session() + The :class:`Session` instance to use for requesting additional items. 
Defaults + to :meth:`current_session()` threads : int Number of threads allocated to downloading additional items. @@ -1651,11 +1653,11 @@ def __iter__(self): class PagedListIterator: - """Iterates over an instance of PagedList + """Iterates over an instance of :class:`PagedList` Parameters ---------- - l : list-like + l : list """ @@ -1686,11 +1688,11 @@ class PagedList(list): Parameters ---------- obj : RestObj - An instance of `RestObj` containing any initial items and a link to + An instance of :class:`RestObj` containing any initial items and a link to retrieve additional items. session : Session, optional - The `Session` instance to use for requesting additional items. Defaults - to current_session() + The :class:`Session` instance to use for requesting additional items. Defaults + to :meth:`current_session()` threads : int, optional Number of threads allocated to loading additional items. @@ -1783,8 +1785,9 @@ class VersionInfo: Release cadence for Viya 4. Should be one of 'stable' or 'LTS'. release : str, optional Release number for Viya 4. Two formats are currently possible: - - YYYY.R.U where R is the LTS release number in YYYY and U is the updates since R - - YYYY.MM where MM is the month of the release. + + - YYYY.R.U where R is the LTS release number in YYYY and U is the updates since R + - YYYY.MM where MM is the month of the release. """ @@ -1904,8 +1907,8 @@ def get(path, **kwargs): ---------- path : str The path portion of the URL. - kwargs : any - Passed to `request`. + **kwargs + Passed to :meth:`request`. Returns ------- @@ -1928,8 +1931,8 @@ def head(path, **kwargs): ---------- path : str The path portion of the URL. - kwargs : any - Passed to `request`. + **kwargs + Passed to :meth:`request`. Returns ------- @@ -1946,8 +1949,8 @@ def post(path, **kwargs): ---------- path : str The path portion of the URL. - kwargs : any - Passed to `request`. + **kwargs + Passed to :meth:`request`. 
Returns ------- @@ -1967,8 +1970,8 @@ def put(path, item=None, **kwargs): item : RestObj, optional A existing object to PUT. If provided, ETag and Content-Type headers will automatically be specified. - kwargs : any - Passed to `request`. + **kwargs + Passed to :meth:`request`. Returns ------- @@ -1996,8 +1999,8 @@ def delete(path, **kwargs): ---------- path : str The path portion of the URL. - kwargs : any - Passed to `request`. + **kwargs + Passed to :meth:`request`. Returns ------- @@ -2018,21 +2021,22 @@ def request(verb, path, session=None, format="auto", **kwargs): Path portion of URL to request. session : Session, optional Defaults to `current_session()`. - format : {'auto', 'rest', 'response', 'content', 'json', 'text'} + format : str The format of the return response. Defaults to `auto`. - rest: `RestObj` constructed from JSON. - response: the raw `Response` object. - content: Response.content - json: Response.json() - text: Response.text - auto: `RestObj` constructed from JSON if possible, otherwise same as - `text`. - kwargs : any - Additional arguments are passed to the session `request` method. + + - `rest`: :class:`RestObj` constructed from JSON. + - `response`: the raw :class:`requests.Response` object. + - `content`: :attr:`requests.Response.content` + - `json`: :meth:`requests.Response.json` + - `text`: :attr:`requests.Response.text` + - `auto`: :class:`RestObj` constructed from JSON if possible, otherwise same as `text`. + **kwargs + Additional arguments are passed to the session :meth:`Session.request` method. Returns ------- str, bytes, or requests.Response + """ session = session or current_session() @@ -2110,7 +2114,7 @@ def request_link(obj, rel, **kwargs): ---------- obj : dict rel : str - kwargs : any + **kwargs Passed to :func:`request` Returns @@ -2150,7 +2154,7 @@ def uri_as_str(obj): def _unwrap(json): - """Converts a JSON response to one or more `RestObj` instances. 
+ """Converts a JSON response to one or more :class:`RestObj` instances. If the JSON contains a .items property, only those items are converted and returned. @@ -2364,9 +2368,8 @@ def platform_version(): Returns ------- - string : {'3.5', '4.0'} - - SAS Viya version number + str + SAS Viya version number '3.5' or '4.0' """ warnings.warn( diff --git a/src/sasctl/pzmm/git_integration.py b/src/sasctl/pzmm/git_integration.py index 57a2ee1a..afabb4ef 100644 --- a/src/sasctl/pzmm/git_integration.py +++ b/src/sasctl/pzmm/git_integration.py @@ -51,20 +51,20 @@ def get_zipped_model( Parameters ---------- - model : string or RestObj + model : str or RestObj Model name, UUID, or RestObj which identifies the model. If only the model name is provided, the project name must also be supplied. - git_path : string or Path + git_path : str or pathlib.Path Base directory of the git repository. - project : string or RestObj, optional + project : str or RestObj, optional Project identifier, which is required when only the model name is supplied. Default is None. Returns ------- - model_name : string + model_name : str Name of the model retrieved from SAS Model Manager. - project_name : string + project_name : str Name of the project the model was retrieved from in SAS Model Manager. """ # Find the specified model and pull down the contents in a zip format @@ -162,7 +162,7 @@ def model_exists(project, name, force): Parameters ---------- - project : string or dict + project : str or dict The name or id of the model project, or a dictionary representation of the project. name : str or dict The name of the model. @@ -225,11 +225,11 @@ def pull_viya_model( Parameters ---------- - model : string or RestObj + model : str or RestObj A string or JSON response representing the model to be pulled down - git_path : string or Path + git_path : str or pathlib.Path Base directory of the git repository. 
- project : string or RestObj, optional + project : str or RestObj, optional A string or JSON response representing the project the model exists in, default is None. """ # Try to pull down the model assuming a UUID or RestObj is provided @@ -289,12 +289,12 @@ def push_git_model( Parameters ---------- - git_path : string or Path + git_path : str or pathlib.Path Base directory of the git repository or path which includes project and model directories. - model_name : string, optional + model_name : str, optional Name of model to be imported, by default None - project_name : string, optional + project_name : str, optional Name of project the model is imported from, by default None project_version : str, optional Name of project version to import model in to. Default @@ -336,13 +336,13 @@ def git_repo_push(cls, git_path, commit_message, remote="origin", branch="main") Parameters ---------- - git_path : string or Path + git_path : str or pathlib.Path Base directory of the git repository. - commit_message : string + commit_message : str Commit message for the new commit remote : str, optional Remote name for the remote repository, by default 'origin' - branch : string + branch : str Branch name for the target pull branch from remote, by default 'main' """ check_git_status() @@ -359,11 +359,11 @@ def git_repo_pull(cls, git_path, remote="origin", branch="main"): Parameters ---------- - git_path : string or Path + git_path : str or pathlib.Path Base directory of the git repository. - remote : string + remote : str Remote name for the remote repository, by default 'origin' - branch : string + branch : str Branch name for the target pull branch from remote, by default 'main' """ check_git_status() @@ -380,9 +380,9 @@ def push_git_project(cls, git_path, project=None): Parameters ---------- - git_path : string or Path + git_path : str or pathlib.Path Base directory of the git repository or the project directory. 
- project : string or RestObj + project : str or RestObj Project name, UUID, or JSON response from SAS Model Manager. """ # Check to see if provided project argument is a valid project on SAS Model @@ -419,9 +419,9 @@ def pull_mm_project(cls, git_path, project): Parameters ---------- - git_path : string or Path + git_path : str or pathlib.Path Base directory of the git repository. - project : string or RestObj + project : str or RestObj The name or id of the model project, or a RestObj representation of the project. """ diff --git a/src/sasctl/pzmm/import_model.py b/src/sasctl/pzmm/import_model.py index 9c1a39c8..56e50fda 100644 --- a/src/sasctl/pzmm/import_model.py +++ b/src/sasctl/pzmm/import_model.py @@ -72,11 +72,11 @@ def project_exists( project. response : str, dict, or RestObj, optional JSON response of the get_project() call to model repository service. - target_values : list of strings, optional + target_values : list of str, optional A list of target values for the target variable. This argument and the score_metrics argument dictate the handling of the predicted values from the prediction method. The default value is None. - model_files : string, Path, or dict + model_files : str, pathlib.Path, or dict Either the directory location of the model files (string or Path object), or a dictionary containing the contents of all the model files. overwrite_project_properties : bool, optional @@ -230,20 +230,21 @@ def import_model( The following are generated by this function if a path is provided in the model_files argument: - * '*Score.py' - The Python score code file for the model. - * '*.zip' - The zip archive of the relevant model files. In Viya 3.5 the Python - score code is not present in this initial zip file. + + * '\*Score.py' + The Python score code file for the model. + * '\*.zip' + The zip archive of the relevant model files. In Viya 3.5 the Python + score code is not present in this initial zip file. 
Parameters ---------- - model_files : string, Path, or dict + model_files : str, pathlib.Path, or dict Either the directory location of the model files (string or Path object), or a dictionary containing the contents of all the model files. - model_prefix : string + model_prefix : str The variable for the model name that is used when naming model files. - (For example: hmeqClassTree + [Score.py || .pickle]). + (For example: 'hmeqClassTree + [Score.py || .pickle])'. project : str, dict, or RestObj The name or id of the model project, or a dictionary representation of the project. @@ -251,34 +252,36 @@ def import_model( The `DataFrame` object contains the training data, and includes only the predictor columns. The write_score_code function currently supports int(64), float(64), and string data types for scoring. The default value is None. - predict_method : [function -> list, list], optional + predict_method : Callable or list, optional The Python function used for model predictions and the expected output types. The expected output types can be passed as example values or as the value types. For example, if the model is a Scikit-Learn DecisionTreeClassifier, then pass either of the following: - * [sklearn.tree.DecisionTreeClassifier.predict, ["A"]] - * [sklearn.tree.DecisionTreeClassifier.predict_proba, [0.4, float]] + + * [sklearn.tree.DecisionTreeClassifier.predict, ["A"]] + * [sklearn.tree.DecisionTreeClassifier.predict_proba, [0.4, float]] + The default value is None. - score_metrics : string list, optional + score_metrics : list of str, optional The scoring score_metrics for the model. For classification models, it is - assumed that the first value in the list represents the classification + assumed that the first value in the list represents the classification output. This function supports single and multi-class classification models. The default value is None. 
- pickle_type : string, optional + pickle_type : str, optional Indicator for the package used to serialize the model file to be uploaded to SAS Model Manager. The default value is `pickle`. - project_version : string, optional + project_version : str, optional The project version to import the model in to on SAS Model Manager. The default value is "latest". overwrite_model : bool, optional Set whether models with the same name should be overwritten when attempting to import the model. The default value is False. - score_cas : boolean, optional + score_cas : bool, optional Sets whether models registered to SAS Viya 3.5 should be able to be scored and validated through both CAS and SAS Micro Analytic Service. If set to false, then the model will only be able to be scored and validated through SAS Micro Analytic Service. The default value is True. - missing_values : boolean, optional + missing_values : bool, optional Sets whether data handled by the score code will impute for missing values. The default value is False. mlflow_details : dict, optional @@ -286,8 +289,8 @@ def import_model( read_mlflow_model_file function. The default value is None. predict_threshold : float, optional The prediction threshold for normalized probability score_metrics. Values - are expected to be between 0 and 1. The default value is None. - target_values : list of strings, optional + are expected to be between 0 and 1. The default value is None. + target_values : list of str, optional A list of target values for the target variable. This argument and the score_metrics argument dictate the handling of the predicted values from the prediction method. The order of the target values should reflect the @@ -300,23 +303,17 @@ def import_model( index should match the index of the target outcome in target_values. If target_values are not given, this index should indicate whether the the target probability variable is the first or second variable returned by the model. The default value is 1. 
- kwargs : dict, optional + **kwargs Other keyword arguments are passed to the following function: - * sasctl.pzmm.ScoreCode.write_score_code(..., - binary_h2o_model=False, - binary_string=None, - model_file_name=None, - mojo_model=False, - statsmodels_model=False, - tf_keras_model=False - ) + :meth:`.ScoreCode.write_score_code` + Returns ------- RestObj JSON response from the POST API call to SAS Model Manager for importing a zipped model - model_files : dict, str, or Path + model_files : dict, str, or pathlib.Path Dictionary representation of all files or the path the model files were generated from. """ diff --git a/src/sasctl/pzmm/mlflow_model.py b/src/sasctl/pzmm/mlflow_model.py index deaf0131..e8ae1089 100644 --- a/src/sasctl/pzmm/mlflow_model.py +++ b/src/sasctl/pzmm/mlflow_model.py @@ -18,17 +18,17 @@ def read_mlflow_model_file(cls, m_path=Path.cwd()): Parameters ---------- - m_path : str or Path object, optional - Directory path of the MLFlow model files. Default is the current working - directory. + m_path : str or pathlib.Path, optional + Directory path of the MLFlow model files. Default is the current working + directory. Returns ------- var_dict : dict Model properties and metadata - inputs_dict : list of dicts + inputs_dict : list of dict Model input variables - outputs_dict : list of dicts + outputs_dict : list of dict Model output variables """ with open(Path(m_path) / "MLmodel", "r") as m_file: diff --git a/src/sasctl/pzmm/model_parameters.py b/src/sasctl/pzmm/model_parameters.py index 39bea0ad..b8a1b9ab 100644 --- a/src/sasctl/pzmm/model_parameters.py +++ b/src/sasctl/pzmm/model_parameters.py @@ -98,12 +98,12 @@ def generate_hyperparameters( Parameters ---------- - model : Python object + model : Any Python object representing the model. model_prefix : str Name used to create model files. (e.g. (model_prefix) + "Hyperparameters.json") - pickle_path : str, Path + pickle_path : str, pathlib.Path Directory location of model files. 
""" @@ -263,7 +263,7 @@ def add_hyperparameters(cls, model: Union[str, dict, RestObj], **kwargs) -> None ---------- model : str, dict, or RestObj The name or id of the model, or a dictionary representation of the model. - kwargs + **kwargs Named variables pairs representing hyperparameters to be added to the hyperparameter file. """ @@ -320,7 +320,7 @@ def get_project_kpis( Returns ------- - kpi_table_df : pandas DataFrame + kpi_table_df : pandas.DataFrame A pandas DataFrame representing the MM_STD_KPI table. Note that SAS missing values are replaced with pandas-valid missing values. """ diff --git a/src/sasctl/pzmm/pickle_model.py b/src/sasctl/pzmm/pickle_model.py index 2e416d26..ac91e98d 100644 --- a/src/sasctl/pzmm/pickle_model.py +++ b/src/sasctl/pzmm/pickle_model.py @@ -37,17 +37,18 @@ def pickle_trained_model( object. The following files are generated by this function: - * '*.pickle' - Binary pickle file containing a trained model. - * '*.mojo' - Archived H2O.ai MOJO file containing a trained model. + + * '\*.pickle' + Binary pickle file containing a trained model. + * '\*.mojo' + Archived H2O.ai MOJO file containing a trained model. Parameters --------------- - model_prefix : str or Path + model_prefix : str or pathlib.Path Variable name for the model to be displayed in SAS Open Model Manager (i.e. hmeqClassTree + [Score.py || .pickle]). - trained_model : model object + trained_model : Any The trained model to be exported. pickle_path : str, optional File location for the output pickle file. The default value is None. @@ -67,7 +68,7 @@ def pickle_trained_model( Returns ------- - binary_string : binary str + binary_string : str When the is_binary_string flag is set to True, return a binary string representation of the model instead of a pickle or MOJO file. 
dict diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index e13a49ca..42d5e6aa 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ b/src/sasctl/pzmm/write_json_files.py @@ -5,7 +5,7 @@ import ast import importlib import json -import math +# import math #not used import pickle import pickletools import sys @@ -129,7 +129,7 @@ def write_var_json( is_input : bool, optional Boolean flag to check if generating the input or output variable JSON. The default value is True. - json_path : str or Path, optional + json_path : str or pathlib.Path, optional File location for the output JSON file. The default value is None. Returns @@ -172,12 +172,12 @@ def generate_variable_properties( Parameters ---------- - input_data : pandas.Dataframe or pandas.Series + input_data : pandas.DataFrame or pandas.Series Dataset for either the input or output example data for the model. Returns ------- - dict_list : list of dicts + dict_list : list of dict List of dictionaries containing the variable properties. """ # Check if input_data is a Series or DataFrame @@ -316,7 +316,7 @@ def write_model_properties_json( classification model. Providing > 2 target values will supply the values for the different target events as a custom property. An error is raised if only 1 target value is supplied. The default value is None. - json_path : str or Path, optional + json_path : str or pathlib.Path, optional Path for an output ModelProperties.json file to be generated. If no value is supplied a dict is returned instead. The default value is None. model_desc : str, optional @@ -331,7 +331,7 @@ def write_model_properties_json( train_table : str, optional The path to the model's training table within SAS Viya. The default value is an empty string. - properties : List of dict, optional + properties : list of dict, optional List of custom properties to be shown in the user-defined properties section of the model in SAS Model Manager. 
Dict entries should contain the `name`, `value`, and `type` keys. The default value is an empty list. @@ -484,7 +484,7 @@ def write_file_metadata_json( model_prefix : str The variable for the model name that is used when naming model files. For example: hmeqClassTree + [Score.py | .pickle]. - json_path : str or Path, optional + json_path : str or pathlib.Path, optional Path for an output ModelProperties.json file to be generated. If no value is supplied a dict is returned instead. The default value is None. is_h2o_model : bool, optional @@ -545,29 +545,30 @@ def input_fit_statistics( There are three modes to add fit parameters to the JSON file: - 1. Call the function with additional tuple arguments containing - the name of the parameter, its value, and the partition that it - belongs to. + 1. Call the function with additional tuple arguments containing + the name of the parameter, its value, and the partition that it + belongs to. - 2. Provide line by line user input prompted by the function. + 2. Provide line by line user input prompted by the function. - 3. Import values from a CSV file. Format should contain the above - tuple in each row. + 3. Import values from a CSV file. Format should contain the above + tuple in each row. 
The following are the base statistical parameters SAS Viya supports: - * RASE = Root Average Squared Error - * NObs = Sum of Frequencies - * GINI = Gini Coefficient - * GAMMA = Gamma - * MCE = Misclassification Rate - * ASE = Average Squared Error - * MCLL = Multi-Class Log Loss - * KS = KS (Youden) - * KSPostCutoff = ROC Separation - * DIV = Divisor for ASE - * TAU = Tau - * KSCut = KS Cutoff - * C = Area Under ROC + + * RASE = Root Average Squared Error + * NObs = Sum of Frequencies + * GINI = Gini Coefficient + * GAMMA = Gamma + * MCE = Misclassification Rate + * ASE = Average Squared Error + * MCLL = Multi-Class Log Loss + * KS = KS (Youden) + * KSPostCutoff = ROC Separation + * DIV = Divisor for ASE + * TAU = Tau + * KSCut = KS Cutoff + * C = Area Under ROC This function outputs a JSON file named "dmcas_fitstat.json". @@ -583,7 +584,7 @@ def input_fit_statistics( data_role). For example, a sample parameter call would be 'NObs', 3488, or 'TRAIN'. Variable data_role is typically either TRAIN, TEST, or VALIDATE or 1, 2, 3 respectively. The default value is None. - json_path : str or Path, optional + json_path : str or pathlib.Path, optional Location for the output JSON file. The default value is None. Returns @@ -688,13 +689,13 @@ def user_input_fitstat(cls, data: List[dict]) -> List[dict]: Parameters ---------- - data : list of dicts + data : list of dict List of dicts for the data values of each parameter. Split into the three valid partitions (TRAIN, TEST, VALIDATE). Returns ------- - list of dicts + list of dict List of dicts with the user provided values inputted. """ while True: @@ -795,37 +796,40 @@ def assess_model_bias( score_table : pandas.DataFrame Data structure containing actual values, predicted or predicted probability values, and sensitive variable values. All columns in the score table must have valid variable names. 
- sensitive_values : string or list of strings + sensitive_values : str or list of str Sensitive variable name or names in score_table. The variable name must follow SAS naming conventions (no spaces and the name cannot begin with a number or symbol). - actual_values : string + actual_values : str Variable name containing the actual values in score_table. The variable name must follow SAS naming conventions (no spaces and the name cannot begin with a number or symbol). - pred_values : string, required for regression problems, otherwise not used + pred_values : str + Required for regression problems, otherwise not used Variable name containing the predicted values in score_table. The variable name must follow SAS naming conventions (no spaces and the name cannot begin with a number or symbol).Required for regression problems. The default value is None. - prob_values : list of strings, required for classification problems, otherwise not used - A list of variable names containing the predicted probability values in the score table. The first element - should represent the predicted probability of the target class. Required for classification problems. Default - is None. - levels: List of strings, integers, booleans, required for classification problems, otherwise not used + prob_values : list of str + Required for classification problems, otherwise not used + A list of variable names containing the predicted probability values in the score table. The first element + should represent the predicted probability of the target class. Required for classification problems. Default + is None. + levels : list of str, list of int, or list of bool + Required for classification problems, otherwise not used List of classes of a nominal target in the order they were passed in prob_values. Levels must be passed as a string. Default is None. - json_path : str or Path, optional + json_path : str or pathlib.Path, optional Location for the output JSON files. 
If a path is passed, the json files will populate in the directory and the function will return None, unless return_dataframes is True. Otherwise, the function will return the json - strings in a dictionary (dict["maxDifferences.json"] and dict["groupMetrics.json"]). The default value is - None. + strings in a dictionary (dict["maxDifferences.json"] and dict["groupMetrics.json"]). The default value is + None. cutoff : float, optional Cutoff value for confusion matrix. Default is 0.5. - datarole : string, optional + datarole : str, optional The data being used to assess bias (i.e. 'TEST', 'VALIDATION', etc.). Default is 'TEST.' - return_dataframes : boolean, optional + return_dataframes : bool, optional If true, the function returns the pandas data frames used to create the JSON files and a table for bias metrics. If a JSON path is passed, then the function will return a dictionary that only includes the data frames (dict["maxDifferencesData"], dict["groupMetricData"], and dict["biasMetricsData"]). If a JSON path is - not passed, the function will return a dictionary with the three tables and the two JSON strings + not passed, the function will return a dictionary with the three tables and the two JSON strings (dict["maxDifferences.json"] and dict["groupMetrics.json"]). The default value is False. Returns @@ -837,7 +841,7 @@ def assess_model_bias( Raises ------ RuntimeError - If swat is not installed, this function cannot perform the necessary + If :mod:`swat` is not installed, this function cannot perform the necessary calculations. ValueError @@ -958,16 +962,17 @@ def format_max_differences( ) -> DataFrame: """ Converts a list of max differences DataFrames into a singular DataFrame + Parameters ---------- - maxdiff_dfs: List[DataFrame] + maxdiff_dfs: list of pandas.DataFrame A list of max_differences DataFrames returned by CAS - datarole : string, optional + datarole : str, optional The data being used to assess bias (i.e. 'TEST', 'VALIDATION', etc.). 
Default is 'TEST.' Returns ------- - DataFrame + pandas.DataFrame A singluar DataFrame containing all max differences data """ maxdiff_df = pd.concat(maxdiff_dfs) @@ -992,24 +997,27 @@ def format_group_metrics( ) -> DataFrame: """ Converts list of group metrics DataFrames to a single DataFrame + Parameters ---------- - groupmetrics_dfs: List[DataFrame] + groupmetrics_dfs: list of pandas.DataFrame List of group metrics DataFrames generated by CASAction - pred_values : string, required for regression problems, otherwise not used + pred_values : str + Required for regression problems, otherwise not used. Variable name containing the predicted values in score_table. The variable name must follow SAS naming conventions (no spaces and the name cannot begin with a number or symbol).Required for regression problems. The default value is None. - prob_values : list of strings, required for classification problems, otherwise not used - A list of variable names containing the predicted probability values in the score table. The first element - should represent the predicted probability of the target class. Required for classification problems. Default - is None. - datarole : string, optional - The data being used to assess bias (i.e. 'TEST', 'VALIDATION', etc.). Default is 'TEST.' + prob_values : list of str + Required for classification problems, otherwise not used + A list of variable names containing the predicted probability values in the score table. The first element + should represent the predicted probability of the target class. Required for classification problems. Default + is None. + datarole : str, optional + The data being used to assess bias (i.e. 'TEST', 'VALIDATION', etc.). Default is 'TEST'. 
Returns ------- - DataFrame + pandas.DataFrame A singular DataFrame containing formatted data for group metrics """ # adding group metrics dataframes and adding values/ formatting @@ -1067,33 +1075,37 @@ def bias_dataframes_to_json( ): """ Properly formats data from FairAITools CAS Action Set into a JSON readable formats + Parameters ---------- - groupmetrics: DataFrame + groupmetrics: pandas.DataFrame A DataFrame containing the group metrics data - maxdifference: DataFrame + maxdifference: pandas.DataFrame A DataFrame containing the max difference data n_sensitivevariables: int The total number of sensitive values - actual_values : String + actual_values : str Variable name containing the actual values in score_table. The variable name must follow SAS naming conventions (no spaces and the name cannot begin with a number or symbol). - prob_values : list of strings, required for classification problems, otherwise not used - A list of variable names containing the predicted probability values in the score table. The first element - should represent the predicted probability of the target class. Required for classification problems. Default - is None. - levels: List of strings, required for classification problems, otherwise not used - List of classes of a nominal target in the order they were passed in prob_values. Levels must be passed as a + prob_values : list of str + Required for classification problems, otherwise not used + A list of variable names containing the predicted probability values in the score table. The first element + should represent the predicted probability of the target class. Required for classification problems. Default + is None. + levels: list of str + Required for classification problems, otherwise not used + List of classes of a nominal target in the order they were passed in `prob_values`. Levels must be passed as a string. Default is None. 
- pred_values : string, required for regression problems, otherwise not used - Variable name containing the predicted values in score_table. The variable name must follow SAS naming - conventions (no spaces and the name cannot begin with a number or symbol).Required for regression problems. + pred_values : str + Required for regression problems, otherwise not used + Variable name containing the predicted values in `score_table`. The variable name must follow SAS naming + conventions (no spaces and the name cannot begin with a number or symbol). Required for regression problems. The default value is None. - json_path : str or Path, optional + json_path : str or pathlib.Path, optional Location for the output JSON files. If a path is passed, the json files will populate in the directory and the function will return None, unless return_dataframes is True. Otherwise, the function will return the json - strings in a dictionary (dict["maxDifferences.json"] and dict["groupMetrics.json"]). The default value is - None. + strings in a dictionary (dict["maxDifferences.json"] and dict["groupMetrics.json"]). The default value is + None. 
Returns ------- @@ -1181,9 +1193,9 @@ def calculate_model_statistics( cls, target_value: Union[str, int, float], prob_value: Union[int, float, None] = None, - validate_data: Union[DataFrame, List[list], Type["numpy.array"]] = None, - train_data: Union[DataFrame, List[list], Type["numpy.array"]] = None, - test_data: Union[DataFrame, List[list], Type["numpy.array"]] = None, + validate_data: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None, + train_data: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None, + test_data: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None, json_path: Union[str, Path, None] = None, target_type: str = "classification", cutoff: Optional[float] = None, @@ -1204,10 +1216,11 @@ def calculate_model_statistics( Datasets can be provided in the following forms, with the assumption that data is ordered as `actual`, `predict`, and `probability` respectively: + * pandas dataframe: the actual and predicted values are their own columns - * numpy array: the actual and predicted values are their own columns or rows and - ordered such that the actual values come first and the predicted second * list: the actual and predicted values are their own indexed entry + * numpy array: the actual and predicted values are their own columns or rows \ + and ordered such that the actual values come first and the predicted second If a json_path is supplied, then this function outputs a set of JSON files named "dmcas_fitstat.json", "dmcas_roc.json", "dmcas_lift.json". @@ -1219,13 +1232,13 @@ def calculate_model_statistics( prob_value : int or float, optional The threshold value for model predictions to indicate an event occurred. The default value is 0.5. - validate_data : pandas.DataFrame, list of list, or numpy array, optional + validate_data : pandas.DataFrame, list of list, or numpy.ndarray, optional Dataset pertaining to the validation data. The default value is None. 
- train_data : pandas.DataFrame, list of list, or numpy array, optional + train_data : pandas.DataFrame, list of list, or numpy.ndarray, optional Dataset pertaining to the training data. The default value is None. - test_data : pandas.DataFrame, list of list, or numpy array, optional + test_data : pandas.DataFrame, list of list, or numpy.ndarray, optional Dataset pertaining to the test data. The default value is None. - json_path : str or Path, optional + json_path : str or pathlib.Path, optional Location for the output JSON files. The default value is None. target_type: str, optional Type of target the model is trying to find. Currently supports "classification" @@ -1359,20 +1372,20 @@ def calculate_model_statistics( @staticmethod def check_for_data( - validate: Union[DataFrame, List[list], Type["numpy.array"]] = None, - train: Union[DataFrame, List[list], Type["numpy.array"]] = None, - test: Union[DataFrame, List[list], Type["numpy.array"]] = None, + validate: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None, + train: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None, + test: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None, ) -> list: """ Check which datasets were provided and return a list of flags. Parameters ---------- - validate : pandas.DataFrame, list of list, or numpy array, optional + validate : pandas.DataFrame, list of list, or numpy.ndarray, optional Dataset pertaining to the validation data. The default value is None. - train : pandas.DataFrame, list of list, or numpy array, optional + train : pandas.DataFrame, list of list, or numpy.ndarray, optional Dataset pertaining to the training data. The default value is None. - test : pandas.DataFrame, list of list, or numpy array, optional + test : pandas.DataFrame, list of list, or numpy.ndarray, optional Dataset pertaining to the test data. The default value is None. 
Returns @@ -1400,7 +1413,7 @@ def check_for_data( @staticmethod def stat_dataset_to_dataframe( - data: Union[DataFrame, List[list], Type["numpy.array"]], + data: Union[DataFrame, List[list], Type["numpy.ndarray"]], target_value: Union[str, int, float] = None, target_type: str = "classification", ) -> DataFrame: @@ -1416,7 +1429,7 @@ def stat_dataset_to_dataframe( Parameters ---------- - data : pandas.DataFrame, list of list, or numpy array + data : pandas.DataFrame, list of list, or numpy.ndarray Dataset representing the actual and predicted values of the model. May also include the prediction probabilities. target_value : str, int, or float, optional @@ -1528,7 +1541,7 @@ def read_json_file(path: Union[str, Path]) -> Any: Parameters ---------- - path : str or Path + path : str or pathlib.Path Location of the JSON file to be opened. Returns @@ -1636,9 +1649,9 @@ def create_requirements_json( Parameters ---------- - model_path : str or Path, optional + model_path : str or pathlib.Path, optional The path to a Python project, by default the current working directory. - output_path : str or Path, optional + output_path : str or pathlib.Path, optional The path for the output requirements.json file. The default value is None. Returns @@ -1757,7 +1770,7 @@ def get_code_dependencies( Parameters ---------- - model_path : string or Path, optional + model_path : str or pathlib.Path, optional File location for the output JSON file. The default value is the current working directory. @@ -1784,7 +1797,7 @@ def find_imports(file_path: Union[str, Path]) -> List[str]: Parameters ---------- - file_path : str or Path + file_path : str or pathlib.Path File location for the Python file to be parsed. Returns @@ -1822,13 +1835,13 @@ def get_pickle_file(pickle_folder: Union[str, Path] = Path.cwd()) -> List[Path]: Parameters ---------- - pickle_folder : str or Path + pickle_folder : str or pathlib.Path File location for the input pickle file. 
The default value is the current working directory. Returns ------- - list of Path + list of pathlib.Path A list of pickle files. """ return [ @@ -1844,7 +1857,7 @@ def get_pickle_dependencies(cls, pickle_file: Union[str, Path]) -> List[str]: Parameters ---------- - pickle_file : str or Path + pickle_file : str or pathlib.Path The file where you stored pickle data. Returns @@ -1871,9 +1884,10 @@ def get_package_names(stream: Union[bytes, str]) -> List[str]: This code has been adapted from the following stackoverflow example and utilizes the pickletools package. + Credit: modified from - https://stackoverflow.com/questions/64850179/inspecting-a-pickle-dump-for - -dependencies + https://stackoverflow.com/questions/64850179/inspecting-a-pickle-dump-for-dependencies + More information here: https://github.com/python/cpython/blob/main/Lib/pickletools.py @@ -1884,7 +1898,7 @@ def get_package_names(stream: Union[bytes, str]) -> List[str]: Returns ------- - List of str + list of str List of package names found as module dependencies in the pickle file. """ # Collect opcodes, arguments, and position values from the pickle stream diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py index 5fffc231..778d14e2 100644 --- a/src/sasctl/pzmm/write_score_code.py +++ b/src/sasctl/pzmm/write_score_code.py @@ -49,14 +49,14 @@ def write_score_code( The following files are generated by this function if score_code_path: - - '*_score.py' + - '\*_score.py' - The Python score code file for the model. - 'dcmas_epscorecode.sas' (for SAS Viya 3.5 models) - - Python score code wrapped in DS2 and prepared for CAS scoring or - publishing. + - Python score code wrapped in DS2 and prepared for CAS scoring or\ + publishing. - 'dmcas_packagescorecode.sas' (for SAS Viya 3.5 models) - - Python score code wrapped in DS2 and prepared for SAS Microanalytic Score - scoring or publishing. 
+ - Python score code wrapped in DS2 and prepared for SAS Microanalytic \ + Score scoring or publishing. The function determines the type of model based on the following arguments: output_variables, target_values, predict_threshold. As an example, consider the @@ -65,16 +65,18 @@ def write_score_code( For a binary classification model, where the model is determining if a flower is or is not the `setosa` species, the following can be passed: - - score_metrics = ["Setosa"] or ["Setosa", "Setosa_Proba"], - - target_values = ["1", "0"], - - predict_threshold = ["0.4"] + + - score_metrics = ["Setosa"] or ["Setosa", "Setosa_Proba"], + - target_values = ["1", "0"], + - predict_threshold = ["0.4"] For a multi-classification model, where the model is determining if a flower is one of three species, the following can be passed: - - score_metrics = ["Species"] or ["Species", "Setosa_Proba", - "Versicolor_Proba", "Virginica_Proba"] - - target_values = ["Setosa", "Versicolor", "Virginica"] - - predict_threshold = None + + - score_metrics = ["Species"] or ["Species", "Setosa_Proba",\ + "Versicolor_Proba", "Virginica_Proba"] + - target_values = ["Setosa", "Versicolor", "Virginica"] + - predict_threshold = None Disclaimer: The score code that is generated is designed to be a working template for any Python model, but is not guaranteed to work out of the box for @@ -85,18 +87,20 @@ def write_score_code( model_prefix : str The variable for the model name that is used when naming model files. (For example: hmeqClassTree + [Score.py || .pickle]). - input_data : DataFrame or list of dict - The `DataFrame` object contains the training data, and includes only the + input_data : pandas.DataFrame or list of dict + The :class:`pandas.DataFrame` object contains the training data, and includes only the predictor columns. The write_score_code function currently supports int(64), float(64), and string data types for scoring. 
Providing a list of dict objects signals that the model files are being created from an MLFlow model. - predict_method : [function -> list, list] + predict_method : Callable or list of Any The Python function used for model predictions and the expected output types. The expected output types can be passed as example values or as the value types. For example, if the model is a Scikit-Learn DecisionTreeClassifier, then pass either of the following: - * [sklearn.tree.DecisionTreeClassifier.predict, ["A"]] - * [sklearn.tree.DecisionTreeClassifier.predict_proba, [0.4, float]] + + * [sklearn.tree.DecisionTreeClassifier.predict, ["A"]] + * [sklearn.tree.DecisionTreeClassifier.predict_proba, [0.4, float]] + target_variable : str, optional Target variable to be predicted by the model. The default value is None. target_values : list of str, optional @@ -127,7 +131,7 @@ def write_score_code( and validated through both CAS and SAS Micro Analytic Service. If set to false, then the model will only be able to be scored and validated through SAS Micro Analytic Service. The default value is True. - score_code_path : str or Path, optional + score_code_path : str or pathlib.Path, optional Path for output score code file(s) to be generated. If no value is supplied a dict is returned instead. The default value is None. target_index : int, optional @@ -135,7 +139,7 @@ def write_score_code( index should match the index of the target outcome in target_values. If target_values are not given, this index should indicate whether the the target probability variable is the first or second variable returned by the model. The default value is 1. 
- kwargs + **kwargs Other keyword arguments are passed to one of the following functions: * sasctl.pzmm.ScoreCode._write_imports(pickle_type, mojo_model=None, binary_h2o_model=None, binary_string=None) @@ -325,7 +329,7 @@ def upload_and_copy_score_resources( ---------- model : str, dict, or RestObj The name or id of the model, or a dictionary representation of the model. - files : list of file objects + files : list of Any The list of score resource files to upload. Returns @@ -1106,7 +1110,7 @@ def _predictions_to_metrics( A list of target values for the target variable. The default value is None. predict_threshold : float, optional The prediction threshold for normalized probability score_metrics. Values - are expected to be between 0 and 1. The default value is None. + are expected to be between 0 and 1. The default value is None. h2o_model : bool, optional Flag to indicate that the model is an H2O.ai model. The default value is False. diff --git a/src/sasctl/pzmm/zip_model.py b/src/sasctl/pzmm/zip_model.py index 0a51ed56..47beee68 100644 --- a/src/sasctl/pzmm/zip_model.py +++ b/src/sasctl/pzmm/zip_model.py @@ -1,6 +1,6 @@ # Copyright (c) 2020, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -import io +from io import BytesIO import zipfile from pathlib import Path from typing import Optional, Union @@ -15,7 +15,7 @@ def _filter_files(file_dir: Union[str, Path], is_viya4: Optional[bool] = False) Parameters ---------- file_dir : str or Path - Location of *.json, *.pickle, *.mojo, and *Score.py files. + Location of \*.json, \*.pickle, \*.mojo, and \*Score.py files. is_viya4 : bool, optional Boolean to indicate difference in logic between SAS Viya 3.5 and SAS Viya 4. 
For Viya 3.5 models, ignore score code that is already in place in the file @@ -47,7 +47,7 @@ def zip_files( model_files: Union[dict, str, Path], model_prefix: str, is_viya4: Optional[bool] = False, - ) -> io.BytesIO: + ) -> BytesIO: """ Combines all JSON files with the model pickle file and associated score code file into a single archive ZIP file. @@ -58,7 +58,7 @@ def zip_files( Parameters ---------- - model_files : str, Path, or dict + model_files : str, pathlib.Path, or dict Either the directory location of the model files (string or Path object), or a dictionary containing the contents of all the model files. model_prefix : str @@ -93,4 +93,4 @@ def zip_files( with open( str(Path(model_files) / (model_prefix + ".zip")), "rb" ) as zip_file: - return io.BytesIO(zip_file.read()) + return BytesIO(zip_file.read()) diff --git a/src/sasctl/tasks.py b/src/sasctl/tasks.py index 91a64f1d..5324a06b 100644 --- a/src/sasctl/tasks.py +++ b/src/sasctl/tasks.py @@ -357,10 +357,10 @@ def register_model( Parameters ---------- - model : swat.CASTable or sklearn.BaseEstimator - The model to register. If an instance of ``swat.CASTable`` the table - is assumed to hold an ASTORE, which will be downloaded and used to - construct the model to register. If a scikit-learn estimator, the + model : swat.cas.table.CASTable or sklearn.base.BaseEstimator + The model to register. If an instance of :class:`swat.cas.table.CASTable` + the table is assumed to hold an ASTORE, which will be downloaded and used + to construct the model to register. If a scikit-learn estimator, the model will be pickled and uploaded to the registry and score code will be generated for publishing the model to MAS. name : str @@ -371,14 +371,15 @@ def register_model( repository : str or dict, optional The name or id of the repository, or a dictionary representation of the repository. If omitted, the default repository will be used. 
- X : DataFrame, type, list of type, or dict of str: type, optional + X : pandas.DataFrame, type, list of type, or dict of str: type, optional The expected type for each input value of the target function. Can be omitted if target function includes type hints. If a DataFrame is provided, the columns will be inspected to determine type information. If a single type is provided, all columns will be assumed to be that type, otherwise a list of column types or a dictionary of column_name: type may be provided. - version : {'new', 'latest', int}, optional + version : str or int, optional + If str choose from ``{'new', 'latest'}``. Version number of the project in which the model should be created. Defaults to 'new'. files : list @@ -395,13 +396,13 @@ def register_model( modeler : str, optional The name of the user who created the model. Will default ot the current user if not specified. - input : DataFrame, type, list of type, or dict of str: type, optional + input : pandas.DataFrame, type, list of type, or dict of str: type, optional Deprecated, use `X` instead. Returns ------- model : RestObj - The newly registered model as an instance of ``RestObj`` + The newly registered model as an instance of :class:`.core.RestObj` Notes ----- @@ -550,7 +551,7 @@ def publish_model( replace : bool, optional Whether to overwrite the model if it already exists in the `destination` - kwargs : optional + **kwargs additional arguments will be passed to the underlying publish functions. @@ -670,7 +671,7 @@ def update_model_performance(data, model, label, refresh=True): Parameters ---------- - data : Dataframe + data : pandas.DataFrame model : str or dict The name or id of the model, or a dictionary representation of the model. @@ -683,12 +684,12 @@ def update_model_performance(data, model, label, refresh=True): Returns ------- - CASTable + swat.cas.table.CASTable The CAS table containing the performance data. 
See Also -------- - :meth:`model_management.create_performance_definition <.ModelManagement.create_performance_definition>` + :meth:`model_management.create_performance_definition <.ModelManagement.create_performance_definition>` .. versionadded:: v1.3 @@ -885,7 +886,7 @@ def get_project_kpis( Column value to be filtered, by default None Returns ------- - kpiTableDf : DataFrame + kpiTableDf : pandas.DataFrame A pandas DataFrame representing the MM_STD_KPI table. Note that SAS missing values are replaced with pandas valid missing values. """ diff --git a/src/sasctl/utils/astore.py b/src/sasctl/utils/astore.py index 0d338015..91a0de09 100644 --- a/src/sasctl/utils/astore.py +++ b/src/sasctl/utils/astore.py @@ -23,9 +23,9 @@ def create_package(table, input=None): Parameters ---------- - table : swat.CASTable + table : swat.cas.table.CASTable The CAS table containing an ASTORE or score code. - input : DataFrame, type, list of type, or dict of str: type, optional + input : pandas.DataFrame, type, list of type, or dict of str, optional The expected type for each input value of the target function. Can be omitted if target function includes type hints. If a DataFrame is provided, the columns will be inspected to determine type information. @@ -40,7 +40,7 @@ def create_package(table, input=None): See Also -------- - model_repository.import_model_from_zip + ~.model_repository.ModelRepository.import_model_from_zip """ if swat is None: @@ -63,9 +63,9 @@ def create_package_from_datastep(table, input=None): Parameters ---------- - table : swat.CASTable + table : swat.cas.table.CASTable The CAS table containing the score code. - input : DataFrame, type, list of type, or dict of str: type, optional + input : pandas.DataFrame, type, list of type, or dict of str, optional The expected type for each input value of the target function. Can be omitted if target function includes type hints. If a DataFrame is provided, the columns will be inspected to determine type information. 
@@ -80,7 +80,7 @@ def create_package_from_datastep(table, input=None): See Also -------- - model_repository.import_model_from_zip + ~.model_repository.ModelRepository.import_model_from_zip """ dscode = table.to_frame().loc[0, "DataStepSrc"] @@ -151,7 +151,7 @@ def create_package_from_astore(table): Parameters ---------- - table : swat.CASTable + table : swat.cas.table.CASTable The CAS table containing the ASTORE. Returns @@ -161,7 +161,7 @@ def create_package_from_astore(table): See Also -------- - model_repository.import_model_from_zip + ~.model_repository.ModelRepository.import_model_from_zip """ files = create_files_from_astore(table) @@ -174,7 +174,7 @@ def create_files_from_astore(table): Parameters ---------- - table : swat.CASTable + table : swat.cas.table.CASTable The CAS table containing the ASTORE. Returns diff --git a/src/sasctl/utils/pymas/core.py b/src/sasctl/utils/pymas/core.py index 04f3c00c..f3908097 100644 --- a/src/sasctl/utils/pymas/core.py +++ b/src/sasctl/utils/pymas/core.py @@ -35,7 +35,7 @@ def build_wrapper_function( Parameters ---------- - func : function or str + func : Callable or str Function name or an instance of Function which will be wrapped variables : list of DS2Variable array_input : bool @@ -43,7 +43,7 @@ def build_wrapper_function( passing to `func` name : str, optional Name of the generated wrapper function. Defaults to 'wrapper'. - setup : iterable + setup : Iterable Python source code lines to be executed during package setup return_msg : bool, optional Deprecated. @@ -53,7 +53,7 @@ def build_wrapper_function( Returns ------- str - the Python definition for the wrapper function. + The Python definition for the wrapper function. Notes ----- @@ -164,13 +164,13 @@ def wrap_predict_method(func, variables, **kwargs): Parameters ---------- - func : function or str + func : Callable or str Function name or an instance of Function which will be wrapped. Assumed to behave as `.predict()` methods. 
variables : list of DS2Variable Input and output variables for the function - kwargs : any - Will be passed to `build_wrapper_function`. + **kwargs + Will be passed to :func:`build_wrapper_function`. Returns ------- @@ -194,12 +194,12 @@ def wrap_predict_proba_method(func, variables, **kwargs): Parameters ---------- - func : function or str + func : Callable or str Function name or an instance of Function which will be wrapped. Assumed to behave as `.predict_proba()` methods. variables : list of DS2Variable Input and output variables for the function - kwargs : any + **kwargs Will be passed to `build_wrapper_function`. Returns @@ -232,7 +232,7 @@ def from_inline( Parameters ---------- - func : function + func : Callable A Python function object to be used input_types : list of type, optional The expected type for each input value of `func`. Can be ommitted if @@ -346,13 +346,13 @@ def from_pickle( Parameters ---------- - file : str or bytes or file_like + file : str or bytes or io.BytesIO Pickled object to use. String is assumed to be a path to a picked file, file_like is assumed to be an open file handle to a pickle object, and bytes is assumed to be the raw pickled bytes. func_name : str Name of the target function to call - input_types : DataFrame, type, list of type, or dict of str: type, optional + input_types : pandas.DataFrame, type, list of type, or dict of str, optional The expected type for each input value of the target function. Can be omitted if target function includes type hints. If a DataFrame is provided, the columns will be inspected to determine type information. @@ -522,7 +522,7 @@ class PyMAS: return_msg : bool Deprecated. Whether the DS2-generated return message should be included. - kwargs : any + **kwargs Passed to :func:`build_wrapper_function`. 
""" @@ -628,7 +628,8 @@ def score_code(self, input_table=None, output_table=None, columns=None, dest="MA The name of the table where execution results will be written columns : list of str Names of the columns from `table` that will be passed to `func` - dest : str {'MAS', 'EP', 'CAS', 'Python'} + dest : str + Choose from ``{'MAS', 'EP', 'CAS', 'Python'}``. Specifies the publishing destination for the score code to ensure that compatible code is generated. diff --git a/src/sasctl/utils/pymas/ds2.py b/src/sasctl/utils/pymas/ds2.py index 4c95c01f..d143a65f 100644 --- a/src/sasctl/utils/pymas/ds2.py +++ b/src/sasctl/utils/pymas/ds2.py @@ -157,7 +157,7 @@ def add_method(self, name, target, variables): Name of the DS2 method to create. target : str Name of the Python method to call - variables : list of :class:`DS2Variable` + variables : list of DS2Variable List of input and output variables for the method. Returns @@ -617,8 +617,9 @@ def pymas_statement(self, python_var_name=None): python_var_name : str Python variable name. 
- Returns - ------- + Raises + ------ + ValueError """ diff --git a/src/sasctl/utils/pymas/python.py b/src/sasctl/utils/pymas/python.py index 5cfe619c..fc4be11b 100644 --- a/src/sasctl/utils/pymas/python.py +++ b/src/sasctl/utils/pymas/python.py @@ -21,7 +21,7 @@ def ds2_variables(input, output_vars=False, names=None): Parameters ---------- - input : function or OrderedDict or Pandas DataFrame or Numpy or OrderedDict + input : Callable or OrderedDict[str, tuple] or OrderedDict[str, type] or pandas.DataFrame a function or mapping parameter names to (type, is_output) output_vars : bool @@ -34,7 +34,7 @@ def ds2_variables(input, output_vars=False, names=None): Returns ------- - list + list of DS2Variable Examples -------- @@ -104,7 +104,7 @@ def parse_type_hints(func, skip_var="self"): Parameters ---------- - func : function + func : Callable the object to inspect for parameters skip_var : str diff --git a/tox.ini b/tox.ini index f76566b6..f776b4a5 100644 --- a/tox.ini +++ b/tox.ini @@ -58,8 +58,7 @@ deps = # tests: lightgbm ; platform_system != "Darwin" # lightgmb seems to have build issues on MacOS # doc skips install, so explicitly add minimum packages doc: sphinx - doc: numpydoc - doc: pydata-sphinx-theme + doc: pydata_sphinx_theme doc: pyyaml setenv = @@ -75,7 +74,7 @@ passenv = commands = clean: coverage erase - unit: {posargs:pytest -rsx --cov={envsitepackagesdir}/sasctl --cov-report=xml:./.reports/unit.xml --cov-append tests/unit/} + unit: {posargs:pytest --cov={envsitepackagesdir}/sasctl --cov-report=xml:./.reports/unit.xml --cov-append tests/unit/} integration: {posargs:pytest --cov={envsitepackagesdir}/sasctl --cov-report=xml:./.reports/integration.xml --cov-append tests/integration/} # Uncomment when tests are working again for scenarios # scenarios: {posargs:pytest --cov={envsitepackagesdir}/sasctl --cov-report=xml:./.reports/scenarios.xml --cov-append tests/scenarios/}