feat: Jira-1883- Implementing Generate Report Functionality (#37)

Co-authored-by: Jonathan Yu <[email protected]>
provena · Dec 20, 2024 · 9f6207c · 9f6207c
1 parent 5b29c3e
commit 9f6207c
Show file tree

Hide file tree

Showing 7 changed files with 226 additions and 21 deletions.
diff --git a/docs/example-client-workflow.ipynb b/docs/example-client-workflow.ipynb
@@ -632,6 +632,46 @@
     "print(\"Current job status:\", job_result.status) "
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Generate Report Functionality - Allows you to generate a Study Close Out Report (Word document, `.docx`) from a Model Run or Study. \n",
+    "\n",
+    "There are two approaches you can take to generate the report: \n",
+    "\n",
+    " - 1- Default Path: If you don't provide a custom ```file_path``` parameter, the generated Word file is stored in your relative directory (the directory you are running the code from). \n",
+    "\n",
+    " - 2- Custom Path: If you provide a custom ```file_path``` parameter, the directory is created automatically if it does not already exist, and your file is saved inside it. The path must end with a trailing ```/```, because the report file name is appended directly to it. \n",
+    "\n",
+    "   If you provide a file path and that directory already exists, your file will be saved inside the existing directory as well. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ProvenaInterfaces.ProvenanceAPI import GenerateReportRequest\n",
+    "from ProvenaInterfaces.RegistryModels import ItemSubType\n",
+    "\n",
+    "\n",
+    "# Generates the report document in your relative directory.\n",
+    "await client.prov_api.generate_report(report_request = GenerateReportRequest(\n",
+    "        id = \"10378.1/1968661\", \n",
+    "        item_subtype=ItemSubType.STUDY,\n",
+    "        depth=1\n",
+    "    ))\n",
+    "\n",
+    "# Generates the report document in a specified directory\n",
+    "await client.prov_api.generate_report(report_request = GenerateReportRequest(\n",
+    "        id = \"10378.1/1968661\", \n",
+    "        item_subtype=ItemSubType.STUDY,\n",
+    "        depth=1\n",
+    "    ), file_path=\"./idontexistpath/butinhere/\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/src/provenaclient/clients/client_helpers.py b/src/provenaclient/clients/client_helpers.py
@@ -462,6 +462,53 @@ async def validated_get_request(client: ClientService, params: Optional[Mapping[
         raise Exception(
             f"{error_message} Exception: {e}") from e
 
+async def validated_post_request(
+    client: ClientService,
+    params: Optional[Mapping[str, Optional[ParamTypes]]],
+    json_body: Optional[JsonData],
+    url: str,
+    error_message: str, 
+    headers: Optional[Dict[str,Any]] = None
+) -> Response: 
+
+    """
+    A generic POST request method for endpoints where the response
+    does not parse into a Pydantic model. This method handles cases
+    like file downloads or when raw data is expected.
+
+    Args:
+        client (ClientService): The client being used. Relies on client interface.
+        params (Optional[Mapping[str, Optional[ParamTypes]]]): The params if any.
+        json_body (Optional[JsonData]): JSON data to send with the request, if any.
+        url (str): The URL to make the POST request to.
+        error_message (str): The error message to embed in other exceptions.
+        headers (Optional[Dict[str, Any]]): The headers to include in the POST request, if any.
+
+    Raises:
+        e: Exception depending on the error.
+
+    Returns:
+        Response: The raw HTTP response object for further processing.
+    """
+
+    # Prepare and setup the API request.
+    get_auth = client._auth.get_auth  # Get bearer auth
+    filtered_params = build_params_exclude_none(params if params else {})
+
+    try:
+        response = await HttpClient.make_post_request(url=url, data=json_body, params=filtered_params, auth=get_auth(), headers = headers)
+
+        handle_err_codes(
+            response=response,
+            error_message=error_message
+        )
+        return response
+
+    except BaseException as e:
+        raise e
+    except Exception as e:
+        raise Exception(
+            f"{error_message} Exception: {e}") from e
 
 async def parsed_post_request_none_return(client: ClientService, params: Optional[Mapping[str, Optional[ParamTypes]]], json_body: Optional[JsonData], url: str, error_message: str) -> None:
     """
@@ -505,7 +552,7 @@ async def parsed_post_request_none_return(client: ClientService, params: Optiona
     except Exception as e:
         raise Exception(
             f"{error_message} Exception: {e}") from e
-    
+
 
 async def parsed_delete_request_non_return(client: ClientService, params: Optional[Mapping[str, Optional[ParamTypes]]], url: str, error_message: str) -> None:
     """

diff --git a/src/provenaclient/clients/prov_client.py b/src/provenaclient/clients/prov_client.py
@@ -21,7 +21,7 @@
 from provenaclient.utils.helpers import *
 from provenaclient.clients.client_helpers import *
 from provenaclient.models.general import HealthCheckResponse
-from ProvenaInterfaces.ProvenanceAPI import LineageResponse, ModelRunRecord, RegisterModelRunResponse, RegisterBatchModelRunRequest, RegisterBatchModelRunResponse, ConvertModelRunsResponse, PostUpdateModelRunResponse, PostUpdateModelRunInput
+from ProvenaInterfaces.ProvenanceAPI import LineageResponse, ModelRunRecord, RegisterModelRunResponse, RegisterBatchModelRunRequest, RegisterBatchModelRunResponse, ConvertModelRunsResponse, PostUpdateModelRunResponse, PostUpdateModelRunInput, GenerateReportRequest
 from ProvenaInterfaces.RegistryAPI import ItemModelRun
 
 
@@ -32,6 +32,7 @@ class ProvAPIEndpoints(str, Enum):
     POST_MODEL_RUN_REGISTER = "/model_run/register"
     POST_MODEL_RUN_UPDATE = "/model_run/update"
     POST_MODEL_RUN_REGISTER_BATCH = "/model_run/register_batch"
+    POST_GENERATE_REPORT = "/explore/generate/report"
     GET_EXPLORE_UPSTREAM = "/explore/upstream"
     GET_EXPLORE_DOWNSTREAM = "/explore/downstream"
     GET_EXPLORE_SPECIAL_CONTRIBUTING_DATASETS = "/explore/special/contributing_datasets"
@@ -539,3 +540,45 @@ async def regenerate_csv_from_model_run_batch(self, batch_id: str) -> str:
         )
 
         return response.text
+
+    async def generate_report(self, report_request: GenerateReportRequest) -> ByteString:
+        """Generates a provenance report from a Study or Model Run Entity containing the
+        associated inputs, model runs and outputs involved. 
+        
+        The report is generated in `.docx` format by making a POST request to the API.
+
+        Parameters
+        ----------
+        report_request : GenerateReportRequest
+            The request object containing the parameters for generating the report, including the `id`, 
+            `item_subtype`, and `depth`.
+
+        Returns
+        -------
+        ByteString
+            The raw byte content of the generated `.docx` file. The type of the returned content will be either 
+            `bytes` or `bytearray`, which can be directly saved to a file.
+        
+        Raises
+        ------
+        AssertionError
+            If the response content is not found or is not in the expected `bytes` or `bytearray` format.
+        """
+
+        response = await validated_post_request(
+            client=self, 
+            url=self._build_endpoint(ProvAPIEndpoints.POST_GENERATE_REPORT), 
+            error_message=f"Something has gone wrong during report generation for node with id {report_request.id}", 
+            json_body=py_to_dict(report_request),
+            params=None, 
+            headers = {
+                "Content-Type": "application/json",  # Indicates the body is JSON
+                "Accept": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",  # Indicates the response type
+            }
+        )
+
+        # Validate that byte content is present, before returning to the user. 
+        assert response.content, f"Failed to generate report for node with id {report_request.id} - Response content not found!"
+        assert isinstance(response.content, (bytes, bytearray)), "Unexpected content type from server. Expected bytes or bytearray!"
+
+        return response.content
diff --git a/src/provenaclient/models/general.py b/src/provenaclient/models/general.py
@@ -13,9 +13,10 @@
 '''
 
 from typing import Any, Dict, Optional, Type, TypedDict, List
-from pydantic import BaseModel, ValidationError, validator
+from pydantic import BaseModel, Field,  ValidationError, validator
+from ProvenaInterfaces.RegistryAPI import ItemSubType, Node
 from ProvenaInterfaces.ProvenanceAPI import LineageResponse
-from ProvenaInterfaces.RegistryAPI import Node
+
 
 class HealthCheckResponse(BaseModel):
     message: str
@@ -61,4 +62,4 @@ class CustomLineageResponse(LineageResponse):
     """
 
     graph: Optional[CustomGraph] #type:ignore
-
+
diff --git a/src/provenaclient/modules/prov.py b/src/provenaclient/modules/prov.py
@@ -2,14 +2,17 @@
 Created Date: Monday June 17th 2024 +1000
 Author: Peter Baker
 -----
-Last Modified: Monday June 17th 2024 4:45:39 pm +1000
-Modified By: Peter Baker
+Last Modified: Friday November 29th 2024 4:21:39 pm +1000
+Modified By: Parth Kulkarni
 -----
 Description: Provenance API L3 module. Includes the ProvAPI sub module. Contains IO helper functions for writing/reading files.
 -----
 HISTORY:
 Date      	By	Comments
 ----------	---	---------------------------------------------------------
+
+29-11-2024 | Parth Kulkarni | Added generate-report functionality. 
+
 '''
 
 from provenaclient.auth.manager import AuthManager
@@ -20,15 +23,15 @@
 from provenaclient.utils.helpers import read_file_helper, write_file_helper, get_and_validate_file_path
 from typing import List
 from provenaclient.models.general import CustomLineageResponse, HealthCheckResponse
-from ProvenaInterfaces.ProvenanceAPI import ModelRunRecord, ConvertModelRunsResponse, RegisterModelRunResponse, RegisterBatchModelRunRequest, RegisterBatchModelRunResponse, PostUpdateModelRunResponse
+from ProvenaInterfaces.ProvenanceAPI import LineageResponse, ModelRunRecord, ConvertModelRunsResponse, RegisterModelRunResponse, RegisterBatchModelRunRequest, RegisterBatchModelRunResponse, PostUpdateModelRunResponse, GenerateReportRequest
 from ProvenaInterfaces.RegistryAPI import ItemModelRun
 from ProvenaInterfaces.SharedTypes import StatusResponse
 
 # L3 interface.
 
 PROV_API_DEFAULT_SEARCH_DEPTH = 3
 DEFAULT_CONFIG_FILE_NAME = "prov-api.env"
-
+DEFAULT_RELATIVE_FILE_PATH = "./"
 
 class ProvAPIAdminSubModule(ModuleService):
     _prov_api_client: ProvClient
@@ -506,3 +509,33 @@ async def regenerate_csv_from_model_run_batch(self, batch_id: str, file_path: Op
             write_file_helper(file_path=file_path, content=csv_text)
 
         return csv_text
+
+    async def generate_report(self, report_request:GenerateReportRequest, file_path: str = DEFAULT_RELATIVE_FILE_PATH) -> None:
+        """Generates a provenance report from a Study or Model Run Entity containing the
+        associated inputs, model runs and outputs involved. 
+        
+        The report is generated in `.docx` format and saved under `file_path`, which defaults to the relative directory (`./`).
+
+        Parameters
+        ----------
+        report_request : GenerateReportRequest
+            The request object containing the parameters for generating the report, including the `id`, 
+            `item_subtype`, and `depth`.
+        """
+        # Calls API endpoint to generate report document.
+        generated_word_file = await self._prov_api_client.generate_report(
+            report_request=report_request
+        )
+
+        # Sanitize the id to avoid file system errors
+        sanitized_filename = report_request.id.replace("/", "_") + " - Study Close Out Report.docx"
+
+        # Append file path and file-name together 
+        file_path = file_path + sanitized_filename
+
+        # Writes content into word docx file.
+        write_file_helper(file_path=file_path,content = generated_word_file)
+
+
+
+
diff --git a/src/provenaclient/utils/helpers.py b/src/provenaclient/utils/helpers.py
@@ -134,16 +134,18 @@ def validate_existing_path(file_path: str) -> None :
         raise Exception(f"Path validation failed. Exception {e}")
 
 
-def write_file_helper(file_path: str, content: str) -> None:
+def write_file_helper(file_path: str, content: Union[str, bytes]) -> None:
     """
-    Writes provided content to a file.
+    Writes the provided content (string or bytes) to a file at the specified file path.
 
     Parameters
     ----------
     file_name : str
         The name of the file to write content into.
-    content : str
-        The content to be written into the file.
+    content : Union[str, bytes]
+        The content to be written to the file. It can be either:
+            - A `str`, which will be written in text mode.
+            - A `bytes` object, which will be written in binary mode.
 
     Raises
     ------
@@ -152,11 +154,22 @@ def write_file_helper(file_path: str, content: str) -> None:
     Exception
         For non-I/O related exceptions that may occur during file writing.
     """
-
+    
     try:
-        # Write to file
-        with open(file_path, 'w') as file:
-            file.write(content)
+
+        if not os.path.exists(path=file_path): 
+            # Make the path, before writing to a file. 
+            os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+        if isinstance(content, str):
+            # Write to file
+            with open(file_path, 'w') as file:
+                file.write(content)
+
+        if isinstance(content, bytes):
+            # Write to file in byte mode
+            with open(file_path, 'wb') as file: 
+                file.write(content)
 
     except IOError as e:
         raise IOError(f"Failed to file {file_path} due to I/O error: {e}")
@@ -193,7 +206,7 @@ def read_file_helper(file_path: str) -> str:
             return file_content
 
     except Exception as e:
-        raise Exception(f"Error with file. Exception {e}")
+        raise Exception(f"Error with file. Exception {e}") 
 
 def build_params_exclude_none(params: Mapping[str, Optional[ParamTypes]]) -> Dict[str, ParamTypes]:
     """

diff --git a/tests/adhoc.py b/tests/adhoc.py
@@ -11,11 +11,26 @@
 import os
 import random
 
+from provenaclient.utils.config import APIOverrides
+
 
 async def main() -> None:
+
+    api_overrides = APIOverrides(
+        datastore_api_endpoint_override="https://f1835-data-api.dev.rrap-is.com",
+        registry_api_endpoint_override="https://f1835-registry-api.dev.rrap-is.com",
+        prov_api_endpoint_override="https://f1835-prov-api.dev.rrap-is.com",
+        search_api_endpoint_override="https://f1835-search-api.dev.rrap-is.com",
+        search_service_endpoint_override="https://f1835-search.dev.rrap-is.com",
+        handle_service_api_endpoint_override="https://f1835-handle.dev.rrap-is.com",
+        jobs_service_api_endpoint_override="https://f1835-job-api.dev.rrap-is.com",
+    )
+
+
     config = Config(
         domain="dev.rrap-is.com",
-        realm_name="rrap"
+        realm_name="rrap", 
+        api_overrides= api_overrides
     )
 
     auth = DeviceFlow(config=config,
@@ -278,8 +293,21 @@ def random_num() -> int: return random.randint(100, 1000)
     # my_dataset = await client.datastore.interactive_dataset(dataset_id="10378.1/1948400")
     # await my_dataset.download_all_files(destination_directory="./")
 
-   """
-
+    """
+
+    await client.prov_api.generate_report(report_request = GenerateReportRequest(
+        id = "10378.1/1968661", 
+        item_subtype=ItemSubType.STUDY,
+        depth=1
+    ))
+
+    await client.prov_api.generate_report(report_request = GenerateReportRequest(
+        id = "10378.1/1968661", 
+        item_subtype=ItemSubType.STUDY,
+        depth=1
+    ), file_path="./idontexistpath/butinhere/")
+
+
     response = await client.prov_api.explore_upstream(
         starting_id="10378.1/1965416", 
         depth=2