Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

129 export resource uuids #148

Merged
merged 9 commits into from
Mar 4, 2024
Merged
9 changes: 9 additions & 0 deletions importer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,12 @@ The coverage report `coverage.html` will be at the working directory
- The script will check to see if every user has a keycloak uuid that has a Practitioner uuid that matches the one provided in the csv file
- Note that if none of the Practitioner uuids match then all will be deleted
- Set `cascade_delete` to True or False if you would like to automatically delete any linked resources. If you set it to False, and there are any linked resources, then the resources will NOT be deleted

## 10. Export resources from API endpoint to CSV file
- Run `python3 main.py --export_resources True --parameter _lastUpdated --value gt2023-08-01 --limit 20 --resource_type Location --log_level info`
- `export_resources` can either be True or False, checks if it is True and exports the resources
- The `parameter` is used as a filter for the resources. The set default parameter is "_lastUpdated", other examples include, "name"
- The `value` is where you pass the actual parameter value to filter the resources. The set default value is "gt2023-01-01", other examples include, "Good Health Clinic 1"
- The `limit` is the number of resources exported at a time. The set default value is 1000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's add the default here as well

- Specify the `resource_type` you want to export, different resource_types are exported to different csv_files
- The csv_file containing the exported resources is labelled using the current time, to know when the resources were exported for example, csv/2024-02-21-12-21-export_Location.csv
127 changes: 120 additions & 7 deletions importer/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import csv
import json
import uuid
Expand Down Expand Up @@ -117,7 +118,7 @@ def handle_request(request_type, payload, url):
# and sets the user password
def create_user(user):
(firstName, lastName, username, email, id, userType, _, keycloakGroupID,
keycloakGroupName, applicationID, password) = user
keycloakGroupName, applicationID, password) = user

with open("json_payloads/keycloak_user_payload.json") as json_file:
payload_string = json_file.read()
Expand Down Expand Up @@ -300,7 +301,7 @@ def location_extras(resource, payload_string):

# custom extras for careTeams
def care_team_extras(
resource, payload_string, load_type, c_participants, c_orgs, ftype
resource, payload_string, load_type, c_participants, c_orgs, ftype
):
orgs_list = []
participant_list = []
Expand Down Expand Up @@ -873,6 +874,109 @@ def clean_duplicates(users, cascade_delete):
logging.info("No Practitioners found")


# Create a csv file and initialize the CSV writer
def write_csv(data, resource_type, fieldnames):
    """Write exported resources to a timestamped csv file under csv/exports.

    :param data: list of rows (each row a list of column values)
    :param resource_type: resource type name, used to label the csv file
    :param fieldnames: column headers written as the first row
    :return: path of the csv file that was written
    """
    logging.info("Writing to csv file")
    path = "csv/exports"
    # exist_ok avoids a race between the existence check and directory creation
    os.makedirs(path, exist_ok=True)

    current_time = datetime.now().strftime("%Y-%m-%d-%H-%M")
    csv_file = f"{path}/{current_time}-export_{resource_type}.csv"
    # newline="" is required by the csv module to avoid blank lines on Windows
    with open(csv_file, "w", newline="") as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(fieldnames)
        csv_writer.writerows(data)
    return csv_file


def get_base_url():
    """Return the FHIR server base url from the importer config module."""
    return config.fhir_base_url


# This function exports resources from the API to a csv file
def export_resources_to_csv(resource_type, parameter, value, limit):
base_url = get_base_url()
resource_url = "/".join([str(base_url), resource_type])
if len(parameter) > 0:
resource_url = (
resource_url + "?" + parameter + "=" + value + "&_count=" + str(limit)
)
response = handle_request("GET", "", resource_url)
if response[1] == 200:
resources = json.loads(response[0])
data = []
if "entry" in resources:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we check this with resources["entry"] instead ?

if resource_type == "Location":
elements = ["name", "status", "method", "id", "identifier", "parentName", "parentID", "type",
"typeCode",
"physicalType", "physicalTypeCode"]
elif resource_type == "Organization":
elements = ["name", "active", "method", "id", "identifier", "alias"]
elif resource_type == "CareTeam":
elements = ["name", "status", "method", "id", "identifier", "organizations", "participants"]
else:
elements = []
for x in resources["entry"]:
rl = []
orgs_list = []
participants_list = []
for element in elements:
try:
if element == "method":
value = "update"
elif element == "active":
value = x["resource"]["active"]
elif element == "identifier":
value = x["resource"]["identifier"][0]["value"]
elif element == "organizations":
organizations = x["resource"]["managingOrganization"]
for index, value in enumerate(organizations):
reference = x["resource"]["managingOrganization"][index]["reference"]
new_reference = reference.split("/", 1)[1]
display = x["resource"]["managingOrganization"][index]["display"]
organization = ":".join([new_reference, display])
orgs_list.append(organization)
string = "|".join(map(str, orgs_list))
value = string
elif element == "participants":
participants = x["resource"]["participant"]
for index, value in enumerate(participants):
reference = x["resource"]["participant"][index]["member"]["reference"]
new_reference = reference.split("/", 1)[1]
display = x["resource"]["participant"][index]["member"]["display"]
participant = ":".join([new_reference, display])
participants_list.append(participant)
string = "|".join(map(str, participants_list))
value = string
elif element == "parentName":
value = x["resource"]["partOf"]["display"]
elif element == "parentID":
reference = x["resource"]["partOf"]["reference"]
value = reference.split("/", 1)[1]
elif element == "type":
value = x["resource"]["type"][0]["coding"][0]["display"]
elif element == "typeCode":
value = x["resource"]["type"][0]["coding"][0]["code"]
elif element == "physicalType":
value = x["resource"]["physicalType"]["coding"][0]["display"]
elif element == "physicalTypeCode":
value = x["resource"]["physicalType"]["coding"][0]["code"]
elif element == "alias":
value = x["resource"]["alias"][0]
else:
value = x["resource"][element]
except KeyError:
value = ""
rl.append(value)
data.append(rl)
write_csv(data, resource_type, elements)
logging.info("Successfully written to csv")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is nothing here checking that this was actually successful
I tried exporting with an invalid date time, python3 main.py --export_resources True --parameter _lastUpdated --value gt2023-02-29 --limit 20 --resource_type Location --log_level info
I got an error and no export but this still printed successful

else:
logging.info("No Resources Found")
else:
logging.error(f"Failed to retrieve resource. Status code: {response[1]} response: {response[0]}")


class ResponseFilter(logging.Filter):
def __init__(self, param=None):
self.param = param
Expand Down Expand Up @@ -907,7 +1011,7 @@ def filter(self, record):


@click.command()
@click.option("--csv_file", required=True)
@click.option("--csv_file", required=False)
@click.option("--access_token", required=False)
@click.option("--resource_type", required=False)
@click.option("--assign", required=False)
Expand All @@ -916,11 +1020,14 @@ def filter(self, record):
@click.option("--roles_max", required=False, default=500)
@click.option("--cascade_delete", required=False, default=False)
@click.option("--only_response", required=False)
@click.option(
"--log_level", type=click.Choice(["DEBUG", "INFO", "ERROR"], case_sensitive=False)
)
@click.option("--log_level", type=click.Choice(["DEBUG", "INFO", "ERROR"], case_sensitive=False))
@click.option("--export_resources", required=False)
@click.option("--parameter", required=False, default="_lastUpdated")
@click.option("--value", required=False, default="gt2023-01-01")
@click.option("--limit", required=False, default=1000)
def main(
csv_file, access_token, resource_type, assign, setup, group, roles_max, cascade_delete, only_response, log_level
csv_file, access_token, resource_type, assign, setup, group, roles_max, cascade_delete, only_response, log_level,
export_resources, parameter, value, limit
):
if log_level == "DEBUG":
logging.basicConfig(filename='importer.log', encoding='utf-8', level=logging.DEBUG)
Expand All @@ -936,6 +1043,12 @@ def main(
start_time = datetime.now()
logging.info("Start time: " + start_time.strftime("%H:%M:%S"))

if export_resources == "True":
logging.info("Starting export...")
logging.info("Exporting " + resource_type)
export_resources_to_csv(resource_type, parameter, value, limit)
exit()

# set access token
if access_token:
global global_access_token
Expand Down
100 changes: 100 additions & 0 deletions importer/test_main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import json
import unittest
from datetime import datetime
from jsonschema import validate
from mock import patch
from main import (
read_csv,
write_csv,
build_payload,
build_org_affiliation,
extract_matches,
create_user_resources,
export_resources_to_csv,
)


Expand All @@ -18,6 +21,36 @@ def test_read_csv(self):
self.assertIsInstance(records, list)
self.assertEqual(len(records), 3)

def test_write_csv(self):
self.test_data = [
[
"e2e-mom",
"True",
"update",
"caffe509-ae56-4d42-945e-7b4c161723d1",
"d93ae7c3-73c0-43d1-9046-425a3466ecec",
"handy",
],
[
"e2e-skate",
"True",
"update",
"2d4feac9-9ab5-4585-9b33-e5abd14ceb0f",
"58605ed8-7217-4bf3-8122-229b6f47fa64",
"foolish",
],
]
self.test_resource_type = "test_organization"
self.test_fieldnames = ["name", "active", "method", "id", "identifier", "alias"]
write_csv(self.test_data, self.test_resource_type, self.test_fieldnames)
self.assertIsInstance(self.test_data, list)
self.assertEqual(len(self.test_data), 2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like these assert statements are not testing the result of your function call

current_time = datetime.now().strftime("%Y-%m-%d-%H-%M")
expected_csv_file_path = (
f"csv/exports/{current_time}-export_{self.test_resource_type}.csv"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is possible that this current_time gotten in the test is different from the current_time gotten when the function was called

)
self.assertTrue(expected_csv_file_path, "CSV file created in expected location")

@patch("main.get_resource")
def test_build_payload_organizations(self, mock_get_resource):
mock_get_resource.return_value = "1"
Expand Down Expand Up @@ -390,6 +423,73 @@ def test_update_resource_with_non_existing_id_fails(self, mock_get_resource):
"Trying to update a Non-existent resource", str(raised_error.exception)
)

@patch("main.write_csv")
@patch("main.handle_request")
@patch("main.get_base_url")
def test_export_resource_to_csv(
    self, mock_get_base_url, mock_handle_request, mock_write_csv
):
    """A Location bundle is flattened into rows and handed to write_csv."""
    mock_get_base_url.return_value = "https://example.smartregister.org/fhir"

    # Single-entry FHIR Bundle as the mocked API response
    location_resource = {
        "name": "City1",
        "status": "active",
        "id": "ba787982-b973-4bd5-854e-eacbe161e297",
        "identifier": [{"value": "ba787 982-b973-4bd5-854e-eacbe161e297"}],
        "partOf": {
            "display": "test location-1",
            "reference": "Location/18fcbc2e-4240-4a84-a270-7a444523d7b6",
        },
        "type": [{"coding": [{"display": "Jurisdiction", "code": "jdn"}]}],
        "physicalType": {
            "coding": [{"display": "Jurisdiction", "code": "jdn"}]
        },
    }
    bundle = {"entry": [{"resource": location_resource}]}
    mock_handle_request.return_value = (json.dumps(bundle), 200)

    expected_rows = [
        [
            "City1",
            "active",
            "update",
            "ba787982-b973-4bd5-854e-eacbe161e297",
            "ba787 982-b973-4bd5-854e-eacbe161e297",
            "test location-1",
            "18fcbc2e-4240-4a84-a270-7a444523d7b6",
            "Jurisdiction",
            "jdn",
            "Jurisdiction",
            "jdn",
        ]
    ]
    expected_columns = [
        "name",
        "status",
        "method",
        "id",
        "identifier",
        "parentName",
        "parentID",
        "type",
        "typeCode",
        "physicalType",
        "physicalTypeCode",
    ]

    export_resources_to_csv("Location", "_lastUpdated", "gt2023-08-01", 1)
    mock_write_csv.assert_called_once_with(
        expected_rows, "Location", expected_columns
    )


# Allow the suite to be run directly: `python test_main.py`
if __name__ == "__main__":
    unittest.main()
Loading