diff --git a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 559423a14..e1dbf3ff8 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -100,6 +100,33 @@ Some important notes of the sync: Please refer to the [tracking-sheet-service](proc/service/tracking_sheet_srv.py) implementation. +### Custom CSV File Loader + +The application also supports loading data from a custom CSV file. The CSV file should have the following columns: + +| Sheet Header | Table | Field Name | +|----------------------|--------------|--------------------| +| individual_id | `Individual` | individual_id | +| individual_id_source | `Individual` | source | +| subject_id | `Subject` | subject_id | +| sample_id | `Sample` | sample_id | +| external_sample_id | `Sample` | external_sample_id | +| source | `Sample` | source | +| library_id | `Library` | library_id | +| phenotype | `Library` | phenotype | +| workflow | `Library` | workflow | +| quality | `Library` | quality | +| type | `Library` | type | +| coverage | `Library` | coverage | +| assay | `Library` | assay | +| project_name | `Project` | project_id | +| project_owner | `Contact` | contact_id | + +The CSV file must be supplied as a presigned URL; the loader reads the file from that URL and inserts its records into the database. +To trigger the loader, please see `./deploy/README.md` for more information. + +Please refer to the [load-csv-service](proc/service/load_csv_srv.py) implementation. 
+ ### Audit Data The application is configured with [django-simple-history](https://django-simple-history.readthedocs.io/en/latest/) diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/load_csv_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/load_csv_srv.py index a62178f41..f1a49164b 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/service/load_csv_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/load_csv_srv.py @@ -67,18 +67,18 @@ def load_metadata_csv(df: pd.DataFrame): # ------------------------------ idv = None individual_id = record.get('individual_id') - source = record.get('source') + idv_source = record.get('individual_id_source') - if individual_id and source: + if individual_id and idv_source: idv, is_idv_created, is_idv_updated = Individual.objects.update_or_create_if_needed( search_key={ "individual_id": individual_id, - "source": source + "source": idv_source }, data={ "individual_id": individual_id, - "source": source + "source": idv_source } ) if is_idv_created: @@ -124,7 +124,7 @@ def load_metadata_csv(df: pd.DataFrame): sample, is_smp_created, is_smp_updated = Sample.objects.update_or_create_if_needed( search_key={"sample_id": sample_id}, data={ - "sample_id": record.get('sample_id'), + "sample_id": sample_id, "external_sample_id": record.get('external_sample_id'), "source": get_value_from_human_readable_label(Source.choices, record.get('source')), } @@ -142,9 +142,9 @@ def load_metadata_csv(df: pd.DataFrame): if contact_id: contact, is_ctc_created, is_ctc_updated = Contact.objects.update_or_create_if_needed( - search_key={"contact_id": record.get('project_owner')}, + search_key={"contact_id": contact_id}, data={ - "contact_id": record.get('project_owner'), + "contact_id": contact_id, } ) if is_ctc_created: @@ -160,9 +160,9 @@ def load_metadata_csv(df: pd.DataFrame): project_id = record.get('project_name') if project_id: project, is_prj_created, is_prj_updated = 
Project.objects.update_or_create_if_needed( - search_key={"project_id": record.get('project_name')}, + search_key={"project_id":project_id}, data={ - "project_id": record.get('project_name'), + "project_id": project_id, } ) if is_prj_created: