Skip to content

Commit

Permalink
Merge pull request #398 from apriltuesday/issue-393
Browse files Browse the repository at this point in the history
Add record creation date to evidence
  • Loading branch information
apriltuesday authored Oct 11, 2023
2 parents b73c37c + d2076c9 commit d55aced
Show file tree
Hide file tree
Showing 10 changed files with 1,158 additions and 1,137 deletions.
2 changes: 1 addition & 1 deletion OT_SCHEMA_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.4.0
022fabb
3 changes: 1 addition & 2 deletions bin/trait_mapping/create_table_for_manual_curation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ def previous_and_replacement_mappings(trait_name, previous_mappings):
if trait_name not in previous_mappings:
yield '', ''
return
for uri in previous_mappings[trait_name]:
label = get_ontology_label(uri)
for uri, label in previous_mappings[trait_name]:
trait_status = get_trait_status(uri)
trait_string = '|'.join([uri, label, 'NOT_SPECIFIED', 'previously-used', trait_status])
replacement_string = find_replacement_mapping(uri)
Expand Down
5 changes: 5 additions & 0 deletions cmat/clinvar_xml_io/clinvar_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ def date(self):
"""This tracks the latest update date, counting even minor technical updates."""
return self.rcv.attrib['DateLastUpdated']

@property
def created_date(self):
    """Date on which this record was first made public on ClinVar (the RCV ``DateCreated`` attribute)."""
    rcv_attributes = self.rcv.attrib
    return rcv_attributes['DateCreated']

@property
def last_evaluated_date(self):
"""This tracks the latest (re)evaluation date for the clinical interpretation.
Expand Down
12 changes: 12 additions & 0 deletions cmat/output_generation/clinvar_to_evidence_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,15 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
return report


def format_creation_date(s):
    """Extract the first YYYY-MM-DD formatted date from a record creation date string.

    :param s: date string to search; may be None or empty
    :return: the first substring of the form ``dddd-dd-dd`` (digits and dashes), or None if the input is
        empty/None or contains no such substring
    """
    if not s:
        return None
    # Raw string so that \d reaches the regex engine as a digit class rather than being treated as an
    # (invalid) string escape sequence.
    m = re.search(r'\d{4}-\d{2}-\d{2}', s)
    # A successful match of this pattern is always 10 characters, so no emptiness check is needed.
    return m.group(0) if m else None


def generate_evidence_string(clinvar_record, allele_origins, disease_name, disease_source_id, disease_mapped_efo_id,
consequence_attributes):
"""Generates an evidence string based on ClinVar record and some additional attributes."""
Expand Down Expand Up @@ -235,6 +244,9 @@ def generate_evidence_string(clinvar_record, allele_origins, disease_name, disea
# RCV identifier.
'studyId': clinvar_record.accession,

# Record creation date, formatted as YYYY-MM-DD
'releaseDate': format_creation_date(clinvar_record.created_date),

# VARIANT ATTRIBUTES.
'targetFromSourceId': consequence_attributes.ensembl_gene_id,
'variantFunctionalConsequenceId': consequence_attributes.so_term.accession,
Expand Down
8 changes: 4 additions & 4 deletions data-exploration/filter_clinvar_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import logging
import xml.etree.ElementTree as ElementTree

from eva_cttv_pipeline.clinvar_xml_io.clinvar_xml_io import ClinVarRecord, find_mandatory_unique_element
from eva_cttv_pipeline.evidence_string_generation.clinvar_to_evidence_strings import get_consequence_types
from eva_cttv_pipeline.evidence_string_generation.consequence_type import process_consequence_type_file

from cmat.clinvar_xml_io import ClinVarRecord
from cmat.clinvar_xml_io.xml_parsing import find_mandatory_unique_element
from cmat.output_generation.clinvar_to_evidence_strings import get_consequence_types
from cmat.output_generation.consequence_type import process_consequence_type_file

logging.basicConfig()
logger = logging.getLogger(__name__)
Expand Down
2,260 changes: 1,130 additions & 1,130 deletions tests/output_generation/resources/end2end/expected/evidence_strings.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"15258582",
"15322982"
],
"releaseDate": "2012-08-13",
"studyId": "RCV000002127",
"targetFromSourceId": "ENSG00000139988",
"variantFunctionalConsequenceId": "SO_0001583",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"diseaseFromSource": "Skeletal dysplasia",
"diseaseFromSourceId": "C0410528",
"diseaseFromSourceMappedId": "HP_0002652",
"releaseDate": "2017-01-12",
"studyId": "RCV000415158",
"targetFromSourceId": "ENSG00000139988",
"variantFunctionalConsequenceId": "SO_0001583",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"15258582",
"15322982"
],
"releaseDate": "2012-08-13",
"studyId": "RCV000002127",
"targetFromSourceId": "ENSG00000139988",
"variantFunctionalConsequenceId": "SO_0001583",
Expand Down
2 changes: 2 additions & 0 deletions tests/output_generation/test_clinvar_to_evidence_strings.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import os
import re

import requests
import xml.etree.ElementTree as ElementTree

Expand Down

0 comments on commit d55aced

Please sign in to comment.