From e626a5673427d44c6c6bb9b8417767616f00f9d3 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 5 Jun 2023 14:47:47 -0400 Subject: [PATCH 1/5] Add rna_kg template --- src/ontogpt/evaluation/rna_kg/abstract1.txt | 0 src/ontogpt/templates/composite_disease.py | 243 ++++++++++++++++++ src/ontogpt/templates/core.py | 95 +++---- src/ontogpt/templates/environmental_sample.py | 123 +++++---- src/ontogpt/templates/rna_kg.py | 174 +++++++++++++ src/ontogpt/templates/rna_kg.yaml | 105 ++++++++ 6 files changed, 643 insertions(+), 97 deletions(-) create mode 100644 src/ontogpt/evaluation/rna_kg/abstract1.txt create mode 100644 src/ontogpt/templates/composite_disease.py create mode 100644 src/ontogpt/templates/rna_kg.py create mode 100644 src/ontogpt/templates/rna_kg.yaml diff --git a/src/ontogpt/evaluation/rna_kg/abstract1.txt b/src/ontogpt/evaluation/rna_kg/abstract1.txt new file mode 100644 index 000000000..e69de29bb diff --git a/src/ontogpt/templates/composite_disease.py b/src/ontogpt/templates/composite_disease.py new file mode 100644 index 000000000..91a1d9f7e --- /dev/null +++ b/src/ontogpt/templates/composite_disease.py @@ -0,0 +1,243 @@ +from __future__ import annotations +from datetime import datetime, date +from enum import Enum +from typing import List, Dict, Optional, Any, Union, Literal +from pydantic import BaseModel as BaseModel, Field +from linkml_runtime.linkml_model import Decimal + +metamodel_version = "None" +version = "None" + +class WeakRefShimBaseModel(BaseModel): + __slots__ = '__weakref__' + +class ConfiguredBaseModel(WeakRefShimBaseModel, + validate_assignment = True, + validate_all = True, + underscore_attrs_are_private = True, + extra = 'forbid', + arbitrary_types_allowed = True): + pass + + +class NCITDrugType(str, Enum): + + + dummy = "dummy" + + +class NCITTreatmentType(str, Enum): + + + dummy = "dummy" + + +class NCITTActivityType(str, Enum): + + + dummy = "dummy" + + +class MAXOActionType(str, Enum): + + + dummy = "dummy" + + +class 
MESHTherapeuticType(str, Enum): + + + dummy = "dummy" + + +class CHEBIDrugType(str, Enum): + + + dummy = "dummy" + + +class NullDataOptions(str, Enum): + + UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION" + NOT_APPLICABLE = "NOT_APPLICABLE" + NOT_MENTIONED = "NOT_MENTIONED" + + + +class CompositeDisease(ConfiguredBaseModel): + + main_disease: Optional[str] = Field(None, description="""the name of the disease that is treated.""") + drugs: Optional[List[str]] = Field(default_factory=list, description="""semicolon-separated list of named small molecule drugs""") + treatments: Optional[List[str]] = Field(default_factory=list, description="""semicolon-separated list of therapies and treatments are indicated for treating the disease.""") + contraindications: Optional[List[str]] = Field(default_factory=list, description="""semicolon-separated list of therapies and treatments that are contra-indicated for the disease, and should not be used, due to risk of adverse effects.""") + treatment_mechanisms: Optional[List[TreatmentMechanism]] = Field(default_factory=list, description="""semicolon-separated list of treatment to asterisk-separated mechanism associations""") + treatment_efficacies: Optional[List[TreatmentEfficacy]] = Field(default_factory=list, description="""semicolon-separated list of treatment to efficacy associations, e.g. Imatinib*effective""") + treatment_adverse_effects: Optional[List[TreatmentAdverseEffect]] = Field(default_factory=list, description="""semicolon-separated list of treatment to adverse effect associations, e.g. 
Imatinib*nausea""") + + + +class ExtractionResult(ConfiguredBaseModel): + """ + A result of extracting knowledge on text + """ + input_id: Optional[str] = Field(None) + input_title: Optional[str] = Field(None) + input_text: Optional[str] = Field(None) + raw_completion_output: Optional[str] = Field(None) + prompt: Optional[str] = Field(None) + extracted_object: Optional[Any] = Field(None, description="""The complex objects extracted from the text""") + named_entities: Optional[List[Any]] = Field(default_factory=list, description="""Named entities extracted from the text""") + + + +class NamedEntity(ConfiguredBaseModel): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Gene(NamedEntity): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Symptom(NamedEntity): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Disease(NamedEntity): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class AdverseEffect(NamedEntity): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Treatment(NamedEntity): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Mechanism(NamedEntity): + + id: Optional[str] = 
Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Drug(NamedEntity): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class CompoundExpression(ConfiguredBaseModel): + + None + + + +class TreatmentMechanism(CompoundExpression): + + treatment: Optional[str] = Field(None) + mechanism: Optional[str] = Field(None) + + + +class TreatmentAdverseEffect(CompoundExpression): + + treatment: Optional[str] = Field(None) + adverse_effects: Optional[List[str]] = Field(default_factory=list) + + + +class TreatmentEfficacy(CompoundExpression): + + treatment: Optional[str] = Field(None) + efficacy: Optional[str] = Field(None) + + + +class Triple(CompoundExpression): + """ + Abstract parent for Relation Extraction tasks + """ + subject: Optional[str] = Field(None) + predicate: Optional[str] = Field(None) + object: Optional[str] = Field(None) + qualifier: Optional[str] = Field(None, description="""A qualifier for the statements, e.g. \"NOT\" for negation""") + subject_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the subject of the statement, e.g. \"high dose\" or \"intravenously administered\"""") + object_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the object of the statement, e.g. 
\"severe\" or \"with additional complications\"""") + + + +class TextWithTriples(ConfiguredBaseModel): + + publication: Optional[Publication] = Field(None) + triples: Optional[List[Triple]] = Field(default_factory=list) + + + +class RelationshipType(NamedEntity): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Publication(ConfiguredBaseModel): + + id: Optional[str] = Field(None, description="""The publication identifier""") + title: Optional[str] = Field(None, description="""The title of the publication""") + abstract: Optional[str] = Field(None, description="""The abstract of the publication""") + combined_text: Optional[str] = Field(None) + full_text: Optional[str] = Field(None, description="""The full text of the publication""") + + + +class AnnotatorResult(ConfiguredBaseModel): + + subject_text: Optional[str] = Field(None) + object_id: Optional[str] = Field(None) + object_text: Optional[str] = Field(None) + + + + +# Update forward refs +# see https://pydantic-docs.helpmanual.io/usage/postponed_annotations/ +CompositeDisease.update_forward_refs() +ExtractionResult.update_forward_refs() +NamedEntity.update_forward_refs() +Gene.update_forward_refs() +Symptom.update_forward_refs() +Disease.update_forward_refs() +AdverseEffect.update_forward_refs() +Treatment.update_forward_refs() +Mechanism.update_forward_refs() +Drug.update_forward_refs() +CompoundExpression.update_forward_refs() +TreatmentMechanism.update_forward_refs() +TreatmentAdverseEffect.update_forward_refs() +TreatmentEfficacy.update_forward_refs() +Triple.update_forward_refs() +TextWithTriples.update_forward_refs() +RelationshipType.update_forward_refs() +Publication.update_forward_refs() +AnnotatorResult.update_forward_refs() + diff --git a/src/ontogpt/templates/core.py b/src/ontogpt/templates/core.py index fba922326..2d48fe876 100644 --- 
a/src/ontogpt/templates/core.py +++ b/src/ontogpt/templates/core.py @@ -1,98 +1,104 @@ -"""Core template.""" from __future__ import annotations - -from typing import Any, List, Optional - -from pydantic import BaseModel as BaseModel -from pydantic import Field +from datetime import datetime, date +from enum import Enum +from typing import List, Dict, Optional, Any, Union, Literal +from pydantic import BaseModel as BaseModel, Field +from linkml_runtime.linkml_model import Decimal metamodel_version = "None" version = "None" - class WeakRefShimBaseModel(BaseModel): - __slots__ = "__weakref__" - - -class ConfiguredBaseModel( - WeakRefShimBaseModel, - validate_assignment=True, - validate_all=True, - underscore_attrs_are_private=True, - extra="forbid", - arbitrary_types_allowed=True, -): - pass - + __slots__ = '__weakref__' + +class ConfiguredBaseModel(WeakRefShimBaseModel, + validate_assignment = True, + validate_all = True, + underscore_attrs_are_private = True, + extra = 'forbid', + arbitrary_types_allowed = True): + pass + + +class NullDataOptions(str, Enum): + + UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION" + NOT_APPLICABLE = "NOT_APPLICABLE" + NOT_MENTIONED = "NOT_MENTIONED" + + class ExtractionResult(ConfiguredBaseModel): - """A result of extracting knowledge on text.""" - + """ + A result of extracting knowledge on text + """ input_id: Optional[str] = Field(None) input_title: Optional[str] = Field(None) input_text: Optional[str] = Field(None) raw_completion_output: Optional[str] = Field(None) prompt: Optional[str] = Field(None) - extracted_object: Optional[Any] = Field( - None, description="""The complex objects extracted from the text""" - ) - named_entities: Optional[List[Any]] = Field( - default_factory=list, description="""Named entities extracted from the text""" - ) + extracted_object: Optional[Any] = Field(None, description="""The complex objects extracted from the text""") + named_entities: Optional[List[Any]] = 
Field(default_factory=list, description="""Named entities extracted from the text""") + class NamedEntity(ConfiguredBaseModel): + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + class CompoundExpression(ConfiguredBaseModel): - pass + + None + class Triple(CompoundExpression): - """Abstract parent for Relation Extraction tasks.""" - + """ + Abstract parent for Relation Extraction tasks + """ subject: Optional[str] = Field(None) predicate: Optional[str] = Field(None) object: Optional[str] = Field(None) - qualifier: Optional[str] = Field( - None, description="""A qualifier for the statements, e.g. \"NOT\" for negation""" - ) - subject_qualifier: Optional[str] = Field( - None, - description="""An optional qualifier or modifier for the subject of the\ - statement, e.g. \"high dose\" or \"intravenously administered\"""", - ) - object_qualifier: Optional[str] = Field( - None, - description="""An optional qualifier or modifier for the object of\ - the statement, e.g. \"severe\" or \"with additional complications\"""", - ) + qualifier: Optional[str] = Field(None, description="""A qualifier for the statements, e.g. \"NOT\" for negation""") + subject_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the subject of the statement, e.g. \"high dose\" or \"intravenously administered\"""") + object_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the object of the statement, e.g. 
\"severe\" or \"with additional complications\"""") + class TextWithTriples(ConfiguredBaseModel): + publication: Optional[Publication] = Field(None) triples: Optional[List[Triple]] = Field(default_factory=list) + class RelationshipType(NamedEntity): + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + class Publication(ConfiguredBaseModel): + id: Optional[str] = Field(None, description="""The publication identifier""") title: Optional[str] = Field(None, description="""The title of the publication""") abstract: Optional[str] = Field(None, description="""The abstract of the publication""") combined_text: Optional[str] = Field(None) full_text: Optional[str] = Field(None, description="""The full text of the publication""") + class AnnotatorResult(ConfiguredBaseModel): + subject_text: Optional[str] = Field(None) object_id: Optional[str] = Field(None) object_text: Optional[str] = Field(None) + + # Update forward refs @@ -105,3 +111,4 @@ class AnnotatorResult(ConfiguredBaseModel): RelationshipType.update_forward_refs() Publication.update_forward_refs() AnnotatorResult.update_forward_refs() + diff --git a/src/ontogpt/templates/environmental_sample.py b/src/ontogpt/templates/environmental_sample.py index 105453622..136a141d1 100644 --- a/src/ontogpt/templates/environmental_sample.py +++ b/src/ontogpt/templates/environmental_sample.py @@ -1,148 +1,164 @@ -"""Environmental sample template.""" from __future__ import annotations - -from typing import Any, List, Optional - -from pydantic import BaseModel as BaseModel -from pydantic import Field +from datetime import datetime, date +from enum import Enum +from typing import List, Dict, Optional, Any, Union, Literal +from pydantic import BaseModel as BaseModel, Field +from linkml_runtime.linkml_model import Decimal metamodel_version = "None" version = "None" - class WeakRefShimBaseModel(BaseModel): - 
__slots__ = "__weakref__" - - -class ConfiguredBaseModel( - WeakRefShimBaseModel, - validate_assignment=True, - validate_all=True, - underscore_attrs_are_private=True, - extra="forbid", - arbitrary_types_allowed=True, -): - pass - + __slots__ = '__weakref__' + +class ConfiguredBaseModel(WeakRefShimBaseModel, + validate_assignment = True, + validate_all = True, + underscore_attrs_are_private = True, + extra = 'forbid', + arbitrary_types_allowed = True): + pass + + +class NullDataOptions(str, Enum): + + UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION" + NOT_APPLICABLE = "NOT_APPLICABLE" + NOT_MENTIONED = "NOT_MENTIONED" + + class Study(ConfiguredBaseModel): - location: Optional[List[str]] = Field( - default_factory=list, description="""the sites at which the study was conducted""" - ) - environmental_material: Optional[List[str]] = Field( - default_factory=list, description="""the environmental material that was sampled""" - ) + + location: Optional[List[str]] = Field(default_factory=list, description="""the sites at which the study was conducted""") + environmental_material: Optional[List[str]] = Field(default_factory=list, description="""the environmental material that was sampled""") environments: Optional[List[str]] = Field(default_factory=list) causal_relationships: Optional[List[CausalRelationship]] = Field(default_factory=list) variables: Optional[List[str]] = Field(default_factory=list) measurements: Optional[List[Measurement]] = Field(default_factory=list) + class ExtractionResult(ConfiguredBaseModel): - """A result of extracting knowledge on text.""" - + """ + A result of extracting knowledge on text + """ input_id: Optional[str] = Field(None) input_title: Optional[str] = Field(None) input_text: Optional[str] = Field(None) raw_completion_output: Optional[str] = Field(None) prompt: Optional[str] = Field(None) - extracted_object: Optional[Any] = Field( - None, description="""The complex objects extracted from the text""" - ) - 
named_entities: Optional[List[Any]] = Field( - default_factory=list, description="""Named entities extracted from the text""" - ) + extracted_object: Optional[Any] = Field(None, description="""The complex objects extracted from the text""") + named_entities: Optional[List[Any]] = Field(default_factory=list, description="""Named entities extracted from the text""") + class NamedEntity(ConfiguredBaseModel): + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + class Location(NamedEntity): + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + class EnvironmentalMaterial(NamedEntity): + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + class Environment(NamedEntity): + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + class Variable(NamedEntity): + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + class Unit(NamedEntity): + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + class CompoundExpression(ConfiguredBaseModel): - pass + + None + class Measurement(CompoundExpression): + value: Optional[str] = Field(None, description="""the value of the measurement""") unit: Optional[str] = Field(None, description="""the unit of the measurement""") + class CausalRelationship(CompoundExpression): 
- cause: Optional[str] = Field( - None, description="""the variable that is the cause of the effect""" - ) + + cause: Optional[str] = Field(None, description="""the variable that is the cause of the effect""") effect: Optional[str] = Field(None, description="""the things that is affected""") + class Triple(CompoundExpression): - """Abstract parent for Relation Extraction tasks.""" - + """ + Abstract parent for Relation Extraction tasks + """ subject: Optional[str] = Field(None) predicate: Optional[str] = Field(None) object: Optional[str] = Field(None) - qualifier: Optional[str] = Field( - None, description="""A qualifier for the statements, e.g. \"NOT\" for negation""" - ) - subject_qualifier: Optional[str] = Field( - None, - description="""An optional qualifier or modifier for the subject of\ - the statement, e.g. \"high dose\" or \"intravenously administered\"""", - ) - object_qualifier: Optional[str] = Field( - None, - description="""An optional qualifier or modifier for the object of\ - the statement, e.g. \"severe\" or \"with additional complications\"""", - ) + qualifier: Optional[str] = Field(None, description="""A qualifier for the statements, e.g. \"NOT\" for negation""") + subject_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the subject of the statement, e.g. \"high dose\" or \"intravenously administered\"""") + object_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the object of the statement, e.g. 
\"severe\" or \"with additional complications\"""") + class TextWithTriples(ConfiguredBaseModel): + publication: Optional[Publication] = Field(None) triples: Optional[List[Triple]] = Field(default_factory=list) + class RelationshipType(NamedEntity): + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + class Publication(ConfiguredBaseModel): + id: Optional[str] = Field(None, description="""The publication identifier""") title: Optional[str] = Field(None, description="""The title of the publication""") abstract: Optional[str] = Field(None, description="""The abstract of the publication""") combined_text: Optional[str] = Field(None) full_text: Optional[str] = Field(None, description="""The full text of the publication""") + class AnnotatorResult(ConfiguredBaseModel): + subject_text: Optional[str] = Field(None) object_id: Optional[str] = Field(None) object_text: Optional[str] = Field(None) + + # Update forward refs @@ -163,3 +179,4 @@ class AnnotatorResult(ConfiguredBaseModel): RelationshipType.update_forward_refs() Publication.update_forward_refs() AnnotatorResult.update_forward_refs() + diff --git a/src/ontogpt/templates/rna_kg.py b/src/ontogpt/templates/rna_kg.py new file mode 100644 index 000000000..4cb9767f1 --- /dev/null +++ b/src/ontogpt/templates/rna_kg.py @@ -0,0 +1,174 @@ +from __future__ import annotations +from datetime import datetime, date +from enum import Enum +from typing import List, Dict, Optional, Any, Union, Literal +from pydantic import BaseModel as BaseModel, Field +from linkml_runtime.linkml_model import Decimal + +metamodel_version = "None" +version = "None" + +class WeakRefShimBaseModel(BaseModel): + __slots__ = '__weakref__' + +class ConfiguredBaseModel(WeakRefShimBaseModel, + validate_assignment = True, + validate_all = True, + underscore_attrs_are_private = True, + extra = 'forbid', + arbitrary_types_allowed = 
True): + pass + + +class NullDataOptions(str, Enum): + + UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION" + NOT_APPLICABLE = "NOT_APPLICABLE" + NOT_MENTIONED = "NOT_MENTIONED" + + + +class GenemiRNARelationship(ConfiguredBaseModel): + + gene: Optional[str] = Field(None) + miRNA: Optional[str] = Field(None) + + + +class GeneDiseaseRelationship(ConfiguredBaseModel): + + gene: Optional[str] = Field(None) + disease: Optional[str] = Field(None) + + + +class MiRNADiseaseRelationship(ConfiguredBaseModel): + + gene: Optional[str] = Field(None) + disease: Optional[str] = Field(None) + + + +class ExtractionResult(ConfiguredBaseModel): + """ + A result of extracting knowledge on text + """ + input_id: Optional[str] = Field(None) + input_title: Optional[str] = Field(None) + input_text: Optional[str] = Field(None) + raw_completion_output: Optional[str] = Field(None) + prompt: Optional[str] = Field(None) + extracted_object: Optional[Any] = Field(None, description="""The complex objects extracted from the text""") + named_entities: Optional[List[Any]] = Field(default_factory=list, description="""Named entities extracted from the text""") + + + +class NamedEntity(ConfiguredBaseModel): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class MiRNA(NamedEntity): + + label: Optional[str] = Field(None, description="""the name of the miRNA""") + description: Optional[str] = Field(None, description="""a textual description of the miRNA""") + synonyms: Optional[List[str]] = Field(default_factory=list, description="""alternative names of the miRNA""") + disease: Optional[List[str]] = Field(default_factory=list) + miRNA_disease: Optional[List[MiRNADiseaseRelationship]] = Field(default_factory=list, description="""semicolon-separated list of miRNA to disease relationships""") + id: Optional[str] = Field(None, 
description="""A unique identifier for the named entity""") + + + +class Gene(NamedEntity): + + label: Optional[str] = Field(None, description="""the name of the gene""") + description: Optional[str] = Field(None, description="""a textual description of the gene""") + synonyms: Optional[List[str]] = Field(default_factory=list, description="""alternative names of the gene""") + miRNAs: Optional[List[str]] = Field(default_factory=list) + disease: Optional[List[str]] = Field(default_factory=list) + gene_miRNA: Optional[List[GenemiRNARelationship]] = Field(default_factory=list, description="""semicolon-separated list of gene to miRNA relationships""") + gene_disease: Optional[List[GeneDiseaseRelationship]] = Field(default_factory=list, description="""semicolon-separated list of gene to disease relationships""") + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + + + +class Disease(NamedEntity): + + label: Optional[str] = Field(None, description="""the name of the disease""") + description: Optional[str] = Field(None, description="""a textual description of the disease""") + synonyms: Optional[List[str]] = Field(default_factory=list, description="""alternative names of the disease""") + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + + + +class CompoundExpression(ConfiguredBaseModel): + + None + + + +class Triple(CompoundExpression): + """ + Abstract parent for Relation Extraction tasks + """ + subject: Optional[str] = Field(None) + predicate: Optional[str] = Field(None) + object: Optional[str] = Field(None) + qualifier: Optional[str] = Field(None, description="""A qualifier for the statements, e.g. \"NOT\" for negation""") + subject_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the subject of the statement, e.g. 
\"high dose\" or \"intravenously administered\"""") + object_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the object of the statement, e.g. \"severe\" or \"with additional complications\"""") + + + +class TextWithTriples(ConfiguredBaseModel): + + publication: Optional[Publication] = Field(None) + triples: Optional[List[Triple]] = Field(default_factory=list) + + + +class RelationshipType(NamedEntity): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Publication(ConfiguredBaseModel): + + id: Optional[str] = Field(None, description="""The publication identifier""") + title: Optional[str] = Field(None, description="""The title of the publication""") + abstract: Optional[str] = Field(None, description="""The abstract of the publication""") + combined_text: Optional[str] = Field(None) + full_text: Optional[str] = Field(None, description="""The full text of the publication""") + + + +class AnnotatorResult(ConfiguredBaseModel): + + subject_text: Optional[str] = Field(None) + object_id: Optional[str] = Field(None) + object_text: Optional[str] = Field(None) + + + + +# Update forward refs +# see https://pydantic-docs.helpmanual.io/usage/postponed_annotations/ +GenemiRNARelationship.update_forward_refs() +GeneDiseaseRelationship.update_forward_refs() +MiRNADiseaseRelationship.update_forward_refs() +ExtractionResult.update_forward_refs() +NamedEntity.update_forward_refs() +MiRNA.update_forward_refs() +Gene.update_forward_refs() +Disease.update_forward_refs() +CompoundExpression.update_forward_refs() +Triple.update_forward_refs() +TextWithTriples.update_forward_refs() +RelationshipType.update_forward_refs() +Publication.update_forward_refs() +AnnotatorResult.update_forward_refs() + diff --git a/src/ontogpt/templates/rna_kg.yaml b/src/ontogpt/templates/rna_kg.yaml new file mode 100644 
index 000000000..8bc6cd2da --- /dev/null +++ b/src/ontogpt/templates/rna_kg.yaml @@ -0,0 +1,105 @@ +id: https://w3id.org/ontogpt/biological_process +name: testRNA-KG-template +title: testRNA-KG Template +description: >- + A template for RNA-KG +prefixes: + linkml: https://w3id.org/linkml/ + +default_range: string + +imports: + - linkml:types + - core + +classes: + miRNA: + is_a: NamedEntity + attributes: + label: + description: the name of the miRNA + description: + description: a textual description of the miRNA + synonyms: + description: alternative names of the miRNA + multivalued: true + disease: + range: Disease + multivalued: true + miRNA_disease: + description: semicolon-separated list of miRNA to disease relationships + multivalued: true + range: miRNADiseaseRelationship + + Gene: + is_a: NamedEntity + attributes: + label: + description: the name of the gene + description: + description: a textual description of the gene + synonyms: + description: alternative names of the gene + multivalued: true + miRNAs: + range: miRNA + multivalued: true + disease: + range: Disease + multivalued: true + gene_miRNA: + description: semicolon-separated list of gene to miRNA relationships + multivalued: true + range: GenemiRNARelationship + gene_disease: + description: semicolon-separated list of gene to disease relationships + multivalued: true + range: GeneDiseaseRelationship + + Disease: + is_a: NamedEntity + id_prefixes: + - MONDO + annotations: + annotators: sqlite:obo:mondo + attributes: + label: + description: the name of the disease + description: + description: a textual description of the disease + synonyms: + description: alternative names of the disease + multivalued: true + + GenemiRNARelationship: + id_prefixes: + - RO + attributes: + gene: + range: Gene + miRNA: + range: miRNA + annotations: + annotators: sqlite:obo:ro + + GeneDiseaseRelationship: + id_prefixes: + - RO + attributes: + gene: + range: Gene + disease: + range: Disease + annotations: + annotators: 
sqlite:obo:ro + + miRNADiseaseRelationship: + id_prefixes: + - RO + attributes: + gene: + range: miRNA + disease: + range: Disease + annotations: + annotators: sqlite:obo:ro \ No newline at end of file From 33d8c65f647f1cffd87d48a88afe5419bcb6d21e Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 5 Jun 2023 14:48:07 -0400 Subject: [PATCH 2/5] Remove disease and mirna slots from Gene --- src/ontogpt/templates/rna_kg.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/ontogpt/templates/rna_kg.yaml b/src/ontogpt/templates/rna_kg.yaml index 8bc6cd2da..92d761d04 100644 --- a/src/ontogpt/templates/rna_kg.yaml +++ b/src/ontogpt/templates/rna_kg.yaml @@ -41,12 +41,6 @@ classes: synonyms: description: alternative names of the gene multivalued: true - miRNAs: - range: miRNA - multivalued: true - disease: - range: Disease - multivalued: true gene_miRNA: description: semicolon-separated list of gene to miRNA relationships multivalued: true From 64db1e1aa9e4d5049cb1a2b38298f02173cdcca3 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 5 Jun 2023 14:48:51 -0400 Subject: [PATCH 3/5] Remove disease slot from mirna --- src/ontogpt/templates/rna_kg.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ontogpt/templates/rna_kg.yaml b/src/ontogpt/templates/rna_kg.yaml index 92d761d04..524d50bc1 100644 --- a/src/ontogpt/templates/rna_kg.yaml +++ b/src/ontogpt/templates/rna_kg.yaml @@ -23,9 +23,6 @@ classes: synonyms: description: alternative names of the miRNA multivalued: true - disease: - range: Disease - multivalued: true miRNA_disease: description: semicolon-separated list of miRNA to disease relationships multivalued: true From 91327c3195b557618a7a87e1f9ee71aff156213f Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 5 Jun 2023 14:54:41 -0400 Subject: [PATCH 4/5] Add some rna_kg stuff --- src/ontogpt/evaluation/rna_kg/abstract1.txt | 13 +++++++++++++ src/ontogpt/evaluation/rna_kg/abstract2.txt | 16 ++++++++++++++++ src/ontogpt/templates/rna_kg.py 
| 3 --- src/ontogpt/templates/rna_kg.yaml | 4 ++++ 4 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 src/ontogpt/evaluation/rna_kg/abstract2.txt diff --git a/src/ontogpt/evaluation/rna_kg/abstract1.txt b/src/ontogpt/evaluation/rna_kg/abstract1.txt index e69de29bb..ab5e5cbb6 100644 --- a/src/ontogpt/evaluation/rna_kg/abstract1.txt +++ b/src/ontogpt/evaluation/rna_kg/abstract1.txt @@ -0,0 +1,13 @@ +Abstract +One challenge in miRNA–genes–diseases interaction studies is that it is challenging to +find labeled data that indicate a positive or negative relationship between miRNA and +genes. The use of one-class classification methods shows a promising path for validating +them. We have applied two one-class classification methods, Isolation Forest and +One-class SVM, to validate miRNAs interactions with the ERBB2 gene present in breast +cancer scenarios using features extracted via sequence-binding. We found that the +One-class SVM outperforms the Isolation Forest model, with values of sensitivity of +80.49% and a specificity of 86.49% showing results that are comparable to previous studies. +Additionally, we have demonstrated that the use of features extracted from a +sequence-based approach (considering miRNA and gene sequence binding characteristics) +and one-class models have proven to be a feasible method for validating these genetic +molecule interactions. \ No newline at end of file diff --git a/src/ontogpt/evaluation/rna_kg/abstract2.txt b/src/ontogpt/evaluation/rna_kg/abstract2.txt new file mode 100644 index 000000000..4b5518625 --- /dev/null +++ b/src/ontogpt/evaluation/rna_kg/abstract2.txt @@ -0,0 +1,16 @@ +MicroRNA (miRNA)–gene interactions are well-recognized as involved in the progression +of almost all cancer types including prostate cancer, which is one of the most common +cancers in men. This study explored the significantly dysregulated genes and miRNAs and +elucidated the potential miRNA–gene regulatory network in prostate cancer. 
Integrative +analysis of prostate cancer and normal prostate transcriptomic data in The Cancer Genome +Atlas dataset was conducted using both differential expression analysis and weighted +correlation network analysis (WGCNA). Thirteen genes (RRM2, ORC6, CDC45, CDKN2A, E2F2, +MYBL2, CCNB2, PLK1, FOXM1, CDC25C, PKMYT1, GTSE1, and CDC20) were potentially +correlated with prostate cancer based on functional enrichment analyses. MiRNAs +targeting these genes were predicted and eight miRNAs were intersections between +those miRNAs and the hub miRNAs obtained from miRNA WGCNA analysis. Three genes +(E2F2, RRM2, and PKMYT1) and four miRNAs (hsa-mir-17-5p, hsa-mir-20a-5p, hsa-mir-92a-3p, +and hsa-mir-93-5p) were key factors according to the interaction network. RRM2 and +PKMYT1 were significantly related to survival. These findings partially elucidated +the dysregulation of gene expressions in prostate cancer. Efficient manipulations of +the miRNA–gene interactions in prostate cancer may be exploited as promising therapeutics. 
\ No newline at end of file diff --git a/src/ontogpt/templates/rna_kg.py b/src/ontogpt/templates/rna_kg.py index 4cb9767f1..baa9328cc 100644 --- a/src/ontogpt/templates/rna_kg.py +++ b/src/ontogpt/templates/rna_kg.py @@ -75,7 +75,6 @@ class MiRNA(NamedEntity): label: Optional[str] = Field(None, description="""the name of the miRNA""") description: Optional[str] = Field(None, description="""a textual description of the miRNA""") synonyms: Optional[List[str]] = Field(default_factory=list, description="""alternative names of the miRNA""") - disease: Optional[List[str]] = Field(default_factory=list) miRNA_disease: Optional[List[MiRNADiseaseRelationship]] = Field(default_factory=list, description="""semicolon-separated list of miRNA to disease relationships""") id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") @@ -86,8 +85,6 @@ class Gene(NamedEntity): label: Optional[str] = Field(None, description="""the name of the gene""") description: Optional[str] = Field(None, description="""a textual description of the gene""") synonyms: Optional[List[str]] = Field(default_factory=list, description="""alternative names of the gene""") - miRNAs: Optional[List[str]] = Field(default_factory=list) - disease: Optional[List[str]] = Field(default_factory=list) gene_miRNA: Optional[List[GenemiRNARelationship]] = Field(default_factory=list, description="""semicolon-separated list of gene to miRNA relationships""") gene_disease: Optional[List[GeneDiseaseRelationship]] = Field(default_factory=list, description="""semicolon-separated list of gene to disease relationships""") id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") diff --git a/src/ontogpt/templates/rna_kg.yaml b/src/ontogpt/templates/rna_kg.yaml index 524d50bc1..580300fa4 100644 --- a/src/ontogpt/templates/rna_kg.yaml +++ b/src/ontogpt/templates/rna_kg.yaml @@ -30,6 +30,10 @@ classes: Gene: is_a: NamedEntity + id_prefixes: + - HGNC + annotations: + 
annotators: bioportal:hgnc-nr attributes: label: description: the name of the gene From 6a0d1fe915b3af2237c0035b77aca567baf709d3 Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Sun, 23 Jul 2023 09:52:37 +0200 Subject: [PATCH 5/5] Add gene-protein interaction template --- src/ontogpt/templates/gene_protein.py | 162 ++++++++++++++++++++++++ src/ontogpt/templates/gene_protein.yaml | 85 +++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 src/ontogpt/templates/gene_protein.py create mode 100644 src/ontogpt/templates/gene_protein.yaml diff --git a/src/ontogpt/templates/gene_protein.py b/src/ontogpt/templates/gene_protein.py new file mode 100644 index 000000000..f9e0c63c0 --- /dev/null +++ b/src/ontogpt/templates/gene_protein.py @@ -0,0 +1,162 @@ +from __future__ import annotations +from datetime import datetime, date +from enum import Enum +from typing import List, Dict, Optional, Any, Union, Literal +from pydantic import BaseModel as BaseModel, Field +from linkml_runtime.linkml_model import Decimal + +metamodel_version = "None" +version = "None" + +class WeakRefShimBaseModel(BaseModel): + __slots__ = '__weakref__' + +class ConfiguredBaseModel(WeakRefShimBaseModel, + validate_assignment = True, + validate_all = True, + underscore_attrs_are_private = True, + extra = 'forbid', + arbitrary_types_allowed = True): + pass + + +class NullDataOptions(str, Enum): + + UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION" + NOT_APPLICABLE = "NOT_APPLICABLE" + NOT_MENTIONED = "NOT_MENTIONED" + + + +class ExtractionResult(ConfiguredBaseModel): + """ + A result of extracting knowledge on text + """ + input_id: Optional[str] = Field(None) + input_title: Optional[str] = Field(None) + input_text: Optional[str] = Field(None) + raw_completion_output: Optional[str] = Field(None) + prompt: Optional[str] = Field(None) + extracted_object: Optional[Any] = Field(None, description="""The complex objects extracted from the text""") + named_entities: 
Optional[List[Any]] = Field(default_factory=list, description="""Named entities extracted from the text""") + + + +class NamedEntity(ConfiguredBaseModel): + + id: Optional[str] = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Protein(NamedEntity): + + id: str = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Gene(NamedEntity): + + id: str = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class ProteinToGenePredicate(NamedEntity): + + id: str = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class CompoundExpression(ConfiguredBaseModel): + + None + + + +class Triple(CompoundExpression): + """ + Abstract parent for Relation Extraction tasks + """ + subject: Optional[str] = Field(None) + predicate: Optional[str] = Field(None) + object: Optional[str] = Field(None) + qualifier: Optional[str] = Field(None, description="""A qualifier for the statements, e.g. \"NOT\" for negation""") + subject_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the subject of the statement, e.g. \"high dose\" or \"intravenously administered\"""") + object_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the object of the statement, e.g. \"severe\" or \"with additional complications\"""") + + + +class ProteinToGeneRelationship(Triple): + """ + A triple where the subject is a protein and the object is a gene. 
+ """ + subject: Optional[str] = Field(None, description="""The name of a protein.""") + predicate: Optional[str] = Field(None, description="""A predicate for protein to gene relationships.""") + object: Optional[str] = Field(None, description="""The name of a gene.""") + qualifier: Optional[str] = Field(None, description="""A qualifier for the statements, e.g. \"NOT\" for negation""") + subject_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the subject of the statement, e.g. \"high dose\" or \"intravenously administered\"""") + object_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the object of the statement, e.g. \"severe\" or \"with additional complications\"""") + + + +class TextWithTriples(ConfiguredBaseModel): + + publication: Optional[Publication] = Field(None) + triples: Optional[List[Triple]] = Field(default_factory=list) + + + +class GeneProInteractionDocument(TextWithTriples): + """ + A document that contains protein to gene relationships. 
+ """ + publication: Optional[Publication] = Field(None) + triples: Optional[List[ProteinToGeneRelationship]] = Field(default_factory=list) + + + +class RelationshipType(NamedEntity): + + id: str = Field(None, description="""A unique identifier for the named entity""") + label: Optional[str] = Field(None, description="""The label (name) of the named thing""") + + + +class Publication(ConfiguredBaseModel): + + id: Optional[str] = Field(None, description="""The publication identifier""") + title: Optional[str] = Field(None, description="""The title of the publication""") + abstract: Optional[str] = Field(None, description="""The abstract of the publication""") + combined_text: Optional[str] = Field(None) + full_text: Optional[str] = Field(None, description="""The full text of the publication""") + + + +class AnnotatorResult(ConfiguredBaseModel): + + subject_text: Optional[str] = Field(None) + object_id: Optional[str] = Field(None) + object_text: Optional[str] = Field(None) + + + + +# Update forward refs +# see https://pydantic-docs.helpmanual.io/usage/postponed_annotations/ +ExtractionResult.update_forward_refs() +NamedEntity.update_forward_refs() +Protein.update_forward_refs() +Gene.update_forward_refs() +ProteinToGenePredicate.update_forward_refs() +CompoundExpression.update_forward_refs() +Triple.update_forward_refs() +ProteinToGeneRelationship.update_forward_refs() +TextWithTriples.update_forward_refs() +GeneProInteractionDocument.update_forward_refs() +RelationshipType.update_forward_refs() +Publication.update_forward_refs() +AnnotatorResult.update_forward_refs() + diff --git a/src/ontogpt/templates/gene_protein.yaml b/src/ontogpt/templates/gene_protein.yaml new file mode 100644 index 000000000..9256cb417 --- /dev/null +++ b/src/ontogpt/templates/gene_protein.yaml @@ -0,0 +1,85 @@ +id: https://w3id.org/ontogpt/gene_protein +name: gene_protein +title: gene-protein test Template +description: >- + A template for gene-protein interactions. 
+ This template is intended to represent associations between genes and proteins. +prefixes: + linkml: https://w3id.org/linkml/ + core: http://w3id.org/ontogpt/core/ + geneID: https://www.ncbi.nlm.nih.gov/gene/ + PR: http://purl.obolibrary.org/obo/PR_ + +default_range: string + +imports: + - linkml:types + - core + +#================================== +# Classes # +#================================== +classes: + + GeneProInteractionDocument: + tree_root: true + description: A document that contains protein to gene relationships. + is_a: TextWithTriples + slot_usage: + triples: + range: ProteinToGeneRelationship + annotations: + prompt: >- + A semi-colon separated list of protein to gene relationships, + where the relationship is either "regulates" + or "gene product of". For example: + p53 protein regulates CDKN1A; + Large ribosomal subunit protein eL21 is gene product of RPL21 + + ProteinToGeneRelationship: + is_a: Triple + description: A triple where the subject is a protein and the object is a gene. + slot_usage: + subject: + range: Protein + description: >- + The name of a protein. + annotations: + prompt.examples: TNF protein, lipocalin-1, transcription factor GATA-4, transcription factor GATA-6, sonic hedgehog protein, catenin (Cadherin-Associated Protein) + object: + range: Gene + description: >- + The name of a gene. + annotations: + prompt.examples: EGFR, IL6, CTNNA1, GRB2, CCR5 + predicate: + range: ProteinToGenePredicate + description: A predicate for protein to gene relationships.
+ annotations: + prompt.examples: regulates, gene product of, directly regulates the activity of, is directly regulating the activity of + + Protein: + is_a: NamedEntity + id_prefixes: + - PR + annotations: + annotators: gilda:, sqlite:obo:pr + + Gene: + is_a: NamedEntity + id_prefixes: + - HGNC + - PR + - UniProtKB + annotations: + annotators: gilda:, sqlite:obo:hgnc, sqlite:obo:pr + + ProteinToGenePredicate: + is_a: NamedEntity + id_prefixes: + - RO + slot_usage: + id: + pattern: "RO:0002211|RO:0002204" + annotations: + annotators: gilda:, sqlite:obo:ro, bioportal:so, bioportal:sio \ No newline at end of file