-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add archived_datasets folder with old mappings
- Loading branch information
Showing
129 changed files
with
28,437 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Manually dispatched workflow (workflow_dispatch) with a single job, run-sparql, made of a checkout | ||
# followed by two vemonet/sparql-operations-action steps: the UniProt mapping, then compute-hcls-stats. | ||
name: UniProt to BioLink RDF | ||
# TODO: Add step to manage versioning (delete previous graph, load new graph, generate metadata) | ||
on: | ||
workflow_dispatch: | ||
inputs: | ||
# Passed as `endpoint` to both SPARQL steps, and as `servicevar` to the metadata step. | ||
endpoint: | ||
description: 'Upload to SPARQL endpoint' | ||
required: true | ||
default: 'https://graphdb.dumontierlab.com/repositories/ncats-red-kg/statements' | ||
# Passed as `outputvar` to the mapping step and as `inputvar` to the metadata step. | ||
graph: | ||
description: 'In the Graph' | ||
required: true | ||
default: 'https://w3id.org/d2s/graph/uniprot' | ||
|
||
jobs: | ||
run-sparql: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v2 | ||
|
||
# Step 1: queries are read from the checked-out repository path datasets/uniprot/mapping. | ||
# Credentials come from the GRAPHDB_USER / GRAPHDB_PASSWORD repository secrets. | ||
- name: Run SPARQL queries to convert UniProt | ||
uses: vemonet/sparql-operations-action@v1 | ||
with: | ||
file: datasets/uniprot/mapping | ||
endpoint: ${{ github.event.inputs.endpoint }} | ||
user: ${{ secrets.GRAPHDB_USER }} | ||
password: ${{ secrets.GRAPHDB_PASSWORD }} | ||
inputvar: https://sparql.uniprot.org | ||
outputvar: ${{ github.event.inputs.graph }} | ||
servicevar: https://sparql.uniprot.org | ||
|
||
# Step 2: queries are referenced by a GitHub URL instead of a local path. | ||
- name: Compute and insert HCLS descriptive metadata | ||
uses: vemonet/sparql-operations-action@v1 | ||
with: | ||
file: https://github.com/MaastrichtU-IDS/d2s-scripts-repository/tree/master/sparql/compute-hcls-stats | ||
endpoint: ${{ github.event.inputs.endpoint }} | ||
user: ${{ secrets.GRAPHDB_USER }} | ||
password: ${{ secrets.GRAPHDB_PASSWORD }} | ||
inputvar: ${{ github.event.inputs.graph }} | ||
outputvar: https://w3id.org/d2s/metadata | ||
# NOTE(review): servicevar reuses the `endpoint` input, whose default ends in /statements, while the | ||
# commented-out alternative below has no /statements suffix - confirm which form the queries expect. | ||
servicevar: ${{ github.event.inputs.endpoint }} | ||
# servicevar: http://localhost:7200/repositories/ncats-red-kg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Manually dispatched workflow (workflow_dispatch) with a single job, run-sparql, made of a checkout | ||
# followed by two vemonet/sparql-operations-action steps: the WikiPathways mapping, then compute-hcls-stats. | ||
name: WikiPathways to BioLink RDF | ||
# TODO: Add step to manage versioning (delete previous graph, load new graph, generate metadata) | ||
on: | ||
workflow_dispatch: | ||
inputs: | ||
# Passed as `endpoint` to both SPARQL steps, and as `servicevar` to the metadata step. | ||
endpoint: | ||
description: 'Upload to SPARQL endpoint' | ||
required: true | ||
default: 'https://graphdb.dumontierlab.com/repositories/ncats-red-kg/statements' | ||
# Passed as `outputvar` to the mapping step and as `inputvar` to the metadata step. | ||
graph: | ||
description: 'In the Graph' | ||
required: true | ||
default: 'https://w3id.org/d2s/graph/wikipathways' | ||
|
||
jobs: | ||
run-sparql: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v2 | ||
|
||
# Step 1: queries are read from the checked-out repository path datasets/wikipathways/mapping. | ||
# Credentials come from the GRAPHDB_USER / GRAPHDB_PASSWORD repository secrets. | ||
- name: Run SPARQL queries to convert Wikipathways | ||
uses: vemonet/sparql-operations-action@v1 | ||
with: | ||
file: datasets/wikipathways/mapping | ||
endpoint: ${{ github.event.inputs.endpoint }} | ||
user: ${{ secrets.GRAPHDB_USER }} | ||
password: ${{ secrets.GRAPHDB_PASSWORD }} | ||
inputvar: http://rdf.wikipathways.org/ | ||
outputvar: ${{ github.event.inputs.graph }} | ||
servicevar: http://sparql.wikipathways.org/sparql | ||
|
||
# Step 2: queries are referenced by a GitHub URL instead of a local path. | ||
- name: Compute and insert HCLS descriptive metadata | ||
uses: vemonet/sparql-operations-action@v1 | ||
with: | ||
file: https://github.com/MaastrichtU-IDS/d2s-scripts-repository/tree/master/sparql/compute-hcls-stats | ||
endpoint: ${{ github.event.inputs.endpoint }} | ||
user: ${{ secrets.GRAPHDB_USER }} | ||
password: ${{ secrets.GRAPHDB_PASSWORD }} | ||
inputvar: ${{ github.event.inputs.graph }} | ||
outputvar: https://w3id.org/d2s/metadata | ||
# NOTE(review): servicevar reuses the `endpoint` input, whose default ends in /statements, while the | ||
# commented-out alternative below has no /statements suffix - confirm which form the queries expect. | ||
servicevar: ${{ github.event.inputs.endpoint }} | ||
# servicevar: http://localhost:7200/repositories/ncats-red-kg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
|
||
# Parameter file for the "date" dataset; cwl_workflow_filename below names csv-virtuoso.cwl as the workflow. | ||
# NOTE(review): presumably consumed as a CWL job/input file - confirm against the d2s-core workflows. | ||
## For Tabular files workflows | ||
dataset_to_process: "date" | ||
|
||
## For XML workflows | ||
# dir_to_process: | ||
# class: Directory | ||
# path: ../../workspace/input/date | ||
|
||
sparql_final_graph_uri: "https://w3id.org/d2s/graph/date" | ||
|
||
|
||
# Final SPARQL endpoint to load the BioLink RDF | ||
# Add /statements for RDF4J server like GraphDB | ||
sparql_final_triplestore_url: "https://graphdb.dumontierlab.com/repositories/ncats-red-kg/statements" | ||
# sparql_final_triplestore_url: "http://graphdb:7200/repositories/ncats-red-kg/statements" | ||
|
||
# NOTE(review): credentials are stored in plain text in this file - confirm they are non-sensitive defaults. | ||
sparql_final_triplestore_username: "import_user" | ||
sparql_final_triplestore_password: "dba" | ||
|
||
# R2RML params | ||
input_data_jdbc: "jdbc:drill:drillbit=drill:31010" | ||
|
||
# Temporary triplestore (e.g. Virtuoso). TODO: improve | ||
# The service URL and the triplestore URL below are set to the same value. | ||
sparql_tmp_service_url: "http://tmp-virtuoso:8890/sparql" | ||
# sparql_tmp_service_url: "http://blazegraph:8082/bigdata/sparql" | ||
|
||
sparql_tmp_triplestore_url: "http://tmp-virtuoso:8890/sparql" | ||
# sparql_tmp_triplestore_url: "http://blazegraph:8082/bigdata/sparql" | ||
sparql_tmp_triplestore_username: "dba" | ||
sparql_tmp_triplestore_password: "dba" | ||
|
||
# The temporary graph URI is identical to sparql_final_graph_uri above. | ||
sparql_tmp_graph_uri: "https://w3id.org/d2s/graph/date" | ||
# sparql_tmp_graph_uri: "https://w3id.org/d2s/graph/xml2rdf" | ||
tmp_triplestore_container_id: "d2s-tmp-virtuoso" | ||
tmp_triplestore_load_dir: "/usr/local/virtuoso-opensource/var/lib/virtuoso/db" | ||
|
||
# Split params | ||
# split_property: "https://w3id.org/biolink/vocab/has_participant" | ||
# split_class: "https://w3id.org/biolink/vocab/GeneGrouping" | ||
# split_delimiter: "," | ||
# split_quote: '"' | ||
|
||
# Relative locations of the SPARQL mapping queries and the metadata queries. | ||
sparql_transform_queries_path: "mapping" | ||
sparql_insert_metadata_path: "metadata" | ||
|
||
cwl_workflow_filename: "csv-virtuoso.cwl" | ||
|
||
# Directory holding this configuration (the current directory). | ||
config_dir: | ||
class: Directory | ||
path: . | ||
|
||
# Directory of the d2s-core checkout, two levels above this file. | ||
cwl_dir: | ||
class: Directory | ||
path: ../../d2s-core |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/bin/bash
# Download the DATE resource archive and convert its drug/target/pathway TSV
# into a fully quoted CSV file (date.csv) written to the current directory.

# TSV expected inside the extracted archive.
input_tsv="date_resource/Drug_target_reactome_pathway.tsv"

# -N (timestamping): only re-download when the remote file is newer than the local copy.
wget -N http://tatonettilab.org/resources/DATE/date_resource.zip

# Unzip every archive found in the current directory, overwriting existing files.
# The wildcard is escaped so that unzip, not the shell, expands it.
unzip -o \*.zip

# Stop here if the download or the extraction did not produce the expected file:
# running sed on a missing input would still create an empty date.csv through the
# redirection, and that failure would only surface much later in the pipeline.
if [ ! -s "$input_tsv" ]; then
  echo "ERROR: $input_tsv is missing or empty; date.csv was not generated" >&2
  exit 1
fi

# Convert TSV to CSV:
#   1. backslash-escape the double quotes embedded in values
#   2. turn every tab into a "," field separator
#   3. add the opening quote of the first field
#   4. add the closing quote of the last field
#   5. drop the carriage return left by CRLF line endings
sed -e 's/"/\\"/g' -e 's/\t/","/g' -e 's/^/"/' -e 's/$/"/' -e 's/\r//' "$input_tsv" > date.csv

# rm date_resource/Drug_target_reactome_pathway_filtered.tsv
# The archive should contain 2 TSV files:
# date_resource/Drug_target_reactome_pathway.tsv
# date_resource/Drug_target_reactome_pathway_filtered.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# Maps the generic RDF produced for the DATE drug/target/pathway table (d2smodel: properties)
# to BioLink: drugs, proteins, genes, pathways, tissues and one ChemicalToGeneAssociation per
# (drug, target, pathway) combination.
# <?_output>, <?_service> and <?_input> are placeholders, not valid IRIs.
# NOTE(review): presumably substituted by the tool that runs the query (outputvar / servicevar /
# inputvar style parameters) - confirm against the runner configuration.
PREFIX d2smodel: <https://w3id.org/d2s/model/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX bl: <https://w3id.org/biolink/vocab/>
PREFIX w3idvocab: <https://w3id.org/d2s/vocab/>
INSERT
{
  GRAPH <?_output> {
    ?drugUri a bl:Drug ;
      bl:id ?drugId ;
      bl:name ?drugName .

    ?targetUniprotUri a bl:Protein .

    # Only produced when the OPTIONAL target symbol is bound (and is a single symbol).
    ?targetSymbolUri a bl:Gene ;
      bl:id ?targetSymbol ;
      bl:has_gene_product ?targetUniprotUri ;
      bl:systematic_synonym ?targetSymbol .
      # expressed_in tissue?

    ?pathwayUri a bl:Pathway ; # tissue-specific molecular pathway
      bl:name ?pathwayName ;
      bl:part_of ?tissueUri .

    ?tissueUri a bl:GrossAnatomicalStructure ; # tissue
      bl:name ?tissueName .

    ?interactionUri a bl:ChemicalToGeneAssociation ;
      bl:relation bl:interacts_with ;
      bl:subject ?drugUri ;
      bl:object ?targetUniprotUri ;
      bl:part_of ?pathwayUri .

    # TODO: We also could create a bl:ChemicalToPathwayAssociation. What would be the best choice?

    # TODO: CellLine https://biolink.github.io/biolink-model/docs/CellLineToDiseaseOrPhenotypicFeatureAssociation.html
    #?cellLineUri a bl:CellLine ;
      #bl:id ?cellLineId .
  }
}
WHERE {
  SERVICE <?_service> {
    GRAPH <?_input> {
      # To filter out Drug_target_reactome_pathway_filtered.tsv file
      # TODO: Be careful the file path in the type can change:
      # ?s a <http://data2services/data/ncats/date/Drug_target_reactome_pathway.tsv> ;
      ?s d2smodel:Drug_idStitch ?drugId ; # eg: CID000004927
        d2smodel:Drug_name ?drugName .
      # Strip the "CID" prefix AND its zero padding so the IRI carries the plain numeric
      # PubChem compound id (CID000004927 -> 4927) instead of the padded form (000004927).
      BIND( iri(concat("https://identifiers.org/pubchem.compound/", replace(?drugId, "^CID0*", "") ) ) AS ?drugUri )

      ?s d2smodel:TargetUniprot ?targetUniprotId .
      BIND( iri(concat("https://identifiers.org/uniprot/", ?targetUniprotId ) ) AS ?targetUniprotUri )

      ?s d2smodel:Pathway ?pathwayName . # eg: Retrograde neurotrophin signalling
      # Pathways have no identifier in the source, so the IRI is derived from a hash of the name.
      BIND( iri(concat("https://w3id.org/d2s/data/pathway/", md5(?pathwayName) ) ) AS ?pathwayUri )

      BIND( iri(concat("https://w3id.org/d2s/data/protein/interaction/", md5(concat(?drugId, ?targetUniprotId, ?pathwayName)) ) ) AS ?interactionUri )
      # Not used by the INSERT template yet; re-enable together with the
      # bl:ChemicalToPathwayAssociation TODO in the template.
      # BIND( iri(concat("https://w3id.org/d2s/data/pathway/association/", md5(concat(?drugId, ?targetUniprotId, ?pathwayName)) ) ) AS ?pathwayAssociationUri )

      OPTIONAL {
        ?s d2smodel:Cell_line_id ?cellLineId . # NA (majority), HT1080, SHSYSY.RA, astrocytes, GM2313...
        # "NA" means no cell line: leave ?cellLineUri unbound rather than minting iri("").
        FILTER( ?cellLineId != "NA" )
        BIND( iri(concat("https://w3id.org/d2s/data/cell_line/", ?cellLineId) ) AS ?cellLineUri )
      }
      OPTIONAL {
        ?s d2smodel:Tissue ?tissueName . # Fetalbrain
        BIND( iri(concat("https://w3id.org/d2s/data/tissue/", md5(?tissueName) ) ) AS ?tissueUri )
      }
      OPTIONAL {
        ?s d2smodel:Pathway_size ?pathwaySize . # 42...
      }
      OPTIONAL {
        ?s d2smodel:TargetSymbol ?targetSymbol . # CHRM3
        FILTER(!contains(?targetSymbol, "c(")) # filter out symbol like c(\"CALM1\", \"CALM2\", \"CALM3\")
        BIND( iri(concat("https://identifiers.org/hgnc.symbol/", ?targetSymbol ) ) AS ?targetSymbolUri )
      }
      OPTIONAL {
        ?s d2smodel:Target_class ?targetClass . # gpcr, enzyme, lgic, vgic, transporter, other_protein, catalytic_receptor, nhr, other_ic
        BIND( iri(concat("https://w3id.org/d2s/data/protein/class/", md5(?targetClass) ) ) AS ?targetClassUri )
      }
      OPTIONAL {
        ?s d2smodel:Dataset ?dataset . # GTEx, U133A, NCI60, HPM_PRT
      }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# RML mapping for date.csv: one TriplesMap (map:map_interactions_0) plus one function map (map:fn_0). | ||
# NOTE(review): the numbered node names suggest this file is generated output - confirm before hand-editing. | ||
@prefix rr: <http://www.w3.org/ns/r2rml#>. | ||
@prefix rml: <http://semweb.mmlab.be/ns/rml#>. | ||
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>. | ||
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>. | ||
@prefix ql: <http://semweb.mmlab.be/ns/ql#>. | ||
@prefix map: <http://mapping.example.com/>. | ||
|
||
# Function map referenced by map:om_6: executes toUpperCaseURL (map:pomexec_0) with parameter map:pom_7. | ||
map:fn_0 rml:logicalSource map:source_0; | ||
rr:predicateObjectMap map:pomexec_0, map:pom_7. | ||
# Main triples map: subject map:s_0 with seven predicate-object maps (pom_0 .. pom_6). | ||
map:map_interactions_0 rml:logicalSource map:source_0; | ||
a rr:TriplesMap; | ||
rdfs:label "interactions"; | ||
rr:subjectMap map:s_0; | ||
rr:predicateObjectMap map:pom_0, map:pom_1, map:pom_2, map:pom_3, map:pom_4, map:pom_5, map:pom_6. | ||
# Object maps. om_0 is the rdf:type value (paired with pm_0 in pom_0). | ||
map:om_0 a rr:ObjectMap; | ||
rr:constant "https://w3id.org/biolink/vocab/PairwiseGeneToGeneInteraction"; | ||
rr:termType rr:IRI. | ||
# NOTE(review): confirm that the CSV header really contains a column named Drug_ID_Stitch. | ||
map:om_1 a rr:ObjectMap; | ||
rr:template "https://identifiers.org/pubchem.compound/{Drug_ID_Stitch}"; | ||
rr:termType rr:IRI. | ||
map:om_2 a rr:ObjectMap; | ||
rr:template "https://identifiers.org/uniprot:{Target(uniprot)}"; | ||
rr:termType rr:IRI. | ||
map:om_3 a rr:ObjectMap; | ||
rr:constant "https://w3id.org/biolink/vocab/interacts_with"; | ||
rr:termType rr:IRI. | ||
map:om_4 a rr:ObjectMap; | ||
rr:template "https://w3id.org/d2s/dataset/date/{Dataset}"; | ||
rr:termType rr:IRI. | ||
# The Pathway column emitted as a plain literal. | ||
map:om_5 a rr:ObjectMap; | ||
rml:reference "Pathway"; | ||
rr:termType rr:Literal. | ||
# IRI produced by evaluating the function map map:fn_0. | ||
map:om_6 a <http://semweb.mmlab.be/ns/fnml#FunctionTermMap>; | ||
rr:termType rr:IRI; | ||
<http://semweb.mmlab.be/ns/fnml#functionValue> map:fn_0. | ||
# Literal argument handed to the function through predicate map:pm_7 (idlab str). | ||
map:om_7 a rr:ObjectMap; | ||
rr:template "https://w3id.org/d2s/data/date/pathway/{Pathway}"; | ||
rr:termType rr:Literal. | ||
map:omexec_0 rr:constant "http://example.com/idlab/function/toUpperCaseURL"; | ||
rr:termType rr:IRI. | ||
# Predicate maps. pm_5 and pm_6 both use bl:part_of (literal pathway name and function-built IRI). | ||
map:pm_0 a rr:PredicateMap; | ||
rr:constant rdf:type. | ||
map:pm_1 a rr:PredicateMap; | ||
rr:constant <https://w3id.org/biolink/vocab/subject>. | ||
map:pm_2 a rr:PredicateMap; | ||
rr:constant <https://w3id.org/biolink/vocab/object>. | ||
map:pm_3 a rr:PredicateMap; | ||
rr:constant <https://w3id.org/biolink/vocab/relation>. | ||
map:pm_4 a rr:PredicateMap; | ||
rr:constant <https://w3id.org/biolink/vocab/provided_by>. | ||
map:pm_5 a rr:PredicateMap; | ||
rr:constant <https://w3id.org/biolink/vocab/part_of>. | ||
map:pm_6 a rr:PredicateMap; | ||
rr:constant <https://w3id.org/biolink/vocab/part_of>. | ||
map:pm_7 a rr:PredicateMap; | ||
rr:constant <http://example.com/idlab/function/str>. | ||
map:pmexec_0 rr:constant <https://w3id.org/function/ontology#executes>. | ||
# Predicate-object maps: pom_N pairs pm_N with om_N. | ||
map:pom_0 a rr:PredicateObjectMap; | ||
rr:predicateMap map:pm_0; | ||
rr:objectMap map:om_0. | ||
map:pom_1 a rr:PredicateObjectMap; | ||
rr:predicateMap map:pm_1; | ||
rr:objectMap map:om_1. | ||
map:pom_2 a rr:PredicateObjectMap; | ||
rr:predicateMap map:pm_2; | ||
rr:objectMap map:om_2. | ||
map:pom_3 a rr:PredicateObjectMap; | ||
rr:predicateMap map:pm_3; | ||
rr:objectMap map:om_3. | ||
map:pom_4 a rr:PredicateObjectMap; | ||
rr:predicateMap map:pm_4; | ||
rr:objectMap map:om_4. | ||
map:pom_5 a rr:PredicateObjectMap; | ||
rr:predicateMap map:pm_5; | ||
rr:objectMap map:om_5. | ||
map:pom_6 a rr:PredicateObjectMap; | ||
rr:predicateMap map:pm_6; | ||
rr:objectMap map:om_6. | ||
map:pom_7 a rr:PredicateObjectMap; | ||
rr:predicateMap map:pm_7; | ||
rr:objectMap map:om_7. | ||
map:pomexec_0 rr:predicateMap map:pmexec_0; | ||
rr:objectMap map:omexec_0. | ||
# Subject IRI template: built from the drug id and the UniProt target columns. | ||
map:s_0 a rr:SubjectMap; | ||
rr:template "https://w3id.org/d2s/data/date/interaction/{Drug_ID_Stitch}_{Target(uniprot)}". | ||
# Logical source: the CSV file date.csv. | ||
map:source_0 a rml:LogicalSource; | ||
rml:source "date.csv"; | ||
rml:referenceFormulation ql:CSV. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# YARRRML mapping for date.csv. Two mappings are defined:
#   interactions - one bl:ChemicalToGeneAssociation per drug/target pair
#   drugs        - one bl:ChemicalSubstance per drug, carrying its name
prefixes:
  grel: "http://users.ugent.be/~bjdmeest/function/grel.ttl#"
  idlab: "http://example.com/idlab/function/"
  xsd: "http://www.w3.org/2001/XMLSchema#"
  rdfs: "http://www.w3.org/2000/01/rdf-schema#"
  bl: "https://w3id.org/biolink/vocab/"
  d2s: "https://w3id.org/d2s/"
  pubmed: "https://identifiers.org/pubmed:"

mappings:
  interactions:
    sources:
      - ['date.csv~csv']
      # Dataset Drug_name Drug_ID(Stitch) Tissue Cell_line_ID Target(uniprot) Target(symbol) Target_class Pathway Pathway_size
      # U133A leuprolide acetate CID000003911 Pituitary NA P30968 GNRHR gpcr Eukaryotic Translation Elongation 89
      # U133A leuprolide acetate CID000003911 Pituitary NA P30968 GNRHR gpcr Growth hormone receptor signaling 41

    # NOTE(review): the header documented above names the column "Drug_ID(Stitch)", while the
    # references below use "Drug_ID_Stitch" - confirm the actual header of date.csv.
    # The closing parenthesis of a column name is escaped as \) inside $(...).
    s: https://w3id.org/d2s/data/date/interaction/$(Drug_ID_Stitch)_$(Target(uniprot\))
    po:
      - [a, bl:ChemicalToGeneAssociation]
      - p: bl:subject
        o: https://identifiers.org/pubchem.compound/$(Drug_ID_Stitch)~iri
      # TODO: remove CID from the ID for proper URI
      - p: bl:object
        o: https://identifiers.org/uniprot:$(Target(uniprot\))~iri
      - p: bl:relation
        o: bl:interacts_with~iri
      - p: bl:provided_by
        o: d2s:dataset/date/$(Dataset)~iri
      # Pathway and tissue are emitted as plain literals (no ~iri suffix).
      - p: bl:participates_in
        o: $(Pathway)
      - p: bl:part_of
        o: $(Tissue)
      # - p: bl:part_of
      #   o:
      #     function: idlab:toUpperCaseURL
      #     parameters:
      #       - [idlab:str, "https://w3id.org/d2s/data/date/pathway/$(Pathway)"]
      #     type: iri

      # Also pathway part_of tissue
      # TODO: generate a URI for Pathway (do it through preprocessing?)
      # We could have a python script which iterates over Pathway row to resolve the URI

  drugs:
    sources:
      - ['date.csv~csv']
    # Subjects are always IRIs, so no "~iri" suffix here: that shortcut only applies to objects
    # and would otherwise end up inside the generated subject IRI, which then no longer matches
    # the bl:subject IRIs produced by the interactions mapping.
    s: https://identifiers.org/pubchem.compound/$(Drug_ID_Stitch)
    po:
      - [a, bl:ChemicalSubstance]
      - p: bl:name
        o: $(Drug_name)
Oops, something went wrong.