Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For comparison only: DEV branch #61

Merged
merged 152 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
152 commits
Select commit Hold shift + click to select a range
fa49f0f
add obsolete classes due to renamed patterns
jamesamcl Dec 13, 2023
690f708
add sssom mapping generation
jamesamcl Dec 13, 2023
28cfef2
Add obsoletion module
matentzn Dec 13, 2023
9027175
inheres_in_part_of
jamesamcl Dec 13, 2023
2fedb71
Add method to to postprocess modified patterns after matching
matentzn Dec 13, 2023
7fbee3e
Merge branch 'dev' of https://github.com/obophenotype/upheno-dev into…
matentzn Dec 13, 2023
add9688
Update upheno_prepare.py
matentzn Dec 13, 2023
4ad5740
fix inheres_in pattern generation
jamesamcl Dec 13, 2023
78965b9
add missing method to lib.py
jamesamcl Dec 13, 2023
f07a464
add out path for sssom
jamesamcl Jan 31, 2024
9b6d25e
add mappings file wip
jamesamcl Jan 31, 2024
4193f75
remove exit from prepare
jamesamcl Jan 31, 2024
fe43954
remove missing bridges
jamesamcl Jan 31, 2024
2e6c4ca
Merge branch 'master' into dev
matentzn Jan 31, 2024
3b309d9
reinstate excluded patterns with comment
jamesamcl Jan 31, 2024
71a03c5
Merge branch 'dev' of github.com:obophenotype/upheno-dev into dev
jamesamcl Jan 31, 2024
35015cf
Updated patterns
matentzn Mar 6, 2024
0c329bb
Create upheno-pattern-deriver.ipynb
matentzn Mar 6, 2024
0432c8a
Updated patterns
matentzn Mar 8, 2024
ee9af35
add +x to upheno_pipeline.sh
jamesamcl Mar 6, 2024
e7b0d8a
Add changed patterns generation step
matentzn Mar 8, 2024
1966b9a
Merge branch 'dev' of https://github.com/obophenotype/upheno-dev into…
matentzn Mar 8, 2024
0e0fa3c
fix variable same name as function
jamesamcl Mar 11, 2024
41e06f1
Update upheno_create_profiles.py
matentzn Apr 10, 2024
3f4cb53
Update upheno_create_profiles.py
matentzn Apr 10, 2024
7456884
Update upheno_create_profiles.py
matentzn Apr 11, 2024
e4b323b
Review changed patterns
matentzn Apr 18, 2024
e783292
Updating remaining classes
matentzn Apr 18, 2024
e0b3ba9
Updated some more patterns
matentzn Apr 18, 2024
a84ad08
Add a few more rewrite rules
matentzn Apr 18, 2024
4d10c56
Update upheno-pattern-deriver.ipynb
matentzn Apr 18, 2024
de437e3
Update upheno-config.yaml
jamesamcl Apr 24, 2024
157cfea
Update upheno_create_profiles.py
matentzn Apr 24, 2024
fed7de1
Merge branch 'dev' of https://github.com/obophenotype/upheno-dev into…
matentzn Apr 24, 2024
fe758e2
filter out bfo
jamesamcl Apr 24, 2024
4dfb83f
Update scripts and files
matentzn Jun 19, 2024
67cccc5
Update components and add new top level classes
matentzn Jun 19, 2024
9e31002
Remove three patterns for matches
matentzn Jun 19, 2024
98878a5
Update some phenotype patterns
matentzn Jun 20, 2024
62c1905
Huge refactor
matentzn Jun 20, 2024
682fd5d
Update some data files
matentzn Jun 21, 2024
c8c94d7
Update upheno_prepare.py
matentzn Jun 21, 2024
92ac6d9
Update lexical_mapping.py
matentzn Jun 21, 2024
f14488a
Update lib.py
matentzn Jun 21, 2024
55680e9
Update upheno-config.yaml
matentzn Jun 21, 2024
420f775
Update upheno.Makefile
matentzn Jun 21, 2024
793cb2f
Update upheno_pipeline.sh
matentzn Jun 21, 2024
06a63f9
Huge refactor, no words
matentzn Jun 22, 2024
a95da92
Updated files during release
matentzn Jun 22, 2024
67299a9
Merge branch 'master' into dev
matentzn Jun 22, 2024
47ba698
Refactor uPheno makefile continued
matentzn Jun 22, 2024
c922bf2
Update catalog-v001.xml
matentzn Jun 22, 2024
a9788c1
Update import
matentzn Jun 22, 2024
986d849
Update merged_import.owl
matentzn Jun 22, 2024
35dcb4c
State of latest fixed version of uPheno
matentzn Jun 23, 2024
89df4e2
Update upheno_id_map.txt
matentzn Jun 23, 2024
941c9dd
Create upheno_id_map_december_2023.txt
matentzn Jun 23, 2024
8102d97
set SOT for upheno_id_map to github agaimn
matentzn Jun 23, 2024
443378c
Update upheno-config.yaml
matentzn Jun 23, 2024
8f56da2
Add uPheno fillers to version control
matentzn Jun 23, 2024
abf8127
More huge refactors
matentzn Jun 23, 2024
4817c7b
Update python depedencies
matentzn Jun 23, 2024
a30df2d
More refactoring
matentzn Jun 23, 2024
597d5ea
Update upheno.Makefile
matentzn Jun 23, 2024
4a36bf2
Update patterns
matentzn Jun 23, 2024
7b8a55a
Update upheno_build.py
matentzn Jun 23, 2024
0a54e3b
refactor
matentzn Jun 23, 2024
c6cb03a
Update fillers
matentzn Jun 23, 2024
fe24d2e
Fix fillers pipeline
matentzn Jun 23, 2024
b6eb68e
Update upheno.Makefile
matentzn Jun 23, 2024
72fcd69
Update upheno fillers
matentzn Jun 23, 2024
4fd297e
Update fillers
matentzn Jun 23, 2024
47e7217
Update upheno_id_map.txt
matentzn Jun 23, 2024
248cd20
Make sure upheno map is updated correctly
matentzn Jun 23, 2024
0d278fe
Update upheno.Makefile
matentzn Jun 23, 2024
3526c62
Update lib.py
matentzn Jun 23, 2024
0ae1bf9
Update upheno_create_profiles.py
matentzn Jun 23, 2024
8dc82f1
Add all uPheno patterns to patterns dir
matentzn Jun 24, 2024
518ca35
Add automatic from upheno map
matentzn Jun 24, 2024
4317b81
Add all changed patterns to pattern directory
matentzn Jun 24, 2024
89184ab
Update a few pattern names
matentzn Jun 24, 2024
2abbfd5
Update pattern names
matentzn Jun 24, 2024
2a85058
Remove modified patterns
matentzn Jun 24, 2024
40b540b
Remove modified patterns
matentzn Jun 24, 2024
af1cca6
Update pipelines
matentzn Jun 24, 2024
8dd05cd
Add missing cols to obsolete tsv
matentzn Jun 24, 2024
46bd970
Remove obsolete classes form set
matentzn Jun 24, 2024
a52c40c
Update DOSDP
matentzn Jun 24, 2024
69f7882
More refactoring
matentzn Jun 24, 2024
29c0f7c
Get rid of some patterns we dont need anymore
matentzn Jun 25, 2024
bdfe3c2
Another huge refactor
matentzn Jun 25, 2024
e378993
Add obsoletion pipeline
matentzn Jun 25, 2024
16a9cd5
Update merged.owl goal
matentzn Jun 25, 2024
ace925f
Update definitions.owl
matentzn Jun 25, 2024
189e4ee
Update obsolete.tsv
matentzn Jun 27, 2024
1c1e2a2
Update upheno-odk.yaml
matentzn Jun 27, 2024
165ae54
Update abnormalAnatomicalEntity.tsv
matentzn Jun 27, 2024
48a8c1c
Updated patterns
matentzn Jun 27, 2024
cfe807d
Update upheno-deprecated.owl
matentzn Jun 27, 2024
6140e9f
Create upheno_qc.ipynb
matentzn Jun 27, 2024
a0512de
Obsolete all of the gocc_anatomical_entity cases
matentzn Jun 27, 2024
3668ff5
Apply all cc_cl removals
matentzn Jun 27, 2024
296573d
Get rid of resistence_to_entity_chebi case in uPheno
matentzn Jun 27, 2024
f3d50db
Dealt with bp_and_mf
matentzn Jun 27, 2024
18f4132
Update obsolete.tsv
matentzn Jun 27, 2024
b0bfdee
Remove some obsoleted classes from patterns
matentzn Aug 4, 2024
c96d7bb
Deleting a lot of redundant class definitions
matentzn Aug 4, 2024
8cfae0b
Updated release files
matentzn Aug 4, 2024
cecfc2a
Update imports and components
matentzn Aug 4, 2024
5442cde
Update ODK
matentzn Aug 4, 2024
b0af5e3
ODK 1.5.2 update
matentzn Aug 4, 2024
43be67d
Update upheno-edit.owl
matentzn Aug 4, 2024
67d6e7b
Remove all NBO phenotypes from abnormalBiologicalProcess pattern
matentzn Aug 4, 2024
cdeef4e
Update definitions.owl
matentzn Aug 4, 2024
f361dfb
Remove some more classes
matentzn Aug 4, 2024
2bc12f1
Remove some redundant classes from patterns
matentzn Aug 4, 2024
c5c5958
Update component files
matentzn Aug 25, 2024
52fa451
Update scripts
matentzn Aug 25, 2024
5b3c3c6
Update MISC
matentzn Aug 26, 2024
8744032
Update scripts
matentzn Aug 27, 2024
6094b5b
Update patterns (mainly removals)
matentzn Aug 27, 2024
ffd42b3
Update imports files
matentzn Aug 27, 2024
495c35a
Remove merged import which has become to large to be handled
matentzn Aug 27, 2024
680213c
Adding remaining SSPOs to imports
matentzn Aug 27, 2024
535ed65
Update bridge files
matentzn Aug 27, 2024
beca8b3
Obsolete 8 redundant classes
matentzn Aug 27, 2024
ba2d541
Update components and mappings
matentzn Aug 28, 2024
8f93933
Update templates
matentzn Aug 28, 2024
0bf5b98
Update pipelines
matentzn Aug 28, 2024
f45150c
Update remove duplicative classes from pattern files
matentzn Aug 28, 2024
5dcfd61
ODK config fixes
matentzn Aug 28, 2024
d437b65
Update upheno.Makefile
matentzn Aug 28, 2024
e84feaa
Add anatomy mappings into upheno pattern matching process
matentzn Aug 30, 2024
cce685c
Add Uberon sssom file to uPheno
matentzn Aug 30, 2024
eae3bf1
Update components and mappings
matentzn Aug 30, 2024
c9c7e40
Add a few more mapping sets to upheno odk config
matentzn Aug 30, 2024
7b47abe
Update COmponents and mappings
matentzn Aug 31, 2024
e62ee96
Update obsolete.tsv
matentzn Aug 31, 2024
7c39ab3
Update uPheno deprecated
matentzn Aug 31, 2024
c41d47a
Remove a few obsolete c-s matches
matentzn Aug 31, 2024
60ffca9
Providing some last tweaks to the ontology
matentzn Aug 31, 2024
2a5cf80
Update upheno.Makefile
matentzn Aug 31, 2024
8c0a9ca
Update upheno.Makefile
matentzn Aug 31, 2024
3e0f60a
Add root alignments
matentzn Sep 1, 2024
aa78439
Various updates
matentzn Sep 1, 2024
cf2ab7c
Update uPheno release files
matentzn Sep 3, 2024
49e8e82
Remove upheno.owl
matentzn Sep 3, 2024
abe17c5
Delete upheno-base.owl
matentzn Sep 3, 2024
a7a9069
Update makefile
matentzn Sep 3, 2024
0413f08
MISC custom makefile updates
matentzn Sep 3, 2024
c858992
Add lexical matching command to CLI
matentzn Sep 3, 2024
75726f0
Update release files
matentzn Sep 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/curation/upheno-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,8 @@ upheno_profiles:
pattern_repos:
- obophenotype/upheno/contents/src/patterns/dosdp-dev
- obophenotype/upheno/contents/src/patterns/dosdp-patterns
exclude_patterns:
- abnormalMorphologyOfAnatomicalEntity.yaml
release_artefacts:
- base
- simple-non-classified
Expand Down
45 changes: 42 additions & 3 deletions src/ontology/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# ----------------------------------------
# Makefile for upheno
# Generated using ontology-development-kit
# ODK Version: v1.4
# ODK Version: v1.4.3
# ----------------------------------------
# IMPORTANT: DO NOT EDIT THIS FILE. To override default make goals, use upheno.Makefile instead

Expand Down Expand Up @@ -43,14 +43,14 @@ REPORT_PROFILE_OPTS =
OBO_FORMAT_OPTIONS =
SPARQL_VALIDATION_CHECKS = owldef-self-reference iri-range label-with-iri multiple-replaced_by
SPARQL_EXPORTS = basic-report class-count-by-prefix edges xrefs obsoletes synonyms
ODK_VERSION_MAKEFILE = v1.4
ODK_VERSION_MAKEFILE = v1.4.3

TODAY ?= $(shell date +%Y-%m-%d)
OBODATE ?= $(shell date +'%d:%m:%Y %H:%M')
VERSION= $(TODAY)
ANNOTATE_ONTOLOGY_VERSION = annotate -V $(ONTBASE)/releases/$(VERSION)/$@ --annotation owl:versionInfo $(VERSION)
ANNOTATE_CONVERT_FILE = annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) convert -f ofn --output [email protected] && mv [email protected] $@
OTHER_SRC = $(PATTERNDIR)/definitions.owl
OTHER_SRC = $(PATTERNDIR)/definitions.owl $(COMPONENTSDIR)/upheno-deprecated.owl
ONTOLOGYTERMS = $(TMPDIR)/ontologyterms.txt
EDIT_PREPROCESSED = $(TMPDIR)/$(ONT)-preprocess.owl
PATTERNDIR= ../patterns
Expand Down Expand Up @@ -345,6 +345,45 @@ refresh-%:
no-mirror-refresh-%:
$(MAKE) IMP=true IMP_LARGE=true MIR=false PAT=false $(IMPORTDIR)/$*_import.owl -B


# ----------------------------------------
# Components
# ----------------------------------------
# Some ontologies contain external and internal components. A component is included in the ontology in its entirety.

COMP=true # Global parameter to bypass component generation

.PHONY: all_components
all_components: $(OTHER_SRC)

.PHONY: recreate-components
recreate-components:
$(MAKE) COMP=true IMP=false MIR=true PAT=true IMP_LARGE=false all_components -B

.PHONY: no-mirror-recreate-components
no-mirror-recreate-components:
$(MAKE) COMP=true IMP=false MIR=false PAT=true IMP_LARGE=false all_components -B

.PHONY: recreate-%
recreate-%:
$(MAKE) COMP=true IMP=false IMP_LARGE=false MIR=true PAT=true $(COMPONENTSDIR)/$*.owl -B

.PHONY: no-mirror-recreate-%
no-mirror-recreate-%:
$(MAKE) COMP=true IMP=false IMP_LARGE=false MIR=false PAT=true $(COMPONENTSDIR)/$*.owl -B

$(COMPONENTSDIR)/%: | $(COMPONENTSDIR)
touch $@
.PRECIOUS: $(COMPONENTSDIR)/%



$(COMPONENTSDIR)/upheno-deprecated.owl: $(TEMPLATEDIR)/obsolete.tsv
if [ $(COMP) = true ] ; then $(ROBOT) template \
$(patsubst %, --template %, $^) \
$(ANNOTATE_CONVERT_FILE); fi

.PRECIOUS: $(COMPONENTSDIR)/upheno-deprecated.owl
# ----------------------------------------
# Mirroring upstream ontologies
# ----------------------------------------
Expand Down
1,736 changes: 1,736 additions & 0 deletions src/ontology/components/upheno-deprecated.owl

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions src/ontology/upheno-odk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,9 @@ import_group:
- id: pato
- id: cl
- id: mpath
components:
products:
- filename: upheno-deprecated.owl
use_template: TRUE
templates:
- obsolete.tsv
106 changes: 106 additions & 0 deletions src/scripts/create_sssom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@

import pandas as pd
import os
import yaml
import glob
import argparse
from sssom.context import get_converter
from sssom.parsers import from_sssom_dataframe
from sssom.writers import write_table

def main():
    """Parse the command line and build the uPheno SSSOM mapping file.

    All three options are mandatory: each one is forwarded unchanged to
    ``create_upheno_sssom``, which uses them as file-system paths.  Marking
    them as required lets argparse reject an incomplete invocation with a
    usage message instead of a later, unrelated ``TypeError`` on ``None``.
    """
    parser = argparse.ArgumentParser(description='Create SSSOM file from upheno id map and pattern matches')
    parser.add_argument('--upheno_id_map', type=str, required=True, help='upheno id map file')
    parser.add_argument('--patterns_dir', type=str, required=True, help='directory containing pattern files')
    parser.add_argument('--matches_dir', type=str, required=True, help='directory containing pattern matches')
    args = parser.parse_args()
    create_upheno_sssom(args.upheno_id_map, args.patterns_dir, args.matches_dir)

def get_id_columns(pattern_file):
    """Return the keys of the ``vars`` section of a pattern YAML file.

    The file is parsed with ``yaml.safe_load`` and the keys of its ``vars``
    mapping are returned as a list, in the order the parser yields them.

    Any failure (unreadable file, invalid YAML, missing ``vars`` entry, ...)
    is reported on stdout and signalled by returning ``None``.
    """
    try:
        with open(pattern_file, "r") as handle:
            pattern_doc = yaml.safe_load(handle)
        return list(pattern_doc["vars"].keys())
    except Exception:
        print("Could not get id columns: " + pattern_file)
        return None

def create_upheno_sssom(upheno_id_map, patterns_dir, matches_dir,
                        output_path="upheno-species-independent.sssom.tsv"):
    """Write an SSSOM table linking uPheno classes to species-specific classes.

    Each row of the id map carries an ``id`` of the form
    ``<filler>-<filler>-...-<pattern>.<ext>`` and a ``defined_class`` (the
    uPheno class).  The fillers are compared, position by position, with the
    id columns of the pattern (the keys of its ``vars`` section); every row of
    the pattern's match table whose id columns equal the fillers contributes
    one ``semapv:crossSpeciesExactMatch`` mapping from the uPheno class to
    that row's ``defined_class``.

    Args:
        upheno_id_map: Path of the tab-separated id map with ``id`` and
            ``defined_class`` columns.
        patterns_dir: Directory holding the ``<pattern>.yaml`` files.
        matches_dir: Directory whose sub-directories hold the
            ``<pattern>.tsv`` match tables.
        output_path: Destination of the SSSOM file.  Defaults to the file
            name that used to be hard-coded, relative to the working
            directory.

    Id-map rows are skipped when the id columns of their pattern cannot be
    read or when no match table exists for the pattern.
    """
    # Gather the match tables of all sub-directories per pattern name.
    # Without recursive=True, "**" behaves like "*", i.e. exactly one
    # directory level below matches_dir is searched.
    frames_by_pattern = {}
    for pattern_match_tsv in glob.glob(matches_dir + "/**/*.tsv"):
        pattern_name = os.path.basename(pattern_match_tsv).split(".")[0]
        frames_by_pattern.setdefault(pattern_name, []).append(
            pd.read_csv(pattern_match_tsv, sep='\t')
        )
    # ignore_index gives each combined table a unique index, which keeps the
    # boolean masks built below unambiguous.
    all_pattern_matches_map = {
        name: pd.concat(frames, ignore_index=True)
        for name, frames in frames_by_pattern.items()
    }

    cache_pattern_file_to_idcolumn = dict()
    id_map_df = pd.read_csv(upheno_id_map, sep='\t')
    converter = get_converter()
    sssom = []

    for _, id_map_row in id_map_df.iterrows():
        tokens = id_map_row['id'].split('-')
        fillers = tokens[:-1]
        pattern_name = tokens[-1].split('.')[0]
        pattern_file = pattern_name + ".yaml"

        # Test for membership rather than for None so that a failed lookup is
        # cached as well and is neither re-parsed nor re-reported per row.
        if pattern_file not in cache_pattern_file_to_idcolumn:
            cache_pattern_file_to_idcolumn[pattern_file] = get_id_columns(
                os.path.join(patterns_dir, pattern_file)
            )
        id_columns = cache_pattern_file_to_idcolumn[pattern_file]
        if id_columns is None:
            continue

        # A pattern listed in the id map may have no match table at all.
        tsv_df = all_pattern_matches_map.get(pattern_name)
        if tsv_df is None:
            continue

        # Keep the rows whose id columns equal the fillers, pairwise.
        mask = pd.Series(True, index=tsv_df.index)
        for col, filler in zip(id_columns, fillers):
            mask = mask & (tsv_df[col] == filler)

        upheno_id = id_map_row['defined_class']
        for species_specific_id in tsv_df.loc[mask, 'defined_class']:
            sssom.append([
                converter.compress(upheno_id),
                "semapv:crossSpeciesExactMatch",
                converter.compress(species_specific_id),
                "semapv:LogicalMatching"
            ])

    df_out = pd.DataFrame(sssom, columns=['subject_id', 'predicate_id', 'object_id', 'mapping_justification'])

    meta = dict()
    meta['mapping_set_id'] = 'https://data.monarchinitiative.org/mappings/upheno/upheno-species-independent.sssom.tsv'
    msdf = from_sssom_dataframe(df_out, prefix_map=converter, meta=meta)
    msdf.clean_prefix_map()
    # Close the handle deterministically so the table is flushed to disk.
    with open(output_path, "w") as out_file:
        write_table(msdf, out_file)

def filter_row(df, id_columns, fillers):
    """Tell whether a row holds the expected filler in every id column.

    Args:
        df: Mapping-like row that is indexed by column name.
        id_columns: Column names to inspect.
        fillers: Expected values, aligned by position with ``id_columns``.

    Returns:
        ``False`` as soon as one column differs from its filler, ``True``
        otherwise (including when there is nothing to compare).  Columns and
        fillers are paired with ``zip``, so surplus entries on either side
        are ignored.
    """
    for column, filler in zip(id_columns, fillers):
        if df[column] != filler:
            return False
    return True

if __name__ == "__main__":
main()

3 changes: 3 additions & 0 deletions src/scripts/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ def get_min_upheno_id(self):
def get_pattern_repos(self):
    """Return the value stored under ``pattern_repos`` in ``self.config``.

    The result is whatever ``self.config.get`` yields for that key.
    """
    repos = self.config.get("pattern_repos")
    return repos

def get_exclude_patterns(self):
    """Return the entries configured under ``exclude_patterns``.

    An empty list is returned when the key is missing or has no value, so
    callers can run membership tests (``filename in exclude_patterns``)
    without first guarding against ``None``.
    """
    return self.config.get("exclude_patterns") or []

def get_working_directory(self):
    """Return the value stored under ``working_directory`` in ``self.config``.

    The result is whatever ``self.config.get`` yields for that key.
    """
    working_directory = self.config.get("working_directory")
    return working_directory

Expand Down
2 changes: 1 addition & 1 deletion src/scripts/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
# we therefore map the whole repo (../..) to a docker volume.
#
# See README-editors.md for more details.
docker run -v $PWD/../../:/work -w /work/src/scripts --rm -ti obolibrary/odkfull:dev "$@"
docker run -v $PWD/../../:/work -w /work/src/scripts --rm -ti obolibrary/odkfull:v1.3.1 "$@"
3 changes: 0 additions & 3 deletions src/scripts/upheno_pipeline.bat

This file was deleted.

7 changes: 7 additions & 0 deletions src/scripts/upheno_pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,14 @@ sh run.sh python3 upheno_prepare.py ../curation/upheno-config.yaml
# from the previous step using dosdp. Add taxon restrictions

sh run.sh python3 upheno_create_profiles.py ../curation/upheno-config.yaml

sh run.sh python3 upheno-stats.py ../curation/upheno-config.yaml

sh run.sh python3 create_sssom.py \
--upheno_id_map ../curation/upheno_id_map.txt \
--patterns_dir ../curation/patterns-for-matching \
--matches_dir ../curation/pattern-matches
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jamesamcl

add --output param here.


cd ../ontology/
sh run.sh make o sim reports
echo "Release successfully completed, ready to deploy."
77 changes: 71 additions & 6 deletions src/scripts/upheno_prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import sys
import urllib.request
import warnings
import json
from subprocess import CalledProcessError, check_call

import pandas as pd
Expand Down Expand Up @@ -207,15 +208,17 @@ def get_pattern_urls(upheno_pattern_repos):
return upheno_patterns


def download_patterns(upheno_pattern_repos, pattern_dir):
def download_patterns(upheno_pattern_repos, pattern_dir, exclude_patterns):
upheno_patterns = get_pattern_urls(upheno_pattern_repos)
filenames = []
for url in upheno_patterns:
print("Downloading " + url)
filename = os.path.basename(url)
file_path = os.path.join(pattern_dir, filename)
print("Downloading " + filename + " from " + url + " to " + pattern_dir)
if filename in exclude_patterns:
continue
if not upheno_config.is_skip_pattern_download():
try:
# try:
x = urllib.request.urlopen(url).read()
y = ruamel.yaml.round_trip_load(x, preserve_quotes=True)
print(file_path)
Expand All @@ -227,8 +230,23 @@ def download_patterns(upheno_pattern_repos, pattern_dir):
with open(file_path, "w") as outfile:
ruamel.yaml.round_trip_dump(y, outfile, explicit_start=True, width=5000)

except Exception as exc:
print(exc)
# generate inheres_in matches for any inheres_in_part_of patterns
#
if "RO:0002314" in y["relations"].values(): # inheres in part of
new_pattern = y.copy()
new_pattern["relations"] = {}
for k,v in y["relations"].items():
if v == "RO:0002314":
new_pattern["relations"][k] = "RO:0000052"
else:
new_pattern["relations"][k] = v
new_file_path = os.path.splitext(file_path)[0] + "-modified.yaml"
with open(new_file_path, "w") as outfile:
ruamel.yaml.round_trip_dump(new_pattern, outfile, explicit_start=True, width=5000)
filenames.append(new_file_path)

# except Exception as exc:
# print(exc)

if os.path.isfile(file_path):
filenames.append(filename)
Expand Down Expand Up @@ -386,6 +404,42 @@ def prepare_species_specific_phenotype_ontologies(config):
)
robot_upheno_component(o_base_taxon, remove_eqs_file)

def postprocess_modified_patterns(upheno_config, pattern_files, matches_dir):
    """Fold the matches of modified patterns back into the original tables.

    For every ontology id returned by
    ``upheno_config.get_phenotype_ontologies()`` and every given pattern, the
    tables ``<matches_dir>/<ontology>/<pattern>.tsv`` and
    ``<matches_dir>/<ontology>/<pattern>-modification.tsv`` are concatenated,
    de-duplicated and written back to the unmodified ``<pattern>.tsv``.
    Nothing is merged for an ontology when either of the two tables is
    missing.

    Afterwards all given pattern files and every merged ``-modification``
    table are deleted.

    Args:
        upheno_config: Object providing ``get_phenotype_ontologies()``.
        pattern_files: Paths of the modified pattern files.  A trailing
            ``-modification`` in the file name is stripped to find the name
            of the pattern it was derived from; names without that suffix
            are used as they are.
        matches_dir: Directory holding one sub-directory of match tables per
            ontology id.
    """
    modification_suffix = "-modification"
    yaml_suffix = ".yaml"

    # The pattern files themselves are always removed at the end.
    delete_files = list(pattern_files)

    patterns = []
    for pattern_path in pattern_files:
        pattern_id = os.path.basename(pattern_path)
        if pattern_id.endswith(yaml_suffix):
            pattern_id = pattern_id[:-len(yaml_suffix)]
        # The files handed in are named "<pattern>-modification.yaml"; drop
        # the marker so the paths below point at "<pattern>.tsv" and
        # "<pattern>-modification.tsv" rather than at a doubled suffix.
        if pattern_id.endswith(modification_suffix):
            pattern_id = pattern_id[:-len(modification_suffix)]
        patterns.append(pattern_id)

    for ontology_id in upheno_config.get_phenotype_ontologies():
        oid_matches_path = os.path.join(matches_dir, ontology_id)
        for pattern in patterns:
            # Load both the modified and the unmodified tsv files,
            # merge them and write them back to the unmodified file.
            unmodified_tsv_path = os.path.join(oid_matches_path, pattern + ".tsv")
            modified_tsv_path = os.path.join(oid_matches_path, pattern + modification_suffix + ".tsv")
            if not os.path.exists(modified_tsv_path):
                continue
            if not os.path.exists(unmodified_tsv_path):
                continue
            df_unmodified = pd.read_csv(unmodified_tsv_path, sep="\t")
            df_modified = pd.read_csv(modified_tsv_path, sep="\t")
            # Rows found by both pattern variants are kept only once.
            df_final = pd.concat([df_unmodified, df_modified]).drop_duplicates()
            df_final.to_csv(unmodified_tsv_path, sep="\t", index=False)
            delete_files.append(modified_tsv_path)

    # Delete the merged modified tsv files and the modified patterns.
    for file_path in delete_files:
        if os.path.exists(file_path):
            os.remove(file_path)





def match_patterns(upheno_config, pattern_files, matches_dir, pattern_dir, overwrite=True):
patterns = []
Expand Down Expand Up @@ -413,6 +467,14 @@ def match_patterns(upheno_config, pattern_files, matches_dir, pattern_dir, overw
dosdp_pattern_match(ontology_path, pattern_string2, pattern_dir, outdir, TIMEOUT)
else:
print("Matches for ({}) already made, bypassing.".format(outdir))

postprocess_modified_patterns = [
os.path.join(pattern_dir, f)
for f in os.listdir(pattern_dir)
if os.path.isfile(os.path.join(pattern_dir, f)) and f.endswith("-modification.yaml")
]

postprocess_modified_patterns(upheno_config, postprocess_modified_patterns, matches_dir)


def add_taxon_restrictions(
Expand Down Expand Up @@ -479,13 +541,16 @@ def download_sources(dir, overwrite=True):


print("### Download patterns ###")
pattern_files = download_patterns(upheno_config.get_pattern_repos(), pattern_dir)
exclude_patterns = upheno_config.get_exclude_patterns()
pattern_files = download_patterns(upheno_config.get_pattern_repos(), pattern_dir, exclude_patterns)
pattern_files = [
os.path.join(pattern_dir, f)
for f in os.listdir(pattern_dir)
if os.path.isfile(os.path.join(pattern_dir, f)) and f.endswith(".yaml")
]

exit(0)

print("### Download sources ###")
print("ROBOT args: " + os.environ["ROBOT_JAVA_ARGS"])
download_sources(module_dir, upheno_config.is_overwrite_ontologies())
Expand Down
Loading
Loading