Skip to content

Commit

Permalink
Merge pull request #136 from bclenet/rm_images_2
Browse files Browse the repository at this point in the history
Graph comparison ignoring URNs
  • Loading branch information
cmaumet authored Jun 13, 2024
2 parents 856ae21 + 01eb664 commit feaf2f7
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 5 deletions.
47 changes: 44 additions & 3 deletions bids_prov/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import shutil
import uuid
from typing import Mapping, Union, Tuple
import re

CONTEXT_URL = "https://raw.githubusercontent.com/bids-standard/BEP028_BIDSprov/master/context.json"

Expand Down Expand Up @@ -120,6 +121,44 @@ def compute_sha_256_entity(entities: dict):
shutil.rmtree(directory)


def simplify_urns(graph: str) -> str:
    """
    Replace URNs of a json-ld graph with simpler (not random) values defined by their order of appearance in the graph.

    A URN is matched as "urn:" followed by 36 characters among letters, digits and "-".
    The n-th distinct URN encountered (starting at 0) is replaced by "urn:" followed by
    n left-padded with zeros to 36 characters. Identical URNs always get the same replacement.

    Parameters
    ----------
    graph : str
        The json-ld graph dumped as a string
    Returns
    -------
    str
        The input string with simplified urns.
    """

    # Dict in which :
    # - key is an urn as present in the original graph
    # - value is the simplified urn, numbered by order of first appearance in the graph.
    known_urns = {}

    def _simplified(match):
        """Return the simplified urn for a regex match, assigning a new id on first sight."""
        urn = match.group()
        if urn not in known_urns:
            known_urns[urn] = f"urn:{str(len(known_urns)).zfill(36)}"
        return known_urns[urn]

    # Substitute in a single left-to-right pass over the ORIGINAL text. Doing successive
    # str.replace() calls instead would re-scan already-replaced text, so a graph that
    # already contains simplified-looking urns (e.g. "urn:000...001") could end up with
    # distinct urns merged into the same value.
    return re.sub(r'urn:[a-zA-Z0-9\-]{36}', _simplified, graph)


def writing_jsonld(graph, indent, output_file):
"""
Write a json-ld in memory unless it already exists and contains the same content
Expand All @@ -140,10 +179,12 @@ def writing_jsonld(graph, indent, output_file):
"""
if os.path.isfile(output_file):
with open(output_file, "r") as f:
existing_content = f.read()
existing_content = simplify_urns(f.read())

new_content = simplify_urns(json.dumps(graph, indent=indent))

if existing_content == json.dumps(graph, indent=indent):
return True
if existing_content == new_content:
return True

with open(output_file, "w") as fd:
json.dump(graph, fd, indent=indent)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Date : 2024_05_23_08h10m05s
Date : 2024_06_12_12h43m15s
Processing files...
file= nidmresults-examples/afni_alt_onesided_proc.sub_001
file= nidmresults-examples/afni_alt_onesided_proc.sub_001
Expand Down Expand Up @@ -69,4 +69,4 @@ Processing files...
file= nidmresults-examples/spm_thr_voxelfdrp05_batch.m
file= nidmresults-examples/spm_thr_voxelfwep05_batch.m
file= nidmresults-examples/spm_thr_voxelunct4_batch.m
End of processed files. Results in dir : 'examples/from_parsers'. Time required: 0:00:01.882820
End of processed files. Results in dir : 'examples/from_parsers'. Time required: 0:00:02.104508

0 comments on commit feaf2f7

Please sign in to comment.