Skip to content

Commit

Permalink
New strategies for Id generation
Browse files Browse the repository at this point in the history
  • Loading branch information
bclenet committed Sep 11, 2024
1 parent 19f0262 commit 3846692
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 41 deletions.
26 changes: 15 additions & 11 deletions bids_prov/afni/afni_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
from itertools import chain

from bids_prov.fsl.fsl_parser import get_entities
from bids_prov.utils import get_default_graph, CONTEXT_URL, get_id, label_mapping, compute_sha_256_entity, \
from bids_prov.utils import (
get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity,
get_activity_urn, get_agent_urn, get_entity_urn,
writing_jsonld
)

# regex to catch inputs
# in `cp /fsl/5.0/doc/fsl.css .files no_ext 5.0` --> only `.files` should match
Expand Down Expand Up @@ -117,12 +120,12 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False):

for (block, cmd) in commands_block:
cmd_s = re.split(" |=", cmd)
a_name = cmd_s[0]
activity_name = cmd_s[0]
cmd_args_remain = cmd_s[1:]
inputs = []
outputs = []
function_in_description_functions = False
command_name_end = os.path.split(a_name)[1]
command_name_end = os.path.split(activity_name)[1]

for df in description_functions:
if df["Name"] == command_name_end:
Expand Down Expand Up @@ -182,27 +185,28 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False):
outputs = list(chain(*(attributes.pop(k)
for k in attributes.keys() & OUTPUT_TAGS)))
entity_names = [_ for _ in re.findall(
INPUT_RE, cmd_without_attributes[len(a_name):])]
INPUT_RE, cmd_without_attributes[len(activity_name):])]

if entity_names and entity_names[0] in cmd_without_attributes:
outputs.append(entity_names[-1])
if len(entity_names) > 1:
inputs.append(entity_names[0])

# the file name and possible extension
label = f"{os.path.split(a_name)[1]}"

activity_label = label_mapping(
f'{os.path.split(activity_name)[1]}',
'afni/afni_labels.json')
activity = {
"@id": f"urn:{get_id()}",
"Label": label_mapping(label, "afni/afni_labels.json"),
"@id": get_activity_urn(activity_label),
"Label": activity_label,
"AssociatedWith": "urn:" + agent_id,
"Command": cmd,
"Parameters": param_dic,
"Used": list(),
}

for input_path in inputs:
input_id = f"urn:{get_id()}" # def format_id
input_id = get_entity_urn(input_path)
existing_input = next(
(entity for entity in records["Entities"] if entity["AtLocation"] == input_path), None)

Expand All @@ -225,7 +229,7 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False):
for output_path in outputs:
records["Entities"].append(
{
"@id": f"urn:{get_id()}",
"@id": get_entity_urn(output_path),
"Label": os.path.split(output_path)[1],
"AtLocation": output_path,
"GeneratedBy": activity["@id"],
Expand Down Expand Up @@ -363,7 +367,7 @@ def fusion_activities(activities, label):
command += activity["Command"] + "; "

return {
"@id": f"urn:{get_id()}",
"@id": get_activity_urn(label),
"Label": label,
"AssociatedWith": activities[0]["AssociatedWith"],
"Command": command,
Expand Down
30 changes: 17 additions & 13 deletions bids_prov/fsl/fsl_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@

from bs4 import BeautifulSoup

from bids_prov.utils import get_default_graph, CONTEXT_URL, get_id, label_mapping, compute_sha_256_entity, \
from bids_prov.utils import (
get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity,
get_activity_urn, get_agent_urn, get_entity_urn,
writing_jsonld
)

# regex to catch inputs
# in `cp /fsl/5.0/doc/fsl.css .files no_ext 5.0` --> only `.files` should match
Expand Down Expand Up @@ -421,7 +424,7 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
# process to remove + and - in pngappend command
cmd = cmd.replace(" + ", " ").replace(" - ", " ")
cmd_s = re.split(" |=", cmd)
a_name = cmd_s[0]
activity_name = cmd_s[0]

inputs = []
outputs = []
Expand All @@ -430,7 +433,7 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):

function_in_description_functions = False

command_name_end = os.path.split(a_name)[1]
command_name_end = os.path.split(activity_name)[1]
for df in description_functions:
if df["Name"] == command_name_end:
description_of_command = df
Expand All @@ -457,9 +460,9 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
outputs = list(chain(*(attributes.pop(k)
for k in attributes.keys() & OUTPUT_TAGS)))
entity_names = [_ for _ in re.findall(
INPUT_RE, cmd_without_attributes[len(a_name):])]
INPUT_RE, cmd_without_attributes[len(activity_name):])]

# # cmd_conf = get_closest_config(a_name) # with the module boutiques
# # cmd_conf = get_closest_config(activity_name) # with the module boutiques
# cmd_conf = None # None because boutiques is not used at this time
# # if cmd_conf:
# # pos_args = filter(lambda e: not e.startswith("-"), cmd_s) # TODO use "-key value" mappings
Expand All @@ -471,12 +474,13 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
if len(entity_names) > 1:
inputs.append(entity_names[0])

# the file name and possible extension
label = f"{os.path.split(a_name)[1]}"

a = {
"@id": f"urn:{get_id()}",
"Label": label_mapping(label, "fsl/fsl_labels.json"),
# Create activity label & record
activity_label = label_mapping(
f'{os.path.split(activity_name)[1]}',
'fsl/fsl_labels.json')
activity = {
"@id": get_activity_urn(activity_label),
"Label": activity_label,
"AssociatedWith": "urn:" + agent_id,
"Command": cmd,
# "attributes": [
Expand All @@ -487,7 +491,7 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):

for input_path in inputs:
# input_name = input_path.replace("/", "_") # TODO
input_id = f"urn:{get_id()}" # def format_id
input_id = get_entity_urn(input_path)

existing_input = next(
(entity for entity in records["Entities"] if entity["AtLocation"] == input_path), None)
Expand All @@ -509,7 +513,7 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
# output_name = output_path.replace("/", "_") # TODO
records["Entities"].append(
{
"@id": f"urn:{get_id()}",
"@id": get_entity_urn(output_path),
"Label": os.path.split(output_path)[1],
"AtLocation": output_path,
"GeneratedBy": a["@id"],
Expand Down
18 changes: 11 additions & 7 deletions bids_prov/spm/spm_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
from typing import List, Dict, Generator

from bids_prov.spm import spm_config as conf
from bids_prov.utils import get_id, get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity, \
from bids_prov.utils import (
get_uuid, get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity,
get_activity_urn, get_agent_urn, get_entity_urn,
writing_jsonld
)


def format_activity_name(activity_name: str) -> str:
Expand Down Expand Up @@ -61,7 +64,7 @@ def get_input_entity(right: str) -> List[dict]:
file_location = re.sub(r"\,1", "", file_drop_quotes) # ds000052/RESULTS/Sub01/con_0001.nii
entity_label_short = "_".join(file_location.split("/")[-2:]) # Sub01_con_0001.nii
entity = {
"@id": "urn:" + get_id(),
"@id": get_entity_urn(file_location),
"Label": label_mapping(entity_label_short, "spm/spm_activity_labels.json"),
"AtLocation": file_location
}
Expand Down Expand Up @@ -170,7 +173,7 @@ def get_entities_from_ext_config(conf_dic: dict, activity_name: str, activity_id
# activity_id), None)
for output in conf_dic[activity]['outputs']:
name = conf_dic[activity]['name']
entity = {"@id": "urn:" + get_id(),
entity = {"@id": get_entity_urn(output),
"Label": label_mapping(name, "spm/spm_activity_labels.json"),
"Atlocation": output,
"GeneratedBy": activity_id,
Expand Down Expand Up @@ -217,15 +220,15 @@ def find_output_id_from_closest(closest_activity: dict, records: dict) -> str:
Returns
-------
output_id : entity id, if such one has been generated by the closest activity, else new id
output_id : entity id, if such one has been generated by the closest activity, else new id
"""
for entity in records["Entities"]:
if "GeneratedBy" in entity:
if entity["GeneratedBy"] == closest_activity["@id"]: # entity["Label"] == parts[-1]
output_id = entity["@id"]
break
else:
output_id = "urn:" + get_id()
output_id = 'urn:uuid:' + get_uuid()
# output_id = next(
# (entity["@id"] for entity in records["Entities"]
# if parts[-1] == entity["Label"] and entity["GeneratedBy"] == closest_activity["@id"]
Expand Down Expand Up @@ -309,9 +312,10 @@ def get_records(task_groups: dict, agent_id: str, verbose=False) -> dict:
command_prefix = command_prefix[:-1]
command += '\n'.join([command_prefix + c for c in end_line_list])

activity_id = "urn:" + get_id()
activity_label = format_activity_name(common_prefix_act)
activity_id = get_activity_urn(activity_label)
activity = {"@id": activity_id,
"Label": format_activity_name(common_prefix_act),
"Label": activity_label,
"Used": list(),
"AssociatedWith": "urn:" + agent_id,
"Command": command
Expand Down
3 changes: 1 addition & 2 deletions bids_prov/tests/test_spm_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,10 @@ def test_get_input_entity():
right = "{'ds011/sub-01/func/sub-01_task-tonecounting_bold_trunctest.nii.gz'};"
# entity label : sub-01_task-tonecounting_bold.nii.gz
entities = [{
"@id": "urn:c15521b1-b3dc-450a-9daa-37e51b591d75",
"@id": "bids::ds011/sub-01/func/sub-01_task-tonecounting_bold_trunctest.nii.gz",
"Label": "func_sub-01_task-tonecounting_bold_trunctest.nii.gz",
"AtLocation": "ds011/sub-01/func/sub-01_task-tonecounting_bold_trunctest.nii.gz"
}]
init_random_state()
right_entity = get_input_entity(right)[0]
assert right_entity == entities[0]

Expand Down
48 changes: 42 additions & 6 deletions bids_prov/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,35 @@
import hashlib

from bids_prov.utils import (
get_id, get_rrid, get_default_graph, CONTEXT_URL, label_mapping, get_sha256
get_uuid, get_random_string, get_rrid, make_alnum,
get_activity_urn, get_agent_urn, get_entity_urn,
get_default_graph, CONTEXT_URL, label_mapping, get_sha256
)
from unittest.mock import mock_open, patch


def test_get_id():
def test_get_uuid():
# Test that the function returns a valid UUID string
result = get_id()
result = get_uuid()
assert isinstance(result, str)
assert isinstance(uuid.UUID(result), uuid.UUID)

# Test that the UUID returned is version 4
assert uuid.UUID(result).version == 4

# Test that the function returns a different ID each time it's called
id1 = get_id()
id2 = get_id()
id1 = get_uuid()
id2 = get_uuid()
assert id1 != id2

def test_get_random_string():
    """Check the type, length and content of strings from get_random_string.

    The default call must give an 8-character alphanumeric string, an
    explicit length argument must be honoured, and two successive calls
    must not return the same value.
    """
    default_string = get_random_string()

    # Default call: a str of 8 alphanumeric characters
    assert isinstance(default_string, str)
    assert len(default_string) == 8
    assert default_string.isalnum()

    # Explicit length
    assert len(get_random_string(5)) == 5

    # A second call gives a different value
    assert default_string != get_random_string()


def test_get_rrid():
# Test that the function returns a RRID string
result = get_rrid('FSL')
Expand All @@ -35,6 +45,32 @@ def test_get_rrid():
# Test that the function returns None if the software is not referenced
assert get_rrid('unreferenced_software') is None

def test_make_alnum():
    """Check that make_alnum returns the expected string for each sample input."""
    # (input, expected output) pairs, checked in order
    cases = (
        ('¨^$£$êµ*ad45@', 'ad45'),
        ('\\//:!§.;,?[]()}{}', ''),
        ('ezeasdsa45ADA5sdas', 'ezeasdsa45ADA5sdas'),
    )
    for raw, expected in cases:
        assert make_alnum(raw) == expected


def test_get_activity_urn():
    """Check the prefix fragment and total length of activity URNs."""
    short_label = 'SPM'
    long_label = 'SPM v. 1242355'

    # Short label
    assert 'urn:spm-' in get_activity_urn(short_label)
    assert len(get_activity_urn(short_label)) == 16

    # Longer label containing spaces and punctuation
    assert 'urn:spmv1242-' in get_activity_urn(long_label)
    assert len(get_activity_urn(long_label)) == 21


def test_get_agent_urn():
    """Check the prefix fragment and total length of agent URNs."""
    short_label = 'BET'

    # Short label
    assert 'urn:bet-' in get_agent_urn(short_label)
    assert len(get_agent_urn(short_label)) == 16

    # Label containing a space
    assert 'urn:movefile-' in get_agent_urn('Move file')

    # Longer label containing spaces and punctuation
    assert len(get_agent_urn('SPM v. 1242355')) == 21


def test_get_entity_urn():
    """Check that get_entity_urn returns the expected 'bids::' URN for each path."""
    # path -> expected URN, checked in insertion order
    expected_urns = {
        '': 'bids::',
        'sub-001/func/T1.nii': 'bids::sub-001/func/T1.nii',
        'T1.nii': 'bids::T1.nii',
    }
    for path, urn in expected_urns.items():
        assert get_entity_urn(path) == urn


def test_get_default_graph():
context_url = "http://example.com/context"
spm_ver = "v1.0"
Expand Down
Loading

0 comments on commit 3846692

Please sign in to comment.