New @id generation for parsers and other bug corrections #142

Open · wants to merge 22 commits into base: master

Commits (22)
c7a2197 - automated nidm example computation from github action (Jun 10, 2024)
c7372d4 - Merge branch 'bids-standard:master' into master (bclenet, Sep 5, 2024)
dd2f163 - Replacing RRID by AltIdentifier (bclenet, Sep 5, 2024)
acef62b - Dealing with RRIDs for AltIdentifier (bclenet, Sep 5, 2024)
79b41a1 - [TEST] updates for RRID handling (bclenet, Sep 5, 2024)
777f59c - Updating parsers after changes in default_graph (bclenet, Sep 5, 2024)
19f0262 - automated nidm example computation from github action (Sep 5, 2024)
3846692 - New strategies for Id generation (bclenet, Sep 11, 2024)
a63a41d - Sanitizing URIs for Entities from AFNI provenance (bclenet, Sep 19, 2024)
db87417 - Cleaning SPM paths before parsing nidm examples (bclenet, Sep 24, 2024)
f142cfc - AFNI entity URIs (bclenet, Sep 25, 2024)
11003ec - Entity ids (bclenet, Sep 26, 2024)
c85e427 - Correcting some function descriptions for AFNI (bclenet, Sep 26, 2024)
3b84ca9 - AFNI description functions + SPM c1xxx bug (bclenet, Oct 3, 2024)
7bcf386 - Allowing '--' for FSL parameter keys (bclenet, Oct 3, 2024)
5b9d420 - [BUG] multiple space chars causing wrong entities for FSL parser (bclenet, Oct 3, 2024)
f740415 - Safer URNs for all parsers (bclenet, Oct 3, 2024)
6aa1fcf - automated nidm example computation from github action (Oct 3, 2024)
8be9c48 - [BUG] Agent ids on all parsers (bclenet, Oct 3, 2024)
fd70559 - Issue with c1xxx ids (bclenet, Oct 4, 2024)
9ca3535 - [Launch Nidm Examples] Changing file names for ds000052 bids dataset (bclenet, Oct 4, 2024)
d35cd57 - automated nidm example computation from github action (Oct 4, 2024)
41 changes: 27 additions & 14 deletions bids_prov/afni/afni_parser.py
@@ -7,8 +7,11 @@
from itertools import chain

from bids_prov.fsl.fsl_parser import get_entities
from bids_prov.utils import get_default_graph, CONTEXT_URL, get_id, label_mapping, compute_sha_256_entity, \
from bids_prov.utils import (
get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity,
get_activity_urn, get_agent_urn, get_entity_urn, make_alnum, get_uuid,
writing_jsonld
)

# regex to catch inputs
# in `cp /fsl/5.0/doc/fsl.css .files no_ext 5.0` --> only `.files` should match
@@ -117,12 +120,12 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False):

for (block, cmd) in commands_block:
cmd_s = re.split(" |=", cmd)
a_name = cmd_s[0]
activity_name = cmd_s[0]
cmd_args_remain = cmd_s[1:]
inputs = []
outputs = []
function_in_description_functions = False
command_name_end = os.path.split(a_name)[1]
command_name_end = os.path.split(activity_name)[1]

for df in description_functions:
if df["Name"] == command_name_end:
@@ -182,27 +185,32 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False):
outputs = list(chain(*(attributes.pop(k)
for k in attributes.keys() & OUTPUT_TAGS)))
entity_names = [_ for _ in re.findall(
INPUT_RE, cmd_without_attributes[len(a_name):])]
INPUT_RE, cmd_without_attributes[len(activity_name):])]

if entity_names and entity_names[0] in cmd_without_attributes:
outputs.append(entity_names[-1])
if len(entity_names) > 1:
inputs.append(entity_names[0])

# the file name and possible extension
label = f"{os.path.split(a_name)[1]}"

activity_label = label_mapping(
f'{os.path.split(activity_name)[1]}',
'afni/afni_labels.json')
activity = {
"@id": f"urn:{get_id()}",
"Label": label_mapping(label, "afni/afni_labels.json"),
"AssociatedWith": "urn:" + agent_id,
"@id": get_activity_urn(activity_label),
"Label": activity_label,
"AssociatedWith": agent_id,
"Command": cmd,
"Parameters": param_dic,
"Used": list(),
}

for input_path in inputs:
input_id = f"urn:{get_id()}" # def format_id
# Deal with not human readable paths
if not make_alnum(input_path):
input_id = 'urn:uuid:' + get_uuid()
else:
input_id = get_entity_urn(input_path)
existing_input = next(
(entity for entity in records["Entities"] if entity["AtLocation"] == input_path), None)

@@ -223,9 +231,13 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False):
activity["Used"] = sorted(set(activity["Used"]))

for output_path in outputs:
if not make_alnum(output_path):
output_id = 'urn:uuid:' + get_uuid()
else:
output_id = get_entity_urn(output_path)
records["Entities"].append(
{
"@id": f"urn:{get_id()}",
"@id": output_id,
"Label": os.path.split(output_path)[1],
"AtLocation": output_path,
"GeneratedBy": activity["@id"],
@@ -272,7 +284,7 @@ def gather_multiline(input_file: str) -> list:

def readlines(input_file: str) -> list:
"""
gather multiline command split by \ separator
Read lines from an input file and return the list of commands it contains

Parameters
----------
@@ -363,7 +375,7 @@ def fusion_activities(activities, label):
command += activity["Command"] + "; "

return {
"@id": f"urn:{get_id()}",
"@id": get_activity_urn(label),
"Label": label,
"AssociatedWith": activities[0]["AssociatedWith"],
"Command": command,
@@ -454,7 +466,8 @@ def afni_to_bids_prov(filename: str, context_url=CONTEXT_URL, output_file=None,
"""
commands_block = readlines(filename)

graph, agent_id = get_default_graph(label="AFNI", context_url=context_url, soft_ver=soft_ver)
graph, agent_id = get_default_graph(
soft_label="AFNI", context_url=context_url, soft_version=soft_ver)
records, bloc_act = build_records(commands_block, agent_id, verbose=verbose)

graph["Records"].update(records)
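Note: the diff above relies on new helpers imported from bids_prov/utils.py (make_alnum, get_uuid, get_entity_urn, get_activity_urn) whose implementations are not part of this changeset. Below is a minimal sketch of the behaviour the parser code appears to assume; the URN formats are guesses, not the PR's actual scheme.

# Hypothetical sketch of the bids_prov.utils helpers used above; the real
# implementations (and URN formats) in the PR may differ.
import uuid

def make_alnum(text: str) -> str:
    # Keep only alphanumeric characters; an empty result means the path
    # contains nothing human-readable to build a deterministic URN from.
    return "".join(c for c in text if c.isalnum())

def get_uuid() -> str:
    # Random UUID4 string, used for the urn:uuid: fallback @id.
    return str(uuid.uuid4())

def get_entity_urn(path: str) -> str:
    # Deterministic, URN-safe @id derived from a file path (assumed format).
    return f"urn:entity:{make_alnum(path)}"

def get_activity_urn(label: str) -> str:
    # @id for an activity; a random suffix keeps repeated commands with the
    # same label distinct (assumed format).
    return f"urn:activity:{make_alnum(label)}-{uuid.uuid4().hex[:8]}"

This matches how build_records uses them: make_alnum(path) acts as a readability test (an empty result triggers the urn:uuid: fallback), and get_entity_urn keeps entity @ids deterministic for a given path instead of minting a fresh id on every run.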
4 changes: 2 additions & 2 deletions bids_prov/afni/description_functions.json
@@ -76,7 +76,7 @@
"GeneratedBy": ["-prefix"]
},
{"Name": "3dAllineate",
"Used": ["-base", "-input", "-1Dmatrix_apply"],
"Used": ["-base", "-input", "-1Dmatrix_apply", "-master"],
"GeneratedBy": ["-prefix"]
},
{"Name": "3dTstat",
@@ -115,7 +115,7 @@
"GeneratedBy" : ["-x1D"]
},
{"Name": "3dmaskave",
"Used" : [2,3],
"Used" : ["-mask", 2,3],
"GeneratedBy" : [">"]
},
{"Name": "3dTnorm",
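In these description files, Used and GeneratedBy entries mix flag names (e.g. "-mask", ">") with integer positions into the argument list (e.g. 2,3 above). The sketch below shows one plausible way such an entry could be resolved against a tokenized command line; the helper name and exact semantics are illustrative, not the PR's actual get_entities logic.

# Illustrative only: resolve an entry such as
#   {"Name": "3dmaskave", "Used": ["-mask", 2, 3], "GeneratedBy": [">"]}
# against the arguments of a command line.
def resolve_spec(tokens: list, spec: list) -> list:
    values = []
    for item in spec:
        if isinstance(item, int):      # integer: index into the argument list
            if item < len(tokens):
                values.append(tokens[item])
        elif item in tokens:           # string: "-flag value" or "> target"
            idx = tokens.index(item)
            if idx + 1 < len(tokens):
                values.append(tokens[idx + 1])
    return values

args = "3dmaskave -mask mask.nii epi.nii run1 > out.1D".split()[1:]
print(resolve_spec(args, ["-mask", 2, 3]))  # ['mask.nii', 'epi.nii', 'run1']
print(resolve_spec(args, [">"]))            # ['out.1D']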
4 changes: 2 additions & 2 deletions bids_prov/fsl/description_functions.json
@@ -8,7 +8,7 @@
"Name": "fslmaths",
"Used": [0, "-add","-sub","-mul","-div","-rem","-mas","-max","-min","-seed","-restart","-save"],
"GeneratedBy": [1],
"ParametersValue" : ["-dt", "-odt","-thr","-thrp","-thrP","-uthr","-uthrp","-uthrP",
"ParametersValue" : ["-dt", "-odt","-thr","-thrp","-thrP","-uthr","-uthrp","-uthrP", "-bptf",
{
"Name": "-grid",
"Index": ["0:2"]
@@ -78,7 +78,7 @@
{
"Name": "cluster",
"Used": ["-i", "-c"],
"GeneratedBy": ["-o", "--othresh", "--olmax", "--olmaxim", "--osize", "--omax", "--omean", "--opvals", "-c", "--cope", "-x","--xfm", "--stdvol", "--warpvol"]
"GeneratedBy": ["-o", "--othresh", "--olmax", "--olmaxim", "--osize", "--omax", "--omean", "--opvals", "-c", "--cope", "-x","--xfm", "--stdvol", "--warpvol", ">"]
},
{
"Name": "echo",
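Two effects of these FSL description changes, illustrated on sample command lines. The expected classifications below are inferred from the description format, not taken from the PR's tests, and the file names are placeholders.

# fslmaths: "-bptf" is now listed under ParametersValue, so its two sigma
# values are recorded as parameters instead of being mistaken for entities.
#   fslmaths func.nii -bptf 25.0 -1 func_filtered.nii
#   Used:        func.nii              (positional index 0)
#   Parameters:  -bptf 25.0 -1
#   GeneratedBy: func_filtered.nii     (positional index 1)

# cluster: ">" is now listed under GeneratedBy, so a shell redirection
# target counts as a generated entity.
#   cluster -i zstat1.nii -c cope1.nii -t 2.3 > cluster_zstat1.txt
#   Used:        zstat1.nii, cope1.nii (via -i and -c)
#   GeneratedBy: cluster_zstat1.txt    (via >)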
79 changes: 52 additions & 27 deletions bids_prov/fsl/fsl_parser.py
@@ -8,8 +8,11 @@

from bs4 import BeautifulSoup

from bids_prov.utils import get_default_graph, CONTEXT_URL, get_id, label_mapping, compute_sha_256_entity, \
from bids_prov.utils import (
get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity,
get_activity_urn, get_agent_urn, get_entity_urn, make_alnum, get_uuid,
writing_jsonld
)

# regex to catch inputs
# in `cp /fsl/5.0/doc/fsl.css .files no_ext 5.0` --> only `.files` should match
@@ -250,7 +253,7 @@ def _get_entities_from_kwarg(entities, opts, parse_kwarg):
value = []
for (arg, val) in opts._get_kwargs():
# print("\n--arg, val", type(arg), type(val), arg, val)
if param.split("-")[1] == arg:
if param.strip('-') == arg:
# print("\n----arg select", type(arg), arg)
if val != None:
# print("\n------val != None", type(val), val)
@@ -388,14 +391,14 @@ def get_entities(cmd_s, parameters):
if "GeneratedBy" in parameters:
outputs.extend(_get_arg(parameters["GeneratedBy"], arg_rest))

# print("\n\n inputs", inputs)
# print("\n\n inputs", inputs)
# print("\n\n outputs", outputs)
# print("\n\n params", params)

return inputs, outputs, params


def build_records(groups: Mapping[str, List[str]], agent_id: str):
def build_records(groups: Mapping[str, List[str]], agent_id: str, verbose: bool = False):
"""
Build the `records` field for the final .jsonld file,
from commands lines grouped by stage (e.g. `Registration`, `Post-stats`)
@@ -420,8 +423,11 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
for cmd in v:
# process to remove + and - in pngappend command
cmd = cmd.replace(" + ", " ").replace(" - ", " ")
# remove multiple spaces
cmd = ' '.join(cmd.split())
# split on space and "=" characters
cmd_s = re.split(" |=", cmd)
a_name = cmd_s[0]
activity_name = cmd_s[0]

inputs = []
outputs = []
Expand All @@ -430,7 +436,7 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):

function_in_description_functions = False

command_name_end = os.path.split(a_name)[1]
command_name_end = os.path.split(activity_name)[1]
for df in description_functions:
if df["Name"] == command_name_end:
description_of_command = df
@@ -439,7 +445,15 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
cmd_s[1:], description_of_command)
break

if verbose:
print("CMD", cmd)
print('-> inputs: ', inputs)
print('<- outputs: ', outputs)
print(" others args :", *parameters)

if function_in_description_functions is False:
print(f"-> {command_name_end} : Not present in description_functions")

# if the function is not in our description file, the process is based on regex
attributes = defaultdict(list)

@@ -457,9 +471,9 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
outputs = list(chain(*(attributes.pop(k)
for k in attributes.keys() & OUTPUT_TAGS)))
entity_names = [_ for _ in re.findall(
INPUT_RE, cmd_without_attributes[len(a_name):])]
INPUT_RE, cmd_without_attributes[len(activity_name):])]

# # cmd_conf = get_closest_config(a_name) # with the module boutiques
# # cmd_conf = get_closest_config(activity_name) # with the module boutiques
# cmd_conf = None # None because boutiques is not used at this time
# # if cmd_conf:
# # pos_args = filter(lambda e: not e.startswith("-"), cmd_s) # TODO use "-key value" mappings
@@ -471,13 +485,14 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
if len(entity_names) > 1:
inputs.append(entity_names[0])

# the file name and possible extension
label = f"{os.path.split(a_name)[1]}"

a = {
"@id": f"urn:{get_id()}",
"Label": label_mapping(label, "fsl/fsl_labels.json"),
"AssociatedWith": "urn:" + agent_id,
# Create activity label & record
activity_label = label_mapping(
f'{os.path.split(activity_name)[1]}',
'fsl/fsl_labels.json')
activity = {
"@id": get_activity_urn(activity_label),
"Label": activity_label,
"AssociatedWith": agent_id,
"Command": cmd,
# "attributes": [
# {k: v if len(v) > 1 else v[0]} for k, v in attributes.items()
@@ -487,48 +502,58 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):

for input_path in inputs:
# input_name = input_path.replace("/", "_") # TODO
input_id = f"urn:{get_id()}" # def format_id
if not make_alnum(input_path):
input_id = 'urn:uuid:' + get_uuid()
else:
input_id = get_entity_urn(input_path)

existing_input = next(
(entity for entity in records["Entities"] if entity["AtLocation"] == input_path), None)
(e for e in records["Entities"] if e["AtLocation"] == input_path), None)
if existing_input is None:
e = {
entity = {
"@id": input_id,
"Label": os.path.split(input_path)[1],
"AtLocation": input_path,
}
records["Entities"].append(e)
a["Used"].append(input_id)
records["Entities"].append(entity)
activity["Used"].append(input_id)
else:
a["Used"].append(existing_input["@id"])
activity["Used"].append(existing_input["@id"])

# Order does not matter and then makes sense to include only unique values
a["Used"] = sorted(set(a["Used"]))
activity["Used"] = sorted(set(activity["Used"]))

for output_path in outputs:
# output_name = output_path.replace("/", "_") # TODO
if not make_alnum(output_path):
output_id = 'urn:uuid:' + get_uuid()
else:
output_id = get_entity_urn(output_path)

records["Entities"].append(
{
"@id": f"urn:{get_id()}",
"@id": output_id,
"Label": os.path.split(output_path)[1],
"AtLocation": output_path,
"GeneratedBy": a["@id"],
"GeneratedBy": activity["@id"],
# "derivedFrom": input_id,
}
)

records["Activities"].append(a)
records["Activities"].append(activity)
if verbose:
print('-------------------------')
return dict(records)


def fsl_to_bids_prov(filename: str, context_url=CONTEXT_URL, output_file=None,
soft_ver="xxx", indent=2, verbose=False) -> bool: # TODO : add fsl version

graph, agent_id = get_default_graph(
label="FSL", context_url=context_url, soft_ver=soft_ver)
soft_label="FSL", context_url=context_url, soft_version=soft_ver)

lines = readlines(filename)
records = build_records(lines, agent_id)
records = build_records(lines, agent_id, verbose)
graph["Records"].update(records)

compute_sha_256_entity(graph["Records"]["Entities"])
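A usage sketch of the two updated entry points after the get_default_graph keyword rename (soft_label/soft_version) and the new verbose flag; the input file names and version strings below are placeholders.

# Usage sketch; input files and version strings are placeholders.
from bids_prov.afni.afni_parser import afni_to_bids_prov
from bids_prov.fsl.fsl_parser import fsl_to_bids_prov

# verbose=True prints the parsed inputs/outputs for each command; commands
# missing from description_functions.json are flagged as well.
fsl_to_bids_prov("report_log.html", output_file="fsl_prov.jsonld",
                 soft_ver="6.0.5", verbose=True)
afni_to_bids_prov("proc.sub-01", output_file="afni_prov.jsonld",
                  soft_ver="23.1.10", verbose=True)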