Skip to content

Commit

Permalink
Merge pull request #3048 from effigies/fix/datalad-run-and-decode
Browse files Browse the repository at this point in the history
fix(datalad): Separate validator process construction from wait, allowing error logging
  • Loading branch information
nellh authored May 8, 2024
2 parents edd8c62 + ce8ee30 commit f9701af
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 21 deletions.
51 changes: 32 additions & 19 deletions services/datalad/datalad_service/tasks/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,39 +35,52 @@ def setup_validator():
subprocess.run(['yarn'])


def run_and_decode(args, timeout, esLogger):
"""Run a subprocess and return the JSON output."""
process = gevent.subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
try:
process.wait(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()

# Retrieve what we can from the process
stdout, stderr = process.communicate()

# If we have a whole JSON document from stdout, then return it
# Otherwise, log the error and return None
try:
return json.loads(escape_ansi(stdout.decode('utf-8')))
except json.decoder.JSONDecodeError as err:
esLogger.log(stdout, stderr, err)


def validate_dataset_sync(dataset_path, ref, esLogger):
"""
Synchronous dataset validation.
Runs the bids-validator process and installs node dependencies if needed.
"""
setup_validator()
try:
process = gevent.subprocess.run(
['./node_modules/.bin/bids-validator', '--json', '--ignoreSubjectConsistency', dataset_path], stdout=subprocess.PIPE, timeout=300)
return json.loads(process.stdout)
except subprocess.TimeoutExpired as err:
esLogger.log(process.stdout, process.stderr, err)
except json.decoder.JSONDecodeError as err:
esLogger.log(process.stdout, process.stderr, err)
return run_and_decode(
['./node_modules/.bin/bids-validator', '--json', '--ignoreSubjectConsistency', dataset_path],
timeout=300,
esLogger=esLogger,
)


def validate_dataset_deno_sync(dataset_path, ref, esLogger):
"""
Synchronous dataset validation.
Runs the bids-validator process and installs node dependencies if needed.
Runs the deno bids-validator process.
"""
try:
process = gevent.subprocess.run(
['deno', 'run', '-A',
f'https://deno.land/x/bids_validator@{DENO_VALIDATOR_VERSION}/bids-validator.ts',
'--json', dataset_path], stdout=subprocess.PIPE, timeout=300)
return json.loads(escape_ansi(process.stdout.decode('utf-8')))
except subprocess.TimeoutExpired as err:
esLogger.log(process.stdout, process.stderr, err)
except json.decoder.JSONDecodeError as err:
esLogger.log(process.stdout, process.stderr, err)
return run_and_decode(
['deno', 'run', '-A',
f'https://deno.land/x/bids_validator@{DENO_VALIDATOR_VERSION}/bids-validator.ts',
'--json', dataset_path],
timeout=300,
esLogger=esLogger,
)


def summary_mutation(dataset_id, ref, validator_output, validator_metadata):
Expand Down
8 changes: 6 additions & 2 deletions services/datalad/tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,12 @@ def test_validator_error(new_dataset):
@pytest.fixture
def mock_validator_crash(monkeypatch):
def return_bad_json(*args, **kwargs):
return SimpleNamespace(stdout='{invalidJson', stderr='')
monkeypatch.setattr(subprocess, 'run', return_bad_json)
return SimpleNamespace(
wait=lambda timeout=None: None,
kill=lambda: None,
communicate=lambda: (b'{invalidJson', b'')
)
monkeypatch.setattr(subprocess, 'Popen', return_bad_json)


def test_validator_bad_json(new_dataset, mock_validator_crash):
Expand Down

0 comments on commit f9701af

Please sign in to comment.