Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run bids-validator #5

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 24 additions & 19 deletions bids-hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"log"
"net/http"
"net/url"
Expand Down Expand Up @@ -66,13 +65,13 @@ var (
// * 0 = "success" (green checkmark)
// * 1 = "failure" (red "X" mark)
// * 2 = "warning" (yellow "!" mark)
// stdout will be saved to the Gitea url "/assets/${BH_UUID}.html" and linked from the commit status
// stderr will be appended to the log file "{{workerLogPath}}/${BH_UUID}.log"
// * 3+ = "error" (red "!" mark, no link to the result page)
// stdout will be saved to the Gitea url "/assets/bids-validator/XX/YY/${BH_UUID}.html" and linked from the commit status
// stderr will be appended to the log file "{{workerLogPath}}/XX/YY/${BH_UUID}.log"
workerScript string

// the path to a log directory for worker stderr output
// read from environment variable WORKER_LOG_PATH
// it should already exist
workerLogPath string

// channel used to ferry jobs from the server to the worker
Expand Down Expand Up @@ -279,18 +278,18 @@ type job struct {
// web link to the results page for this job
// see also j.resultPath()
func (j job) resultUrl() string {
return giteaRootUrl.JoinPath("assets", fmt.Sprintf("%s.html", j.uuid)).String()
return giteaRootUrl.JoinPath("assets", "bids-validator", j.uuid[:2], j.uuid[2:4], fmt.Sprintf("%s.html", j.uuid)).String()
}

// file path to the results page for this job
// see also j.resultUrl()
func (j job) resultPath() string {
return filepath.Join(giteaCustom, "public", fmt.Sprintf("%s.html", j.uuid))
return filepath.Join(giteaCustom, "public", "bids-validator", j.uuid[:2], j.uuid[2:4], fmt.Sprintf("%s.html", j.uuid))
}

// file path to the log file for this job
func (j job) logPath() string {
return filepath.Join(workerLogPath, fmt.Sprintf("%s.log", j.uuid))
return filepath.Join(workerLogPath, j.uuid[:2], j.uuid[2:4], fmt.Sprintf("%s.log", j.uuid))
}

// postStatus posts a commit status to Gitea
Expand Down Expand Up @@ -360,7 +359,12 @@ func (j job) run() (state string, _ error) {
)

// redirect stdout to the result file
stdout, err := os.OpenFile(j.resultPath(), os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
resultPath := j.resultPath()
err := os.MkdirAll(filepath.Dir(resultPath), 0750)
if err != nil {
return stateError, err
}
stdout, err := os.OpenFile(resultPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0640)
if err != nil {
return stateError, err
}
Expand All @@ -373,7 +377,12 @@ func (j job) run() (state string, _ error) {
cmd.Stdout = stdout

// redirect stderr to the log file
stderr, err := os.OpenFile(j.logPath(), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
logPath := j.logPath()
err = os.MkdirAll(filepath.Dir(logPath), 0750)
if err != nil {
return stateError, err
}
stderr, err := os.OpenFile(j.logPath(), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640)
if err != nil {
return stateError, err
}
Expand Down Expand Up @@ -420,10 +429,9 @@ func worker() {
// readConfig sets up global variables from environment values
func readConfig() {
var (
val string
ok bool
err error
info fs.FileInfo
val string
ok bool
err error
)

val, ok = os.LookupEnv("BIDS_HOOK_URL")
Expand Down Expand Up @@ -464,7 +472,7 @@ func readConfig() {
if err != nil {
log.Fatalf("invalid GITEA_CUSTOM: %v", err)
}
err = os.MkdirAll(filepath.Join(giteaCustom, "public"), 0750)
err = os.MkdirAll(filepath.Join(giteaCustom, "public", "bids-validator"), 0750)
if err != nil {
log.Fatalf("error creating output folder: %v", err)
}
Expand All @@ -486,12 +494,9 @@ func readConfig() {
if err != nil {
log.Fatalf("invalid WORKER_LOG_PATH: %v", err)
}
info, err = os.Stat(workerLogPath)
err = os.MkdirAll(workerLogPath, 0750)
if err != nil {
log.Fatalf("error opening WORKER_LOG_PATH: %v", err)
}
if !info.IsDir() {
log.Fatal("WORKER_LOG_PATH is not a directory")
log.Fatalf("error creating log folder: %v", err)
}

val, ok = os.LookupEnv("WORKER_QUEUE_CAPACITY")
Expand Down
133 changes: 128 additions & 5 deletions worker
Original file line number Diff line number Diff line change
@@ -1,8 +1,48 @@
#!/bin/bash
# -------------------------------------------------------------------
# Default bids-hook worker script
# -------------------------------------------------------------------
#
# If bids-hook is started with the environment variable WORKER_SCRIPT
# pointing to this script, then it will be executed by the worker
# once for each accepted job.
#
# In addition to inheriting all environment variables used to launch
# bids-hook, the script will be given the details of the job in the
# following environment variables:
#
# BH_USER, BH_REPO, BH_COMMIT, BH_UUID
#
# Any output generated by the script on stdout (file descriptor 1)
# is saved as a result page, overwriting the output of any previous
# run for the same UUID. The result page is linked from the commit
# status posted on Gitea, and will be visible to anyone who has a
# link. It should be a complete, properly formatted HTML document.
#
# Any output generated by the script on stderr (file descriptor 2)
# is appended to a log file, after the log output of any previous
# run for the same UUID, visible at the filesystem path:
#
# "${WORKER_LOG_PATH}/ab/cd/${BH_UUID}.log"
#
# where "abcd" are the first four characters of BH_UUID.
#
# The exit code of the script is interpreted as follows:
#
# 0 = "success" (green checkmark)
# 1 = "failure" (red "X" mark)
# 2 = "warning" (yellow "!" mark)
# 3+ = "internal error" (red "!" mark, no link to the result page)
#
# -------------------------------------------------------------------

# Exit with the "internal error" status if anything goes wrong.
set -o nounset -o pipefail
trap 'exit 3' ERR
mguaypaq marked this conversation as resolved.
Show resolved Hide resolved

# Log the job metadata to stderr.
(exec 1>&2
echo '=== job start ==='
date
cat <<EOF
BH_USER=${BH_USER}
Expand All @@ -12,13 +52,96 @@ BH_UUID=${BH_UUID}
EOF
)

sleep 5
# Canonicalize the GITEA_REPOSITORY_ROOT before changing directories
# in case it was a relative path.
GITEA_REPO=$(realpath --canonicalize-existing \
"${GITEA_REPOSITORY_ROOT}/${BH_USER}/${BH_REPO}.git")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I wanted to be more transparent in the output, I wanted; which meant showing a relative path to the user, maybe.

I just did a quick test and git clone canonicalizes local paths; but git remote add doesn't. So..yeah I guess we need this either way; i didn't test with GITEA_REPOSITORY_ROOT set to a relative path I guess. And we'll just have to live with exposing full paths. Drone does, after all (but Drone mostly works in ephemeral docker containers)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I actually ran my tests with relative paths, and everything failed, that's why I put this in. And since I wasn't trying to show this path to users, only in the admin logs, I hadn't thought about showing a pretty path. Good point.

echo 1>&2 "GITEA_REPO=${GITEA_REPO}"

# Do all the work in a private temporary directory.
WORKDIR=$(mktemp --directory --tmpdir bids-hook.XXXXXXXXXX)
echo 1>&2 "WORKDIR=${WORKDIR}"
# We have an ERR trap to account for this:
# shellcheck disable=SC2164
cd 1>&2 "$WORKDIR"
# We have an EXIT trap which makes this code reachable:
# shellcheck disable=SC2317
function cleanup() {
echo "# cleaning ${WORKDIR}"
chmod --recursive u+w "$WORKDIR"
rm --recursive "$WORKDIR"
} 1>&2
trap cleanup EXIT

# Check out the repository, redirecting all output to stderr.
(exec 1>&2
# We'd like to do 'git clone -b "$BH_COMMIT" --depth 1 "$GITEA_REPO"'
# but maybe "$BH_COMMIT" is a commit ID instead of a branch name, so
# we do it this way instead.
echo "# cloning ${GITEA_REPO}"
git init
git remote add origin "$GITEA_REPO"
git fetch --depth 1 origin "$BH_COMMIT"
git checkout "$BH_COMMIT"

# If this is a git-annex repository, we need to get the contents.
if git ls-remote --exit-code origin refs/heads/git-annex >/dev/null; then
echo '# getting git-annexed files'
# this reduces copies; always overrides annex.hardlink even if that is set system-wide
git config annex.thin true
# make sure we don't corrupt origin accidentally
git config remote.origin.annex-readonly true
git config annex.private true # XXX this doesn't do anything until git-annex 10
git annex init
git annex dead here # this is like annex.private, but has to be run
# grab the git-annex branch (since we did a shallow clone above)
git annex sync --only-annex --no-content
# NB: using 'copy --from origin' and not 'git annex get; to ensure we're
# validating the contents of origin and not any special remotes
git annex copy --from origin
fi
)

echo 1>&2 '# running bids-validator'
OUTPUT=$(bids-validator .) && STATUS=$? || STATUS=$?
WARNINGS=$(bids-validator . --json | jq '.issues.warnings | length') || true
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's disappointing to have to run this twice. bids-validator is pretty quick, even on large datasets, but I don't trust it to stay that way.

Is there another flag where maybe we could get the warnings routed...elsewhere?

Or can we patch bids-validator so that it exits:

  • 0 if no issues exist
  • 1 if issues exist and contain errors
  • 2 if issues exist

Does it already do that?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it already do that?

I don't think so. Looking at the source, here is the meaning of each exit status for bids-validator:

  • Exit 3 for internal errors (the equivalent of a traceback).
  • Exit 2 for problems opening the dataset directory.
  • Exit 1 if the .bids-validator-config.json is invalid or the dataset contains "error"-level issues.
  • Exit 0 otherwise, including if there are "warning"-level issues.

cat 1>&2 <<EOF
STATUS=${STATUS}
WARNINGS=${WARNINGS}
EOF

cat <<"EOF"
echo 1>&2 '# formatting output'
HTML=$(echo "$OUTPUT" | ansifilter --html --fragment --line-numbers --anchors=self)

# Produce the HTML result page on stdout.
cat <<EOF
<!DOCTYPE html>
<html lang=en>
<title>a title</title>
<p>a paragraph
<title>bids-validator results for ${BH_USER}/${BH_REPO}@${BH_COMMIT}</title>
<p>Here are the bids-validator results for ${BH_USER}/${BH_REPO}@${BH_COMMIT}:</p>
<pre>
${HTML}
</pre>
</html>
EOF

exit 0
# Our exit code means:
# 0 = "success" (green checkmark)
# 1 = "failure" (red "X" mark)
# 2 = "warning" (yellow "!" mark)
# 3+ = "internal error" (red "!" mark, no link to the result page)
case $STATUS in
(0)
if ((WARNINGS)); then
exit 2
else
exit 0
fi
;;
(1)
exit 1
;;
(*)
exit 3
;;
esac