diff --git a/.github/workflows/check_property_files.yml b/.github/workflows/check_property_files.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/check_property_files.yml
@@ -0,0 +1,39 @@
+# Checks Java properties files for duplicate keys and verifies that every metadata block TSV
+# has matching keys in its properties file. Runs on pull requests touching the paths below.
+name: "Properties Check"
+on:
+    pull_request:
+        paths:
+            - "src/**/*.properties"
+            - "scripts/api/data/metadatablocks/*"
+            # Re-run the checks whenever the check scripts or this workflow change.
+            - "tests/check_duplicate_properties.sh"
+            - "tests/verify_mdb_properties.sh"
+            - ".github/workflows/check_property_files.yml"
+jobs:
+    duplicate_keys:
+        name: Duplicate Keys
+        runs-on: ubuntu-latest
+        steps:
+            - uses: actions/checkout@v4
+            - name: Run duplicates detection script
+              shell: bash
+              run: tests/check_duplicate_properties.sh
+
+    metadata_blocks_properties:
+        name: Metadata Blocks Properties
+        runs-on: ubuntu-latest
+        steps:
+            - uses: actions/checkout@v4
+            - name: Setup GraalVM + Native Image
+              uses: graalvm/setup-graalvm@v1
+              with:
+                  github-token: ${{ secrets.GITHUB_TOKEN }}
+                  java-version: '21'
+                  distribution: 'graalvm-community'
+            - name: Setup JBang
+              # NOTE(review): "@main" is a mutable ref; consider pinning to a release tag or commit SHA.
+              uses: jbangdev/setup-jbang@main
+            - name: Run metadata block properties verification script
+              shell: bash
+              run: tests/verify_mdb_properties.sh
diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml
index 227a74fa00f..90b1486b512 100644
--- a/.github/workflows/shellspec.yml
+++ b/.github/workflows/shellspec.yml
@@ -24,23 +24,6 @@ jobs:
               run: |
                   cd tests/shell
                   shellspec
-    shellspec-centos7:
-        name: "CentOS 7"
-        runs-on: ubuntu-latest
-        container:
-            image: centos:7
-        steps:
-            - uses: actions/checkout@v2
-            - name: Install shellspec
-              run: |
-                  curl -fsSL https://github.com/shellspec/shellspec/releases/download/${{ env.SHELLSPEC_VERSION }}/shellspec-dist.tar.gz | tar -xz -C /usr/share
-                  ln -s /usr/share/shellspec/shellspec /usr/bin/shellspec
-            - name: Install dependencies
-              run: yum install -y ed
-            - name: Run shellspec
-              run: |
-                  cd tests/shell
-                  shellspec
     shellspec-rocky8:
         name: "RockyLinux 8"
         runs-on: ubuntu-latest
diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties
index c0e7eac6d4a..4f3eb087aa4 100644
--- a/src/main/java/propertyFiles/codeMeta20.properties
+++ b/src/main/java/propertyFiles/codeMeta20.properties
@@ -1,5 +1,6 @@
 metadatablock.name=codeMeta20
-metadatablock.displayName=Software Metadata (CodeMeta 2.0)
+metadatablock.displayName=Software Metadata (CodeMeta v2.0)
+metadatablock.displayFacet=Software
 datasetfieldtype.codeVersion.title=Software Version
 datasetfieldtype.codeVersion.description=Version of the software instance, usually following some convention like SemVer etc.
 datasetfieldtype.codeVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc
diff --git a/tests/check_duplicate_properties.sh b/tests/check_duplicate_properties.sh
new file mode 100755
--- /dev/null
+++ b/tests/check_duplicate_properties.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# This script will check Java *.properties files within the src dir for duplicate keys
+# and print logs with file annotations about it. It exits with status 1 if any duplicate is found.
+
+set -euo pipefail
+
+FAIL=0
+
+while IFS= read -r -d '' FILE; do
+
+    # Collect every key (the text before the first "=") that is defined more than once.
+    # Comment lines, blank lines and lines without "=" (e.g. continuation lines of multi-line
+    # values) are skipped. grep exits with 1 when it selects nothing, which is fine here; the
+    # guards turn that into success so "set -e -o pipefail" only aborts on real grep errors.
+    DUPLICATE_KEYS=$(
+        { grep -a -v -E "^(#.*|\s*$)" "$FILE" || [ $? -eq 1 ]; } \
+            | { grep -a -e "=" || [ $? -eq 1 ]; } \
+            | cut -d"=" -f1 | sort | uniq -d
+    )
+
+    # If there are any duplicates present, analyse further to point people to the source
+    if [ -n "$DUPLICATE_KEYS" ]; then
+        FAIL=1
+
+        echo "::group::$FILE"
+        while IFS= read -r KEY; do
+            # Find the duplicate lines' numbers. The key is compared as a literal prefix (not as a
+            # regex), so dots and other metacharacters in keys cannot match unrelated lines.
+            DUPLICATE_LINES=$(KEY="$KEY" awk 'index($0, ENVIRON["KEY"] "=") == 1 { print NR }' "$FILE")
+            # Join the found line numbers for better error log
+            DUPLICATE_NUMBERS=$(echo "$DUPLICATE_LINES" | paste -sd ',' -)
+
+            # This form will make Github annotate the lines in the PR that changes the properties file
+            for LINE_NUMBER in $DUPLICATE_LINES; do
+                echo "::error file=$FILE,line=$LINE_NUMBER::Found duplicate for key '$KEY' in lines $DUPLICATE_NUMBERS"
+            done
+        done <<< "$DUPLICATE_KEYS"
+        echo "::endgroup::"
+    fi
+done < <( find "$(git rev-parse --show-toplevel)" -wholename "*/src/*.properties" -print0 )
+
+if [ "$FAIL" -eq 1 ]; then
+    exit 1
+fi
diff --git a/tests/verify_mdb_properties.sh b/tests/verify_mdb_properties.sh
new file mode 100755
--- /dev/null
+++ b/tests/verify_mdb_properties.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+# This script will check our metadata block files and scan if the properties files contain all the matching keys.
+# It exits with status 1 if anything is missing and needs jbang and GraalVM native-image on the path.
+
+set -euo pipefail
+
+if ! command -v jbang > /dev/null 2>&1; then
+    echo "Cannot find jbang on path. Did you install it?" >&2
+    exit 1
+fi
+if ! command -v native-image > /dev/null 2>&1; then
+    echo "Cannot find GraalVM native-image on path. Did you install it?" >&2
+    exit 1
+fi
+
+FAIL=0
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# Succeeds if any line of the file $1 starts with the literal string $2.
+# The prefix is compared literally instead of as a regex, so dots, asterisks, brackets etc. in
+# field names or vocabulary values can neither match unrelated keys nor miss the right one.
+has_key_prefix() {
+    PREFIX="$2" awk 'index($0, ENVIRON["PREFIX"]) == 1 { found = 1; exit } END { exit !found }' "$1"
+}
+
+# We need a small Java app here, replacing spaces, converting to lower case but especially to replace UTF-8 chars with nearest ascii / strip accents because of
+# https://github.com/IQSS/dataverse/blob/dddcf29188a5c35174f3c94ffc1c4cb1d7fc0552/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java#L139-L140
+# This cannot be replaced by another tool, as it behaves rather individually.
+DIR=$(mktemp -d)
+SOURCE="$DIR/stripaccents.java"
+STRIP_BIN="$(dirname "$0")/stripaccents"
+# Clean up the temporary directory and the generated binary on every exit path, including failures
+trap 'rm -rf "$DIR" "$STRIP_BIN"' EXIT
+# The quoted delimiter makes the shell write the Java source verbatim, without any expansion
+cat > "$SOURCE" << 'EOF'
+///usr/bin/env jbang "$0" "$@" ; exit $?
+//JAVA 11+
+//DEPS org.apache.commons:commons-lang3:3.12.0
+import org.apache.commons.lang3.StringUtils;
+import java.nio.charset.StandardCharsets;
+import java.io.IOException;
+class stripaccents {
+    public static void main(String[] args) throws IOException {
+        String input = new String(System.in.readAllBytes(), StandardCharsets.UTF_8).toLowerCase().replace(" ", "_");
+        System.out.println(StringUtils.stripAccents(input));
+    }
+}
+EOF
+jbang export native --force --fresh -O "$STRIP_BIN" "$SOURCE"
+
+while IFS= read -r -d '' MDB; do
+
+    echo "::group::$MDB"
+    BLOCK_NAME=$(sed -n "2p" "$MDB" | cut -f2)
+    BLOCK_DISPLAY_NAME=$(sed -n "2p" "$MDB" | cut -f4)
+    PROPERTIES_FILE="$REPO_ROOT/src/main/java/propertyFiles/$BLOCK_NAME.properties"
+
+    # Check correct file exists
+    if [ ! -r "$PROPERTIES_FILE" ]; then
+        echo "::error::Missing properties file for metadata block '$BLOCK_NAME', expected at '$PROPERTIES_FILE'"
+        FAIL=1
+        # Close the log group opened above before skipping the remaining checks for this block
+        echo "::endgroup::"
+        continue
+    fi
+
+    # Check metadata block properties exist and are equal to TSV source (literal whole-line comparison)
+    if ! grep -a -q -x -F -e "metadatablock.name=$BLOCK_NAME" "$PROPERTIES_FILE"; then
+        echo "::error::Missing 'metadatablock.name=$BLOCK_NAME' or different from TSV source in $PROPERTIES_FILE"
+        FAIL=1
+    fi
+    if ! grep -a -q -x -F -e "metadatablock.displayName=$BLOCK_DISPLAY_NAME" "$PROPERTIES_FILE"; then
+        echo "::error::Missing 'metadatablock.displayName=$BLOCK_DISPLAY_NAME' or different from TSV source in $PROPERTIES_FILE"
+        FAIL=1
+    fi
+    if ! has_key_prefix "$PROPERTIES_FILE" "metadatablock.displayFacet="; then
+        echo "::error::Missing 'metadatablock.displayFacet=...' in $PROPERTIES_FILE"
+        FAIL=1
+    fi
+
+    # Check dataset fields: column 2 of every row between the "#datasetField" header and the
+    # "#controlledVocabulary" header, or the end of the file if there is no vocabulary section
+    while IFS= read -r FIELD; do
+        for ENTRY in title description watermark; do
+            if ! has_key_prefix "$PROPERTIES_FILE" "datasetfieldtype.$FIELD.$ENTRY="; then
+                echo "::error::Missing key 'datasetfieldtype.$FIELD.$ENTRY=...' in $PROPERTIES_FILE"
+                FAIL=1
+            fi
+        done
+    done < <(awk -F'\t' '/^#controlledVocabulary/ { in_fields = 0 } in_fields && $2 != "" { print $2 } /^#datasetField/ { in_fields = 1 }' "$MDB")
+
+    # Check CV entries: every row after the "#controlledVocabulary" header
+    while read -r LINE; do
+        # Skip blank lines, they would otherwise be reported as a missing key with empty names
+        if [ -z "$LINE" ]; then
+            continue
+        fi
+        # "read" with the default IFS strips leading whitespace, so a row starting with a tab
+        # (empty first column) yields the field name as column 1 and the value as column 2 here
+        FIELD_NAME=$(echo "$LINE" | cut -f1)
+        # See https://github.com/IQSS/dataverse/blob/dddcf29188a5c35174f3c94ffc1c4cb1d7fc0552/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java#L139-L140
+        FIELD_VALUE=$(echo "$LINE" | cut -f2 | "$STRIP_BIN")
+
+        if ! has_key_prefix "$PROPERTIES_FILE" "controlledvocabulary.$FIELD_NAME.$FIELD_VALUE="; then
+            echo "::error::Missing key 'controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=...' in $PROPERTIES_FILE"
+            FAIL=1
+        fi
+    done < <(awk 'in_cv { print } /^#controlledVocabulary/ { in_cv = 1 }' "$MDB")
+
+    echo "::endgroup::"
+
+done < <( find "$REPO_ROOT/scripts/api/data/metadatablocks" -name '*.tsv' -print0 )
+
+if [ "$FAIL" -eq 1 ]; then
+    exit 1
+fi