forked from IQSS/dataverse
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
check for duplicate keys in Java properties files, fix CodeMeta displayName (IQSS#9176)
* ci: add workflow to check for duplicate keys in Java properties files IQSS#9169
* ci: add metadata blocks properties file check
  As creating the properties files for metadata blocks is a tedious manual process, this script ensures in CI everything is present. It adds to checking for duplicates.
* feat,ci: move check scripts to separate files
  - Easier to run and debug as separate files
  - Can be used locally, too
  - Use GraalVM to compile native binary for accents removal with same Java code as used in application (also uses JBang as build system)
  - "Just" using JBang is not fast enough, JVM startup times are making it sluggish!
* fix,ci: remove shellspec tests on CentOS 7 as it is EOL
* test: add a stupid change to test checks run and find problems
* ci: fix JBang setup in properties check
* fix: make properties check script dir-safe
* fix: escape square brackets in pattern
* refactor: move CV mangling (lowercase, spaces) to Java
* fix(metadata): typo and missing property for CodeMeta
* Revert "test: add a stupid change to test checks run and find problems" This reverts commit 33731ad.
* file for QA only. DO NOT CHECK IN
* file for QA only. DO NOT CHECK IN
* file for QA only. DO NOT CHECK IN
* Removing QA test files. Safe to merge
---------
Co-authored-by: Philip Durbin <[email protected]>
Co-authored-by: Steven Winship <[email protected]>
- Loading branch information
1 parent
29d96de
commit c5a6a8f
Showing
5 changed files
with
170 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Checks Java properties files on pull requests:
#  - duplicate_keys: fails when a properties file below a src dir defines a key twice
#  - metadata_blocks_properties: fails when a metadata block TSV has no matching
#    properties file or the properties file lacks keys derived from the TSV
name: "Properties Check"
on:
    pull_request:
        paths:
            - "src/**/*.properties"
            - "scripts/api/data/metadatablocks/*"

# Both jobs only read the checked-out sources, so restrict the GITHUB_TOKEN accordingly.
permissions:
    contents: read

jobs:
    duplicate_keys:
        name: Duplicate Keys
        runs-on: ubuntu-latest
        steps:
            - uses: actions/checkout@v4
            - name: Run duplicates detection script
              shell: bash
              run: tests/check_duplicate_properties.sh

    metadata_blocks_properties:
        name: Metadata Blocks Properties
        runs-on: ubuntu-latest
        steps:
            - uses: actions/checkout@v4
            # The verification script compiles a small Java helper into a native binary,
            # which needs GraalVM native-image and JBang on the PATH.
            - name: Setup GraalVM + Native Image
              uses: graalvm/setup-graalvm@v1
              with:
                  github-token: ${{ secrets.GITHUB_TOKEN }}
                  java-version: '21'
                  distribution: 'graalvm-community'
            - name: Setup JBang
              uses: jbangdev/setup-jbang@main
            - name: Run metadata block properties verification script
              shell: bash
              run: tests/verify_mdb_properties.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/bin/bash

# This script will check Java *.properties files within the src dir for duplicates
# and print logs with file annotations about it.
#
# Output: GitHub Actions workflow commands on stdout, one "::group::" per offending
#         file containing one "::error" annotation per line that repeats a key.
# Exit status: 1 if any duplicate key was found, 0 otherwise.

set -euo pipefail

FAIL=0

#######################################
# Print an "::error" annotation for every line of a properties file that
# defines a key which is defined more than once in that file.
# Keys are compared as literal strings (text before the first "="), so regex
# metacharacters in keys such as "." cannot match unrelated lines.
# Comment lines, blank lines and lines without "=" never count as a key.
# Arguments: $1 - path to the properties file
# Outputs:   annotations on stdout; nothing if the file has no duplicates
# Returns:   0 unless the file cannot be read
#######################################
report_duplicate_keys() {
    # LC_ALL=C: compare bytes, so files with non-UTF-8 content are handled as well.
    # The file name is handed over via the environment to keep it free from
    # any escape sequence processing.
    LC_ALL=C PROPERTIES_FILE="$1" awk '
        /^#/ || /^[ \t\r\f\v]*$/ { next }
        {
            pos = index($0, "=")
            if (pos == 0) next
            key = substr($0, 1, pos - 1)
            if (key in lines) {
                lines[key] = lines[key] "," NR
            } else {
                lines[key] = NR
                order[++total] = key
            }
            seen[key]++
        }
        END {
            file = ENVIRON["PROPERTIES_FILE"]
            for (i = 1; i <= total; i++) {
                key = order[i]
                if (seen[key] < 2) continue
                count = split(lines[key], numbers, ",")
                # One annotation per occurrence makes Github annotate each line in the PR
                for (j = 1; j <= count; j++) {
                    printf "::error file=%s,line=%s::Found duplicate for key \047%s\047 in lines %s\n", file, numbers[j], key, lines[key]
                }
            }
        }
    ' < "$1"
}

while IFS= read -r -d '' FILE; do

    REPORT=$(report_duplicate_keys "$FILE")

    # If there are any duplicates present, print them grouped by file
    if [ -n "$REPORT" ]; then
        FAIL=1

        echo "::group::$FILE"
        printf '%s\n' "$REPORT"
        echo "::endgroup::"
    fi
done < <( find "$(git rev-parse --show-toplevel)" -type f -wholename "*/src/*.properties" -print0 )

if [ "$FAIL" -eq 1 ]; then
    exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/bin/bash

# This script will check our metadata block files and scan if the properties files contain all the matching keys.
#
# Requirements: jbang and GraalVM native-image on the PATH, run from within the git repository.
# Output: GitHub Actions workflow commands on stdout, one "::group::" per metadata block TSV file.
# Exit status: 1 if any properties file or key is missing, 0 otherwise.

set -euo pipefail

if ! command -v jbang > /dev/null 2>&1; then
    echo "Cannot find jbang on path. Did you install it?" >&2
    exit 1
fi
if ! command -v native-image > /dev/null 2>&1; then
    echo "Cannot find GraalVM native-image on path. Did you install it?" >&2
    exit 1
fi

FAIL=0
REPO_ROOT=$(git rev-parse --show-toplevel)

# We need a small Java app here, replacing spaces, converting to lower case but especially to replace UTF-8 chars with nearest ascii / strip accents because of
# https://github.com/IQSS/dataverse/blob/dddcf29188a5c35174f3c94ffc1c4cb1d7fc0552/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java#L139-L140
# This cannot be replaced by another tool, as it behaves rather individually.
DIR=$(mktemp -d)
SOURCE="$DIR/stripaccents.java"
STRIP_BIN="$(dirname "$0")/stripaccents"

# Remove the temporary sources and the compiled helper on every exit path,
# including aborts caused by "set -e".
cleanup() {
    rm -rf -- "$DIR"
    rm -f -- "$STRIP_BIN"
}
trap cleanup EXIT

# Quoted delimiter: the Java source is written literally, without shell expansion.
cat > "$SOURCE" << 'EOF'
///usr/bin/env jbang "$0" "$@" ; exit $?
//JAVA 11+
//DEPS org.apache.commons:commons-lang3:3.12.0
import org.apache.commons.lang3.StringUtils;
import java.nio.charset.StandardCharsets;
import java.io.IOException;
class stripaccents {
    public static void main(String[] args) throws IOException {
        String input = new String(System.in.readAllBytes(), StandardCharsets.UTF_8).toLowerCase().replace(" ", "_");
        System.out.println(StringUtils.stripAccents(input));
    }
}
EOF
jbang export native --force --fresh -O "$STRIP_BIN" "$SOURCE"

# Succeeds if file $2 contains a line that is exactly the string $1.
# The string is matched literally, not as a regular expression.
has_line() {
    grep -a -q -x -F -e "$1" "$2"
}

# Succeeds if file $2 contains a line starting with the literal text "$1=".
# Literal matching keeps characters like ".", "*" or "[" in keys from acting as a pattern.
has_key() {
    LC_ALL=C KEY_PREFIX="$1=" awk '
        index($0, ENVIRON["KEY_PREFIX"]) == 1 { found = 1; exit }
        END { exit !found }
    ' < "$2"
}

while IFS= read -r -d '' MDB; do

    echo "::group::$MDB"
    # The second line of the TSV carries the block's name (column 2) and display name (column 4)
    BLOCK_NAME=$(sed -n "2p" "$MDB" | cut -f2)
    BLOCK_DISPLAY_NAME=$(sed -n "2p" "$MDB" | cut -f4)
    PROPERTIES_FILE="$REPO_ROOT/src/main/java/propertyFiles/$BLOCK_NAME.properties"

    # Check correct file exists
    if [ ! -r "$PROPERTIES_FILE" ]; then
        echo "::error::Missing properties file for metadata block '$BLOCK_NAME', expected at '$PROPERTIES_FILE'"
        FAIL=1
        # Close the log group before skipping to the next block
        echo "::endgroup::"
        continue
    fi

    # Check metadata block properties exist and are equal to TSV source
    if ! has_line "metadatablock.name=$BLOCK_NAME" "$PROPERTIES_FILE"; then
        echo "::error::Missing 'metadatablock.name=$BLOCK_NAME' or different from TSV source in $PROPERTIES_FILE"
        FAIL=1
    fi
    if ! has_line "metadatablock.displayName=$BLOCK_DISPLAY_NAME" "$PROPERTIES_FILE"; then
        echo "::error::Missing 'metadatablock.displayName=$BLOCK_DISPLAY_NAME' or different from TSV source in $PROPERTIES_FILE"
        FAIL=1
    fi
    if ! has_key "metadatablock.displayFacet" "$PROPERTIES_FILE"; then
        echo "::error::Missing 'metadatablock.displayFacet=...' in $PROPERTIES_FILE"
        FAIL=1
    fi

    # Check dataset fields: column 2 of every row between the "#datasetField" header
    # and the "#controlledVocabulary" header (or the end of the file)
    while IFS= read -r FIELD; do
        [ -n "$FIELD" ] || continue
        for ENTRY in title description watermark; do
            if ! has_key "datasetfieldtype.$FIELD.$ENTRY" "$PROPERTIES_FILE"; then
                echo "::error::Missing key 'datasetfieldtype.$FIELD.$ENTRY=...' in $PROPERTIES_FILE"
                FAIL=1
            fi
        done
    done < <(awk -F'\t' '
        /^#controlledVocabulary/ { exit }
        in_section { print $2 }
        /^#datasetField/ { in_section = 1 }
    ' < "$MDB")

    # Check CV entries: every row after the "#controlledVocabulary" header.
    # "read" without IFS= strips the leading tab of each row on purpose, so that
    # column 1 is the dataset field name and column 2 the vocabulary value.
    while read -r LINE; do
        [ -n "$LINE" ] || continue
        FIELD_NAME=$(echo "$LINE" | cut -f1)
        # See https://github.com/IQSS/dataverse/blob/dddcf29188a5c35174f3c94ffc1c4cb1d7fc0552/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java#L139-L140
        FIELD_VALUE=$(echo "$LINE" | cut -f2 | "$STRIP_BIN")

        if ! has_key "controlledvocabulary.$FIELD_NAME.$FIELD_VALUE" "$PROPERTIES_FILE"; then
            echo "::error::Missing key 'controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=...' in $PROPERTIES_FILE"
            FAIL=1
        fi
    done < <(awk '
        in_section { print }
        /^#controlledVocabulary/ { in_section = 1 }
    ' < "$MDB")

    echo "::endgroup::"

done < <( find "$REPO_ROOT/scripts/api/data/metadatablocks" -name '*.tsv' -print0 )

if [ "$FAIL" -eq 1 ]; then
    exit 1
fi