-
Notifications
You must be signed in to change notification settings - Fork 493
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
check for duplicate keys in Java properties files, fix CodeMeta displayName #9176
Changes from 12 commits
3806b9c
07d67ac
f4b61bf
ec6a252
33731ad
470b490
d709d23
1ed25f3
68a1b35
1aeb665
fe6c2d3
8b1b7ce
7bd9364
7c63352
4fe1d25
085f628
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Pull request checks for Java properties files and metadata block definitions:
#  - duplicate_keys: fails when a *.properties file below src/ defines a key twice
#  - metadata_blocks_properties: fails when a metadata block TSV has no matching
#    keys in its properties file
name: "Properties Check"
on:
  pull_request:
    paths:
      - "src/**/*.properties"
      - "scripts/api/data/metadatablocks/*"
      # Also run when the check scripts themselves change, so that a broken
      # script is noticed in the pull request that breaks it.
      - "tests/check_duplicate_properties.sh"
      - "tests/verify_mdb_properties.sh"
jobs:
  duplicate_keys:
    name: Duplicate Keys
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run duplicates detection script
        shell: bash
        run: tests/check_duplicate_properties.sh

  metadata_blocks_properties:
    name: Metadata Blocks Properties
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # The verification script compiles a small Java helper to a native
      # binary, hence GraalVM (native-image) and JBang are required.
      - name: Setup GraalVM + Native Image
        uses: graalvm/setup-graalvm@v1
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          java-version: '21'
          distribution: 'graalvm-community'
      # NOTE(review): "@main" follows a moving branch; consider pinning this
      # action to a release tag or commit SHA.
      - name: Setup JBang
        uses: jbangdev/setup-jbang@main
      - name: Run metadata block properties verification script
        shell: bash
        run: tests/verify_mdb_properties.sh
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/bin/bash

# This script will check Java *.properties files within the src dir for duplicates
# and print logs with file annotations about it.
#
# Every occurrence of a duplicated key is printed as a GitHub Actions "::error"
# annotation. The exit status is 1 if any duplicate was found, 0 otherwise.
#
# NOTE(review): a reviewer reported that this script fails on macOS and pointed
# to the "illegal byte sequence" discussion at
# https://stackoverflow.com/questions/19242275/re-error-illegal-byte-sequence-on-mac-os-x
# It is only expected to work on Linux and in our CI.

set -euo pipefail

FAIL=0

#######################################
# Print every key that is defined more than once, one key per line.
# The key is the text left of the first "=" on a line that is neither a
# comment nor blank.
# Arguments: $1 - path to a properties file
# Outputs:   the duplicated keys on stdout (nothing if there are none)
# Returns:   non-zero only on a real error (e.g. unreadable file)
#######################################
duplicate_keys() {
  # grep exits with 1 when it selects no line (a file holding only comments or
  # blank lines). That is not an error here and must not trip "pipefail";
  # any other non-zero status (e.g. 2, unreadable file) still fails.
  { grep -a -v -E "^(#.*|\s*$)" -- "$1" || [ "$?" -eq 1 ]; } \
    | cut -d"=" -f1 \
    | LC_ALL=C sort \
    | LC_ALL=C uniq -d
}

#######################################
# Print the line numbers on which a key is defined, one number per line.
# The key is compared as a literal string, so characters like "." in a key
# never act as regular expression wildcards.
# Arguments: $1 - path to a properties file
#            $2 - key exactly as printed by duplicate_keys
# Outputs:   line numbers on stdout
#######################################
key_line_numbers() {
  # "grep -n" prefixes each selected line with "<number>:"; awk splits that
  # prefix off again and extracts the key the same way duplicate_keys does.
  { grep -a -n -v -E "^(#.*|\s*$)" -- "$1" || [ "$?" -eq 1 ]; } \
    | WANTED_KEY="$2" LC_ALL=C awk '
        {
          colon = index($0, ":")
          content = substr($0, colon + 1)
          equals = index(content, "=")
          key = (equals > 0) ? substr(content, 1, equals - 1) : content
          # Concatenating "" forces a string (never numeric) comparison.
          if ((key "") == (ENVIRON["WANTED_KEY"] "")) {
            print substr($0, 1, colon - 1)
          }
        }'
}

while IFS= read -r -d '' FILE; do

  # Scan the whole file for duplicates
  DUPLICATES=$(duplicate_keys "$FILE")

  # If there are any duplicates present, analyse further to point people to the source
  if [ -n "$DUPLICATES" ]; then
    FAIL=1

    echo "::group::$FILE"
    while IFS= read -r KEY; do
      # Find the numbers of all lines defining KEY
      DUPLICATE_LINES=$(key_line_numbers "$FILE" "$KEY")
      # Join the found line numbers for better error log
      DUPLICATE_NUMBERS=$(echo "$DUPLICATE_LINES" | paste -sd ',' -)

      # This form will make Github annotate the lines in the PR that changes the properties file
      for LINE_NUMBER in $DUPLICATE_LINES; do
        echo "::error file=$FILE,line=$LINE_NUMBER::Found duplicate for key '$KEY' in lines $DUPLICATE_NUMBERS"
      done
    done <<< "$DUPLICATES"
    echo "::endgroup::"
  fi
done < <(find "$(git rev-parse --show-toplevel)" -wholename "*/src/*.properties" -print0)

if [ "$FAIL" -eq 1 ]; then
  exit 1
fi
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/bin/bash

# This script will check our metadata block files and scan if the properties files contain all the matching keys.
#
# For every *.tsv file in scripts/api/data/metadatablocks it verifies that
# src/main/java/propertyFiles/<block name>.properties exists and contains
#  - the metadatablock.name / displayName / displayFacet keys,
#  - title, description and watermark keys for every dataset field,
#  - one key for every controlled vocabulary value.
# Problems are printed as GitHub Actions "::error" annotations. The exit status
# is 1 if any problem was found, 0 otherwise.
# Requires jbang and GraalVM native-image on the PATH.

set -euo pipefail

if ! command -v jbang > /dev/null 2>&1; then
  echo "Cannot find jbang on path. Did you install it?" >&2
  exit 1
fi
if ! command -v native-image > /dev/null 2>&1; then
  echo "Cannot find GraalVM native-image on path. Did you install it?" >&2
  exit 1
fi

FAIL=0

# Resolve the repository root once; under "set -e" this aborts right away when
# the script is not run from within a Git checkout.
REPO_ROOT=$(git rev-parse --show-toplevel)

# We need a small Java app here, replacing spaces, converting to lower case but especially to replace UTF-8 chars with nearest ascii / strip accents because of
# https://github.com/IQSS/dataverse/blob/dddcf29188a5c35174f3c94ffc1c4cb1d7fc0552/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java#L139-L140
# This cannot be replaced by another tool, as it behaves rather individually.
DIR=$(mktemp -d)
SOURCE="$DIR/stripaccents.java"
STRIP_BIN="$(dirname "$0")/stripaccents"

# Remove the temporary directory and the compiled helper on every exit path,
# including failures of the commands below.
trap 'rm -rf -- "$DIR" "$STRIP_BIN"' EXIT

# Quoted delimiter: the Java source is written literally, nothing is expanded.
cat > "$SOURCE" << 'EOF'
///usr/bin/env jbang "$0" "$@" ; exit $?
//JAVA 11+
//DEPS org.apache.commons:commons-lang3:3.12.0
import org.apache.commons.lang3.StringUtils;
import java.nio.charset.StandardCharsets;
import java.io.IOException;
class stripaccents {
    public static void main(String[] args) throws IOException {
        String input = new String(System.in.readAllBytes(), StandardCharsets.UTF_8).toLowerCase().replace(" ", "_");
        System.out.println(StringUtils.stripAccents(input));
    }
}
EOF
jbang export native --force --fresh -O "$STRIP_BIN" "$SOURCE"

while IFS= read -r -d '' MDB; do

  echo "::group::$MDB"
  # Line 2 of the TSV describes the block: column 2 is its name, column 4 its display name.
  BLOCK_NAME=$(sed -n "2p" "$MDB" | cut -f2)
  BLOCK_DISPLAY_NAME=$(sed -n "2p" "$MDB" | cut -f4)
  PROPERTIES_FILE="$REPO_ROOT/src/main/java/propertyFiles/$BLOCK_NAME.properties"

  # Check correct file exists
  if [ ! -r "$PROPERTIES_FILE" ]; then
    echo "::error::Missing properties file for metadata block '$BLOCK_NAME', expected at '$PROPERTIES_FILE'"
    FAIL=1
    # Close the log group before moving on to the next block.
    echo "::endgroup::"
    continue
  fi

  # Check metadata block properties exist and are equal to TSV source
  if ! grep -a -q -e "^metadatablock.name=$BLOCK_NAME$" "$PROPERTIES_FILE"; then
    echo "::error::Missing 'metadatablock.name=$BLOCK_NAME' or different from TSV source in $PROPERTIES_FILE"
    FAIL=1
  fi
  if ! grep -a -q -e "^metadatablock.displayName=$BLOCK_DISPLAY_NAME$" "$PROPERTIES_FILE"; then
    echo "::error::Missing 'metadatablock.displayName=$BLOCK_DISPLAY_NAME' or different from TSV source in $PROPERTIES_FILE"
    FAIL=1
  fi
  if ! grep -a -q -e "^metadatablock.displayFacet=" "$PROPERTIES_FILE"; then
    echo "::error::Missing 'metadatablock.displayFacet=...' in $PROPERTIES_FILE"
    FAIL=1
  fi

  # Check dataset fields
  # Take column 2 of every line between the "#datasetField" header and the
  # "#controlledVocabulary" header, without any limit on the section length.
  # If there is no "#controlledVocabulary" header, the section runs to the end of the file.
  # Word splitting of the result is intended: it yields one field name per
  # iteration and drops empty lines.
  for FIELD in $(awk '
      in_fields && /^#controlledVocabulary/ { exit }
      in_fields { print }
      /^#datasetField/ { in_fields = 1 }' "$MDB" | cut -f2); do
    for ENTRY in title description watermark; do
      if ! grep -a -q -e "^datasetfieldtype.$FIELD.$ENTRY=" "$PROPERTIES_FILE"; then
        echo "::error::Missing key 'datasetfieldtype.$FIELD.$ENTRY=...' in $PROPERTIES_FILE"
        FAIL=1
      fi
    done
  done

  # Check CV entries
  # NOTE: "read" is used without "IFS=" on purpose: it trims leading whitespace,
  # which presumably removes the empty first TSV column so that the field name
  # becomes column 1 - verify before changing.
  while read -r LINE; do
    # A whitespace-only line defines no vocabulary value; nothing to check.
    [ -n "$LINE" ] || continue

    FIELD_NAME=$(echo "$LINE" | cut -f1)
    # See https://github.com/IQSS/dataverse/blob/dddcf29188a5c35174f3c94ffc1c4cb1d7fc0552/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java#L139-L140
    # Square brackets are special in grep with expressions activated, so escape them if present!
    FIELD_VALUE=$(echo "$LINE" | cut -f2 | "$STRIP_BIN" | sed -e 's/\([][]\)/\\\1/g')

    if ! grep -q -a -e "^controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=" "$PROPERTIES_FILE"; then
      echo "::error::Missing key 'controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=...' in $PROPERTIES_FILE"
      FAIL=1
    fi
  # Everything after the "#controlledVocabulary" header, however long it is.
  done < <(awk 'in_cv { print } /^#controlledVocabulary/ { in_cv = 1 }' "$MDB")

  echo "::endgroup::"

done < <(find "$REPO_ROOT/scripts/api/data/metadatablocks" -name '*.tsv' -print0)

# The temporary directory and the helper binary are removed by the EXIT trap.
if [ "$FAIL" -eq 1 ]; then
  exit 1
fi
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a good fix, making it the same as what's in scripts/api/data/metadatablocks/codemeta.tsv