#!/bin/bash
#
# checkFileEncoding.sh - verify that text files are valid UTF-8 and use Unix
# (LF) line endings.
#
# Provenance: introduced by commit ea057289548d40ca65cdaf04457002180f0c340b
# (Clemens Schmid, Fri, 27 Oct 2023), "added a check for file encoding to the
# default validation". The same commit wires the script into
# .github/workflows/validation.yml as the step
#   name: Check if all files have UTF-8 encoding and Unix line endings
#   run: ./checkFileEncoding.sh
#   working-directory: ./data
#
# Usage: ./checkFileEncoding.sh      (scans the current working directory)
#
# Output: one "FAIL: <path>" line on stdout per offending file.
# Exit status:
#   0  every matching file passed both checks
#   1  at least one file failed a check
#   2  a required external tool (find, iconv, grep) is missing

set -u

# File extensions (without the leading dot) that are subject to the checks.
extensions=("yml" "bib" "janno" "ssf" "txt" "md" "fam" "ind")

#######################################
# Check whether a file is valid UTF-8.
# Arguments: $1 - path of the file to check
# Returns:   0 if the whole file decodes as UTF-8, non-zero otherwise
#            (including when the file cannot be read)
#######################################
is_utf8_encoded() {
  # Feed the file on stdin and discard stdout instead of using `-o /dev/null`:
  # `-o` is not available in every iconv implementation, and stdin avoids any
  # option parsing of the path. iconv's own diagnostics are silenced; a shell
  # error for an unreadable file is still shown because `<` is applied first.
  iconv -f UTF-8 -t UTF-8 <"$1" >/dev/null 2>/dev/null
}

#######################################
# Check whether a file is free of carriage returns (i.e. uses LF endings).
# Arguments: $1 - path of the file to check
# Returns:   0 if the file contains no CR byte,
#            1 if it contains at least one CR byte (CRLF or bare CR),
#            2 if grep could not read the file
#######################################
has_unix_line_endings() {
  # Look for the CR byte in the file contents. Matching "CR" in the output of
  # `file "$1"` is unreliable because that output starts with the file path,
  # so any path containing "CR" would be reported as a failure.
  # LC_ALL=C makes grep compare bytes, so invalid UTF-8 cannot confuse it.
  LC_ALL=C grep -q $'\r' -- "$1"
  case $? in
    0) return 1 ;; # CR found
    1) return 0 ;; # no CR anywhere in the file
    *) return 2 ;; # grep error (e.g. unreadable file)
  esac
}

#######################################
# Scan the current directory tree and report every file with a listed
# extension that is not UTF-8 or contains carriage returns.
# Globals:   extensions (read)
# Outputs:   "FAIL: <path>" per offending file on stdout
# Returns:   0 if all files pass, 1 if any file fails, 2 if a tool is missing
#######################################
main() {
  local tool
  for tool in find iconv grep; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      # Without this, a missing iconv would silently flag every file.
      printf 'error: required tool not found: %s\n' "$tool" >&2
      return 2
    fi
  done

  # Build one `-name A -o -name B ...` expression so the tree is walked once
  # rather than once per extension.
  local -a name_expr=()
  local ext
  for ext in "${extensions[@]}"; do
    name_expr+=(-o -name "*.${ext}")
  done

  local exit_code=0
  local file
  while IFS= read -r -d '' file; do
    if ! is_utf8_encoded "$file" || ! has_unix_line_endings "$file"; then
      printf 'FAIL: %s\n' "$file"
      exit_code=1
    fi
  # "${name_expr[@]:1}" drops the leading -o. The -path filter skips every
  # path that starts with "./.git" (this covers .git/ and also .github/).
  done < <(find . -type f \( "${name_expr[@]:1}" \) ! -path './.git*' -print0)

  return "$exit_code"
}

# The status of main is the last command's status and therefore the exit
# status of the script.
main "$@"