Skip to content

[pt-PT] Added words to both dictionaries (.dic) #176

[pt-PT] Added words to both dictionaries (.dic)

[pt-PT] Added words to both dictionaries (.dic) #176

Workflow file for this run

# Workflow name as shown in the Actions UI.
name: Build

on:
  # Pushes trigger a run only when they touch one of the listed paths.
  push:
    paths:
      - 'data/**'
      - 'pos_tagger_scripts/build-lt.sh'
      - 'lt-changes/**'
      - '.github/workflows/build.yml'
  # Pull requests trigger a run when they target the main branch.
  pull_request:
    branches:
      - 'main'
  # Allows the workflow to be started manually; it takes no inputs.
  workflow_dispatch: {}
jobs:
  # Builds the POS-tagging and spelling dictionaries, then runs the
  # LanguageTool Portuguese tests against them. On pushes of a v* tag the
  # generated sources are uploaded as the "src" artifact for the deploy job.
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11"]
    env:
      # Ref of languagetool-org/languagetool used by the "Checkout LT" step.
      LT_BRANCH: "pt/dict/v016"
    steps:
      # Exports the directory layout used by every later step. Everything
      # lives under <workspace>/lt.
      - name: Set paths
        run: |
          lt_dir="${{ github.workspace }}/lt"
          echo "LT_HOME=${lt_dir}/languagetool" >> "$GITHUB_ENV"
          echo "PT_DICT_HOME=${lt_dir}/portuguese-pos-dict" >> "$GITHUB_ENV"
          echo "LT_TMP_DIR=${lt_dir}/tmp" >> "$GITHUB_ENV"
          echo "HUNSPELL_DIR=${lt_dir}/hunspell" >> "$GITHUB_ENV"

      # Checks out this repository into PT_DICT_HOME.
      # The repository comes from the `github` context: the `env` context only
      # holds variables defined in the workflow or written to GITHUB_ENV, so
      # `env.GITHUB_ACTION_REPOSITORY` expanded to an empty string.
      - uses: actions/checkout@v4
        with:
          path: ${{ env.PT_DICT_HOME }}
          repository: ${{ github.repository }}
          fetch-depth: 0
          submodules: recursive

      # The core count is passed to the spelling-dictionary build as
      # --max-threads.
      - name: Get number of CPU cores
        uses: SimenB/github-actions-cpu-cores@v1
        id: cpu-cores

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Set up Poetry
        uses: Gr1N/setup-poetry@v8

      - name: Check Poetry config
        working-directory: ${{ env.PT_DICT_HOME }}
        run: poetry check

      # Credentials for the "gitlab" Poetry source are supplied through
      # Poetry's POETRY_HTTP_BASIC_<SOURCE>_* environment variables.
      - name: Install Python dependencies
        working-directory: ${{ env.PT_DICT_HOME }}
        run: poetry install
        env:
          POETRY_HTTP_BASIC_GITLAB_USERNAME: "lt-ai-package-registry"
          POETRY_HTTP_BASIC_GITLAB_PASSWORD: ${{ secrets.POETRY_HTTP_BASIC_GITLAB_PASSWORD }}

      # Throwaway version string of the form test-YYYY-MM-DD-<random>.
      - name: Set dictionary version
        working-directory: ${{ env.PT_DICT_HOME }}
        run: echo "PT_DICT_VERSION=test-$(date "+%Y-%m-%d")-$RANDOM" >> "$GITHUB_ENV"

      - name: Set up Perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: "5.38"

      - name: Install Perl dependencies
        run: cpan install "Switch" "Text::Unaccent::PurePerl"

      - name: Checkout Hunspell
        uses: actions/checkout@v4
        with:
          repository: hunspell/hunspell
          path: ${{ env.HUNSPELL_DIR }}

      # Caches the compiled Hunspell binaries, keyed on the Hunspell sources.
      # actions/cache expands `~` but not shell variables, so the paths are
      # written with `~` rather than `$HOME`.
      # NOTE(review): only the two binaries are cached; if `hunspell` is linked
      # against a shared library installed under ~/local/lib, a cache hit would
      # restore it without that library -- confirm.
      # NOTE(review): `save-always` is reported as deprecated in recent
      # actions/cache v4 releases -- confirm and drop if so.
      - name: Cache Hunspell
        uses: actions/cache@v4
        with:
          path: |
            ~/local/bin/unmunch
            ~/local/bin/hunspell
          key: hunspell-${{ hashFiles('lt/hunspell/src', 'lt/hunspell/po', 'lt/hunspell/m4', 'lt/hunspell/Makefile.am', 'lt/hunspell/configure.ac') }}
          save-always: true
        id: hunspell-cache

      # Only runs when the cache above missed. Installs into $HOME/local.
      - name: Compile Hunspell
        if: steps.hunspell-cache.outputs.cache-hit != 'true'
        working-directory: ${{ env.HUNSPELL_DIR }}
        run: |
          # Refresh the package index and install non-interactively so the
          # step cannot stall or abort on a confirmation prompt.
          sudo apt-get update
          sudo apt-get install -y autoconf automake autopoint libtool
          autoreconf -vfi
          ./configure --prefix "$HOME/local"
          make
          sudo make install
          sudo ldconfig
          # PATH changes do not outlive this step; this is only a sanity check
          # that the install produced `unmunch`.
          export PATH=$HOME/local/bin:$PATH
          which unmunch

      - name: Set up JDK 11 for x64
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          architecture: x64

      - name: Checkout LT
        uses: actions/checkout@v4
        with:
          repository: languagetool-org/languagetool
          path: ${{ env.LT_HOME }}
          token: ${{ secrets.LT_OS_TOKEN }}
          ref: ${{ env.LT_BRANCH }}

      # Caches the LT build outputs and the local Maven repository, keyed on
      # the root pom.xml and all Java sources of the LT checkout.
      - name: Cache LT Build
        uses: actions/cache@v4
        with:
          path: |
            ${{ env.LT_HOME }}/languagetool-standalone/target
            ${{ env.LT_HOME }}/languagetool-dev/target
            ~/.m2
          key: lt-build-${{ hashFiles('lt/languagetool/pom.xml', 'lt/languagetool/**/*.java') }}
          save-always: true
        id: lt-cache

      - name: Build LT
        if: steps.lt-cache.outputs.cache-hit != 'true'
        run: mvn clean install -DskipTests
        working-directory: ${{ env.LT_HOME }}

      - name: Build LT Dev
        if: steps.lt-cache.outputs.cache-hit != 'true'
        run: mvn clean compile assembly:single
        working-directory: ${{ env.LT_HOME }}/languagetool-dev

      # $HOME/local/bin is prepended to PATH so the Hunspell tools built or
      # restored above are found first.
      - name: Build POS tagging dictionary
        working-directory: ${{ env.PT_DICT_HOME }}
        run: |
          export PATH=$HOME/local/bin:$PATH
          poetry run python "dict_tools/scripts/build_tagger_dicts.py" \
            --language pt \
            --no-force-compile \
            --install-version "${{ env.PT_DICT_VERSION }}" \
            --force-install

      - name: Update dictionary version in LT pom.xml
        working-directory: ${{ env.PT_DICT_HOME }}
        run: |
          poetry run python "dict_tools/scripts/update_pom.py" \
            --new-version "${{ env.PT_DICT_VERSION }}" \
            --package-name "portuguese-pos-dict"

      # This step will also re-compile LT with the new dictionary version in pom.xml
      - name: Build spelling dictionaries
        working-directory: ${{ env.PT_DICT_HOME }}
        run: |
          mkdir -p "${{ env.LT_TMP_DIR }}/compounds"
          export PATH=$HOME/local/bin:$PATH
          poetry run python "dict_tools/scripts/build_spelling_dicts.py" \
            --language pt \
            --tmp-dir "${{ env.LT_TMP_DIR }}" \
            --force-install \
            --install-version "${{ env.PT_DICT_VERSION }}" \
            --max-threads "${{ steps.cpu-cores.outputs.count }}"

      # Skipped when the commit message contains "[skip lt test]" or the pull
      # request carries the "skip_lt_test" label.
      # NOTE(review): pull_request payloads do not appear to include
      # head.commit.message, so the first check is kept only for backward
      # compatibility; the head_commit check covers push events -- confirm.
      - name: Test LT
        working-directory: ${{ env.LT_HOME }}
        if: >
          (contains(github.event.pull_request.head.commit.message, '[skip lt test]') == false) &&
          (contains(github.event.head_commit.message, '[skip lt test]') == false) &&
          (contains(github.event.pull_request.labels.*.name, 'skip_lt_test') == false)
        run: |
          mvn clean install -DskipTests
          ./build.sh pt test

      # Only on pushes of a v* tag: hands the generated sources to the deploy
      # job.
      - name: Archive binaries
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
        uses: actions/upload-artifact@v4
        with:
          name: src
          path: ${{ env.PT_DICT_HOME }}/results/java-lt/src

  # Publishes the artifact produced by the build job with `mvn deploy`.
  # Runs only for pushes of a v* tag, after the build job has succeeded.
  deploy:
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
    runs-on: ubuntu-latest
    needs: build
    steps:
      # Full history is fetched because the version is derived from
      # `git describe` below.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Necessary since downloading artefacts with an 'overwrite' option is not supported
      - name: Empty out src directory
        run: rm -rf results/java-lt/src/*

      - name: Download artefact from upstream workflow
        uses: actions/download-artifact@v4
        with:
          name: src
          path: results/java-lt/src/

      # Version is the most recent tag with its leading "v" removed.
      - name: Set dictionary version
        run: echo "PT_DICT_VERSION=$(git describe --tags --abbrev=0 | sed 's/^v//g')" >> "$GITHUB_ENV"

      - name: Set up JDK 11 for x64
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          architecture: x64

      # The key is passed through the environment instead of being
      # interpolated into the script text, so its contents are never parsed
      # by the shell.
      - name: Import GPG Key
        env:
          GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }}
        run: echo "$GPG_PRIVATE_KEY" | gpg --batch --import

      - name: Deploy to SonaType
        env:
          GPG_KEYNAME: ${{ secrets.GPG_KEYNAME }}
          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
          SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }}
          SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }}
        working-directory: results/java-lt
        run: mvn clean deploy -P release -s ${{ github.workspace }}/results/java-lt/settings.xml