# Workflow file for the run "Add naive hunspell cache? #72".

# Runs on pushes that touch the dictionary data or this workflow file, and on
# pull requests that target main.
name: Build
on:
  push:
    paths:
      - 'data/**'
      - '.github/workflows/build.yml'
  pull_request:
    branches:
      - main
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Kept as a quoted string so the version is never parsed as a float.
        python-version:
          - '3.11'
    steps:
# Publish the directory layout (all under <workspace>/lt) to later steps
# through the $GITHUB_ENV file.
- name: Set paths
  run: |
    lt_root="${{ github.workspace }}/lt"
    {
      echo "LT_HOME=${lt_root}/languagetool"
      echo "PT_DICT_HOME=${lt_root}/portuguese-pos-dict"
      echo "LT_TMP_DIR=${lt_root}/tmp"
      echo "HUNSPELL_DIR=${lt_root}/hunspell"
    } >> $GITHUB_ENV
# Check out the repository this workflow runs in, with full history and
# submodules, into PT_DICT_HOME.
- name: Checkout dictionary repository
  # v4 runs on a supported Node runtime; v3 used the deprecated Node 16.
  uses: actions/checkout@v4
  with:
    path: ${{ env.PT_DICT_HOME }}
    # The `env` context only holds variables defined by the workflow, so
    # `env.GITHUB_ACTION_REPOSITORY` evaluated to an empty string. Name the
    # current repository explicitly through the `github` context instead.
    repository: ${{ github.repository }}
    fetch-depth: 0
    submodules: recursive
# The core count is read later as `steps.cpu-cores.outputs.count`.
- name: Get number of CPU cores
  id: cpu-cores
  uses: SimenB/github-actions-cpu-cores@v1
# The interpreter version comes from the job matrix.
- name: Set up Python ${{ matrix.python-version }}
  uses: actions/setup-python@v5
  with:
    python-version: "${{ matrix.python-version }}"
# Later steps invoke the `poetry` command, so this must run before them.
- name: Set up Poetry
  uses: Gr1N/setup-poetry@v8
# Reuse Poetry's virtualenvs between runs. The key changes whenever any
# poetry.lock file in the workspace changes.
- name: Cache Poetry virtualenv
  # actions/cache v1/v2 have been retired by GitHub; v4 accepts the same
  # `path` and `key` inputs used here.
  uses: actions/cache@v4
  with:
    path: ~/.cache/pypoetry/virtualenvs
    key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
# Runs `poetry check` in the dictionary checkout before installing anything.
- name: Check Poetry config
  run: poetry check
  working-directory: ${{ env.PT_DICT_HOME }}
# Ideally this will also install the 'dict_tools' submodule as a separate
# package ('dictionary-tools').
- name: Install Python dependencies
  env:
    # HTTP basic-auth credentials handed to Poetry for the "gitlab" source.
    POETRY_HTTP_BASIC_GITLAB_USERNAME: 'lt-ai-package-registry'
    POETRY_HTTP_BASIC_GITLAB_PASSWORD: ${{ secrets.POETRY_HTTP_BASIC_GITLAB_PASSWORD }}
  working-directory: ${{ env.PT_DICT_HOME }}
  run: poetry install
# Export PT_DICT_VERSION as "test-<YYYY-MM-DD>-<random number>" for later steps.
- name: Set dictionary version
  working-directory: ${{ env.PT_DICT_HOME }}
  run: |
    echo "PT_DICT_VERSION=test-$(date "+%Y-%m-%d")-$RANDOM" >> $GITHUB_ENV
# The version is quoted so it stays a string.
- name: Set up Perl
  uses: shogo82148/actions-setup-perl@v1
  with:
    perl-version: '5.38'
# Perl modules installed through cpan for use later in the build.
- name: Install Perl dependencies
  run: |
    cpan install "Switch" "Text::Unaccent::PurePerl"
# Fetch the upstream Hunspell sources (default branch) into HUNSPELL_DIR.
- name: Checkout Hunspell
  # v4 runs on a supported Node runtime; v3 used the deprecated Node 16.
  uses: actions/checkout@v4
  with:
    repository: hunspell/hunspell
    path: ${{ env.HUNSPELL_DIR }}
# Cache the two Hunspell executables so that "Compile Hunspell" can be skipped
# when the checked-out Hunspell sources are unchanged.
# NOTE(review): only the executables are cached. If `hunspell` depends on a
# shared library that `make install` places outside these paths, it will not
# run after a cache hit - confirm, and extend `path` if needed.
- name: Cache Hunspell
  id: hunspell-cache
  # actions/cache v1/v2 have been retired by GitHub; v4 accepts the same
  # `path` and `key` inputs used here.
  uses: actions/cache@v4
  with:
    path: |
      /usr/local/bin/unmunch
      /usr/local/bin/hunspell
    # Dedicated prefix: the previous `lt-build-` prefix was shared with the
    # "Cache LT Build" key. runner.os is part of the key because the cached
    # files are compiled binaries.
    key: ${{ runner.os }}-hunspell-${{ hashFiles('lt/hunspell/**') }}
# Build and install Hunspell from source; skipped when the cache step above
# restored the executables.
- name: Compile Hunspell
  if: steps.hunspell-cache.outputs.cache-hit != 'true'
  working-directory: ${{ env.HUNSPELL_DIR }}
  run: |
    # The job is non-interactive: refresh the package index first and pass -y
    # so apt-get never waits for (and aborts on) a confirmation prompt.
    sudo apt-get update
    sudo apt-get install -y autoconf automake autopoint libtool
    autoreconf -vfi
    ./configure
    make
    sudo make install
    sudo ldconfig
    # Sanity check: the step fails here if unmunch is not on PATH.
    which unmunch
# Temurin JDK 11 (x64) for the Maven builds below.
- name: Set up JDK 11 for x64
  # v4 runs on a supported Node runtime; v3 used the deprecated Node 16.
  # The inputs used here are unchanged between the two versions.
  uses: actions/setup-java@v4
  with:
    distribution: 'temurin'
    java-version: '11'
    architecture: x64
# Fetch the LanguageTool sources (branch pt/dict/new_tokenisation) into
# LT_HOME, authenticating with the LT_OS_TOKEN secret.
- name: Checkout LT
  # v4 runs on a supported Node runtime; v3 used the deprecated Node 16.
  uses: actions/checkout@v4
  with:
    repository: languagetool-org/languagetool
    ref: pt/dict/new_tokenisation
    path: ${{ env.LT_HOME }}
    token: ${{ secrets.LT_OS_TOKEN }}
# Cache the LanguageTool build outputs and the local Maven repository. The
# key is derived from the root pom.xml and every Java source file, so any
# change to them forces the two "Build LT" steps to run again.
- name: Cache LT Build
  id: lt-cache
  # actions/cache v1/v2 have been retired by GitHub; v4 accepts the same
  # `path` and `key` inputs used here.
  uses: actions/cache@v4
  with:
    path: |
      ${{ env.LT_HOME }}/languagetool-standalone/target
      ${{ env.LT_HOME }}/languagetool-dev/target
      ~/.m2
    key: lt-build-${{ hashFiles('lt/languagetool/pom.xml', 'lt/languagetool/**/*.java') }}
# Full Maven build of LanguageTool (tests skipped); only runs when the LT
# cache was not restored.
- name: Build LT
  if: steps.lt-cache.outputs.cache-hit != 'true'
  working-directory: ${{ env.LT_HOME }}
  run: mvn clean install -DskipTests
# Compile and assemble the languagetool-dev module; only runs when the LT
# cache was not restored.
- name: Build LT Dev
  if: steps.lt-cache.outputs.cache-hit != 'true'
  working-directory: ${{ env.LT_HOME }}/languagetool-dev
  run: mvn clean compile assembly:single
# Run the tagger-dictionary build script for Portuguese, passing the value of
# PT_DICT_VERSION as --install-version.
- name: Build POS tagging dictionary
  working-directory: ${{ env.PT_DICT_HOME }}
  run: |
    # Every line except the last needs a trailing backslash. Without it the
    # shell runs the following option as a separate command and the step fails.
    poetry run python "dict_tools/scripts/build_tagger_dicts.py" \
      --language pt \
      --install-version "${{ env.PT_DICT_VERSION }}"
# Pass the new dictionary version and package name to the pom update script.
# The folded scalar joins the lines below into a single command line.
- name: Update dictionary version in LT pom.xml
  working-directory: ${{ env.PT_DICT_HOME }}
  run: >-
    poetry run python "dict_tools/scripts/update_pom.py"
    --new-version "${{ env.PT_DICT_VERSION }}"
    --package-name "portuguese-pos-dict"
# This step will also re-compile LT with the new dictionary version in pom.xml.
- name: Build spelling dictionaries
  run: |
    # The compounds directory is created under the temp dir before the script runs.
    mkdir -p "${{ env.LT_TMP_DIR }}/compounds"
    poetry run python "dict_tools/scripts/build_spelling_dicts.py" \
      --language pt \
      --tmp-dir "${{ env.LT_TMP_DIR }}" \
      --force-install \
      --install-version "${{ env.PT_DICT_VERSION }}" \
      --max-threads "${{ steps.cpu-cores.outputs.count }}"
  working-directory: ${{ env.PT_DICT_HOME }}
# TODO: uncomment after testing
# - name: Test LT
# working-directory: ${{ env.LT_HOME }}
# # If it fails, keep going; TODO: change this to 'false' after testing
# continue-on-error: true
# run: |
# mvn clean install -DskipTests
# ./build.sh pt test
# Upload results/java-lt/src from the dictionary checkout as the `src` artifact.
- name: Archive Java src for deployment
  # TODO: once this is tested, enable the condition below so that archiving
  # only happens after a successful build on a push to main.
  # TODO: add an extra condition to check for success of the tests?
  # if: github.event_name == 'push' && github.ref == 'refs/heads/main'
  uses: actions/upload-artifact@v4
  with:
    name: src
    path: ${{ env.PT_DICT_HOME }}/results/java-lt/src