# Add naive hunspell cache? #72
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Builds the Portuguese POS-tagging and spelling dictionaries against a
# LanguageTool checkout and uploads the generated Java sources as an artifact.
name: Build

on:
  push:
    paths:
      - "data/**"
      - ".github/workflows/build.yml"
  pull_request:
    branches:
      - "main"

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11"]
    steps:
      # Every later step locates its checkout/work directory through these
      # variables, so this must run first.
      - name: Set paths
        run: |
          lt_dir="${{ github.workspace }}/lt"
          echo "LT_HOME=${lt_dir}/languagetool" >> "$GITHUB_ENV"
          echo "PT_DICT_HOME=${lt_dir}/portuguese-pos-dict" >> "$GITHUB_ENV"
          echo "LT_TMP_DIR=${lt_dir}/tmp" >> "$GITHUB_ENV"
          echo "HUNSPELL_DIR=${lt_dir}/hunspell" >> "$GITHUB_ENV"

      # Check out this repository (with submodules) into PT_DICT_HOME.
      # The repository name comes from the `github` context: the runner's
      # default GITHUB_* variables are not exposed through the `env` context.
      - uses: actions/checkout@v4
        with:
          path: ${{ env.PT_DICT_HOME }}
          repository: ${{ github.repository }}
          fetch-depth: 0
          submodules: recursive

      # The core count is passed to the spelling-dictionary build as
      # --max-threads.
      - name: Get number of CPU cores
        uses: SimenB/github-actions-cpu-cores@v1
        id: cpu-cores

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Set up Poetry
        uses: Gr1N/setup-poetry@v8

      - name: Cache Poetry virtualenv
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry/virtualenvs
          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}

      - name: Check Poetry config
        working-directory: ${{ env.PT_DICT_HOME }}
        run: poetry check

      # Ideally this will also install the 'dict_tools' submodule as a separate package ('dictionary-tools')
      - name: Install Python dependencies
        working-directory: ${{ env.PT_DICT_HOME }}
        run: poetry install
        env:
          POETRY_HTTP_BASIC_GITLAB_USERNAME: "lt-ai-package-registry"
          POETRY_HTTP_BASIC_GITLAB_PASSWORD: ${{ secrets.POETRY_HTTP_BASIC_GITLAB_PASSWORD }}

      # Version string of the form test-YYYY-MM-DD-<random>, shared by the
      # dictionary builds and the pom.xml update below.
      - name: Set dictionary version
        working-directory: ${{ env.PT_DICT_HOME }}
        run: echo "PT_DICT_VERSION=test-$(date "+%Y-%m-%d")-$RANDOM" >> "$GITHUB_ENV"

      - name: Set up Perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: "5.38"

      - name: Install Perl dependencies
        run: cpan install "Switch" "Text::Unaccent::PurePerl"

      - name: Checkout Hunspell
        uses: actions/checkout@v4
        with:
          repository: hunspell/hunspell
          path: ${{ env.HUNSPELL_DIR }}

      # The key is derived from the Hunspell checkout, so a new upstream
      # revision triggers a rebuild. It uses its own prefix so it cannot be
      # confused with the LT build cache.
      # NOTE(review): only the two binaries are cached. If `hunspell` is linked
      # against the shared libhunspell that `make install` places elsewhere, it
      # will not run on a cache hit - confirm, or cache the library as well.
      - name: Cache Hunspell
        uses: actions/cache@v4
        id: hunspell-cache
        with:
          path: |
            /usr/local/bin/unmunch
            /usr/local/bin/hunspell
          key: hunspell-${{ runner.os }}-${{ hashFiles('lt/hunspell/**') }}

      - name: Compile Hunspell
        if: steps.hunspell-cache.outputs.cache-hit != 'true'
        working-directory: ${{ env.HUNSPELL_DIR }}
        run: |
          # -y keeps apt-get from stopping at a confirmation prompt in CI.
          sudo apt-get update
          sudo apt-get install -y autoconf automake autopoint libtool
          autoreconf -vfi
          ./configure
          make -j "$(nproc)"
          sudo make install
          sudo ldconfig
          # Fail the step early if the binary did not end up on PATH.
          which unmunch

      - name: Set up JDK 11 for x64
        uses: actions/setup-java@v4
        with:
          java-version: "11"
          distribution: "temurin"
          architecture: x64

      - name: Checkout LT
        uses: actions/checkout@v4
        with:
          repository: languagetool-org/languagetool
          path: ${{ env.LT_HOME }}
          token: ${{ secrets.LT_OS_TOKEN }}
          ref: pt/dict/new_tokenisation

      # Keyed on the root pom.xml and all Java sources of the LT checkout; the
      # two Maven build steps below are skipped on a cache hit.
      - name: Cache LT Build
        uses: actions/cache@v4
        id: lt-cache
        with:
          path: |
            ${{ env.LT_HOME }}/languagetool-standalone/target
            ${{ env.LT_HOME }}/languagetool-dev/target
            ~/.m2
          key: lt-build-${{ hashFiles('lt/languagetool/pom.xml', 'lt/languagetool/**/*.java') }}

      - name: Build LT
        if: steps.lt-cache.outputs.cache-hit != 'true'
        working-directory: ${{ env.LT_HOME }}
        run: mvn clean install -DskipTests

      - name: Build LT Dev
        if: steps.lt-cache.outputs.cache-hit != 'true'
        working-directory: ${{ env.LT_HOME }}/languagetool-dev
        run: mvn clean compile assembly:single

      # Each argument line must end in a backslash; otherwise the shell runs
      # the following option as a separate command.
      - name: Build POS tagging dictionary
        working-directory: ${{ env.PT_DICT_HOME }}
        run: |
          poetry run python "dict_tools/scripts/build_tagger_dicts.py" \
            --language pt \
            --install-version "${{ env.PT_DICT_VERSION }}"

      - name: Update dictionary version in LT pom.xml
        working-directory: ${{ env.PT_DICT_HOME }}
        run: |
          poetry run python "dict_tools/scripts/update_pom.py" \
            --new-version "${{ env.PT_DICT_VERSION }}" \
            --package-name "portuguese-pos-dict"

      # This step will also re-compile LT with the new dictionary version in pom.xml
      - name: Build spelling dictionaries
        working-directory: ${{ env.PT_DICT_HOME }}
        run: |
          mkdir -p "${{ env.LT_TMP_DIR }}/compounds"
          poetry run python "dict_tools/scripts/build_spelling_dicts.py" \
            --language pt \
            --tmp-dir "${{ env.LT_TMP_DIR }}" \
            --force-install \
            --install-version "${{ env.PT_DICT_VERSION }}" \
            --max-threads "${{ steps.cpu-cores.outputs.count }}"

      # TODO: uncomment after testing
      # - name: Test LT
      #   working-directory: ${{ env.LT_HOME }}
      #   # If it fails, keep going; TODO: change this to 'false' after testing
      #   continue-on-error: true
      #   run: |
      #     mvn clean install -DskipTests
      #     ./build.sh pt test

      - name: Archive Java src for deployment
        # TODO: uncomment after this is tested to limit archiving to PRs to main
        # TODO: add extra condition here to check for success of tests??
        # Only do it after a successful build and push to main!
        # if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: actions/upload-artifact@v4
        with:
          name: src
          path: ${{ env.PT_DICT_HOME }}/results/java-lt/src