Skip to content

Commit

Permalink
Feature/GitHub workflows (#8)
Browse files Browse the repository at this point in the history
Adds GitHub Actions for building and uploading a package
  • Loading branch information
asajatovic authored Dec 16, 2019
1 parent ec1faa5 commit be8f8d0
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 30 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# CI workflow: lint and test the package on every pull request to master.
name: Python package

on:
  pull_request:
    types: [opened, synchronize, reopened]
    branches:
      - master

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      max-parallel: 4
      matrix:
        # NOTE: unquoted 3.8 is fine for actions/setup-python, which
        # normalizes the float, but quoting is safer in general.
        python-version: [3.6, 3.7, 3.8]

    steps:
    - uses: actions/checkout@v1
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v1
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
    - name: Lint with flake8
      run: |
        pip install flake8
        # stop the build if there are Python syntax errors or undefined names
        # (--select limits this pass to fatal classes; without it, any style
        # nit would fail the build, contradicting the exit-zero pass below;
        # "." recurses reliably, unlike "**/*.py" under non-globstar bash)
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        pip install pytest
        python -m pytest -vvv test
26 changes: 26 additions & 0 deletions .github/workflows/pythonpublish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Release workflow: build an sdist + wheel and upload to PyPI
# whenever a GitHub release is created.
name: Upload Python Package

on:
  release:
    types: [created]

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v1
    - name: Set up Python
      uses: actions/setup-python@v1
      with:
        python-version: '3.6'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install setuptools wheel twine
    - name: Build and publish
      env:
        # PyPI credentials are injected from repository secrets;
        # twine reads TWINE_USERNAME / TWINE_PASSWORD from the environment.
        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
        python setup.py sdist bdist_wheel
        twine upload dist/*
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ Pull requests are welcome. For major changes, please open an issue first to disc

Please make sure to update tests as appropriate.

To start the tests, just run [`pytest`](https://docs.pytest.org/en/latest/contents.html) in the root source directory.
Tests are run automatically for each pull request on the master branch.
To start the tests locally, just run [`pytest`](https://docs.pytest.org/en/latest/contents.html) in the root source directory.

## License
[MIT](https://choosealicense.com/licenses/mit/) © TakeLab
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
with open(langs_path, "r") as f:
LANGUAGES = json.load(f)

ENTRY_LANGS = set("udpipe_{} = spacy_udpipe:UDPipeLanguage".format(s.split('-')[0])
ENTRY_LANGS = set(f"udpipe_{s.split('-')[0]} = spacy_udpipe:UDPipeLanguage"
for s in LANGUAGES.keys())

setuptools.setup(
name="spacy-udpipe",
version="0.0.5",
version="0.1.0",
description="Use fast UDPipe models directly in spaCy",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
18 changes: 9 additions & 9 deletions spacy_udpipe/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ class UDPipeTokenizer(object):
>>> nlp = spacy.load('/path/to/model', udpipe_model=udpipe_model)
"""

to_disk = lambda self, *args, **kwargs: None
from_disk = lambda self, *args, **kwargs: None
to_bytes = lambda self, *args, **kwargs: None
from_bytes = lambda self, *args, **kwargs: None
to_disk = lambda self, *args, **kwargs: None # noqa: E731
from_disk = lambda self, *args, **kwargs: None # noqa: E731
to_bytes = lambda self, *args, **kwargs: None # noqa: E731
from_bytes = lambda self, *args, **kwargs: None # noqa: E731
_ws_pattern = re.compile(r"\s+")

def __init__(self, model, vocab):
Expand Down Expand Up @@ -217,15 +217,15 @@ def __init__(self, lang, path=None, meta=None):
raise Exception(msg)
self._lang = lang.split('-')[0]
if meta is None:
self._meta = {'authors': ("Milan Straka, "
"Jana Straková"),
self._meta = {'author': ("Milan Straka & "
"Jana Straková"),
'description': "UDPipe pretrained model.",
'email': '[email protected]',
'lang': 'udpipe_' + self._lang,
'license': 'CC BY-NC-SA 4.0',
'name': path.split('/')[-1],
'parent_package': 'spacy_udpipe',
'pipeline': 'Tokenizer, POS Tagger, Lemmatizer, Parser',
'pipeline': 'Tokenizer, Tagger, Lemmatizer, Parser',
'source': 'Universal Dependencies 2.4',
'url': 'http://ufal.mff.cuni.cz/udpipe',
'version': '1.2.0'
Expand Down Expand Up @@ -277,15 +277,15 @@ def tokenize(self, text):
return self._read(text, tokenizer)

def tag(self, sentence):
"""Assing part-of-speech tags (inplace).
"""Assign part-of-speech tags (inplace).
sentence (ufal.udpipe.Sentence): Input sentence.
RETURNS (ufal.udpipe.Sentence): Tagged sentence.
"""
self.model.tag(sentence, self.model.DEFAULT)

def parse(self, sentence):
"""Assing dependency parse relations (inplace).
"""Assign dependency parse relations (inplace).
sentence (ufal.udpipe.Sentence): Input sentence.
RETURNS (ufal.udpipe.Sentence): Parsed sentence.
Expand Down
2 changes: 1 addition & 1 deletion spacy_udpipe/languages.json
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,4 @@
"vi": "vietnamese-vtb-ud-2.4-190531.udpipe",
"ug": "uyghur-udt-ud-2.4-190531.udpipe",
"ur": "urdu-udtb-ud-2.4-190531.udpipe"
}
}
4 changes: 2 additions & 2 deletions spacy_udpipe/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from spacy.language import Language
from spacy.util import get_lang_class

BASE_URL = "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-2998/"
BASE_URL = "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-2998/" # noqa: E501
MODELS_DIR = os.path.join(Path(__file__).parent, "models")
langs_path = os.path.join(Path(__file__).parent, "languages.json")
with open(langs_path, "r") as f:
Expand All @@ -32,7 +32,7 @@ def download(lang):
_check_language(lang)
try:
_check_models_dir(lang)
except:
except Exception:
os.makedirs(MODELS_DIR)
if LANGUAGES[lang] in os.listdir(MODELS_DIR):
msg = "Already downloaded a model for the" \
Expand Down
47 changes: 32 additions & 15 deletions test/test_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,25 @@
from spacy.lang.en import EnglishDefaults
from spacy.language import BaseDefaults

from spacy_udpipe import download
from spacy_udpipe.language import load
from spacy_udpipe.util import get_defaults


@pytest.fixture(autouse=True)
def download_en():
download("en")


@pytest.fixture
def lang():
return "en"


def tags_equal(act, exp):
"""Check if each actual tag in act is equal to one or more expected tags in exp."""
return all(a == e if isinstance(e, str) else a in e for a, e in zip(act, exp))
"""Check if each actual tag is equal to one or more expected tags."""
return all(a == e if isinstance(e, str) else a in e
for a, e in zip(act, exp))


def test_get_defaults():
Expand All @@ -29,19 +36,30 @@ def test_spacy_udpipe(lang):
text = "Testing one, two, three. This is a test."
doc = nlp(text)

pos_actual = ['VERB', 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM', 'PUNCT',
('PRON', 'DET'), ('AUX', 'VERB'), 'DET', 'NOUN', 'PUNCT']
pos_actual = [('VERB', 'PROPN'), 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM',
'PUNCT',
('PRON', 'DET'), ('AUX', 'VERB'), 'DET', 'NOUN',
'PUNCT']
# test token attributes
assert [t.text for t in doc] == ['Testing', 'one', ',', 'two', ',', 'three', '.',
'This', 'is', 'a', 'test', '.']
assert [t.lemma_ for t in doc] == ['test', 'one', ',', 'two', ',', 'three', '.',
'this', 'be', 'a', 'test', '.']
assert [t.text for t in doc] == ['Testing', 'one', ',', 'two', ',', 'three', # noqa: E501
'.',
'This', 'is', 'a', 'test',
'.']
assert [t.lemma_ for t in doc] == ['test', 'one', ',', 'two', ',', 'three',
'.',
'this', 'be', 'a', 'test',
'.']
assert tags_equal([t.pos_ for t in doc], pos_actual)
assert [t.tag_ for t in doc] == ['V', 'N', 'FF', 'N', 'FF', 'N', 'FS',
'PD', 'V', 'RI', 'S', 'FS'] # CoNLL xpostag-s, custom for each UD treebank
assert [t.dep_ for t in doc] == ['ROOT', 'nummod', 'punct', 'nummod', 'punct', 'nummod', 'punct',
'nsubj', 'cop', 'det', 'ROOT', 'punct']
assert [t.is_sent_start for t in doc] == [True, None, None, None, None, None, None,
# CoNLL xpostag-s, custom for each UD treebank
assert [t.tag_ for t in doc] == ['NNP', 'CD', ',', 'CD', ',', 'CD',
'.',
'DT', 'VBZ', 'DT', 'NN',
'.']
assert [t.dep_ for t in doc] == ['ROOT', 'nummod', 'punct', 'nummod', 'punct', 'nummod', # noqa: E501
'punct',
'nsubj', 'cop', 'det', 'ROOT',
'punct']
assert [t.is_sent_start for t in doc] == [True, None, None, None, None, None, None, # noqa: E501
True, None, None, None, None]
assert any([t.is_stop for t in doc])
# test doc attributes
Expand All @@ -52,7 +70,6 @@ def test_spacy_udpipe(lang):
# test pipe
docs = list(nlp.pipe(["Testing one, two, three.", "This is a test."]))
assert docs[0].text == "Testing one, two, three."
assert [t.pos_ for t in docs[0]] == [
'VERB', 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM', 'PUNCT']
assert [t.pos_ for t in docs[0]] == ['PROPN', 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM', 'PUNCT'] # noqa: E501
assert docs[1].text == "This is a test."
assert tags_equal([t.pos_ for t in docs[1]], pos_actual[-5:])

0 comments on commit be8f8d0

Please sign in to comment.