Skip to content

Commit

Permalink
Whitespaces fix (#5)
Browse files Browse the repository at this point in the history
* Fix DE tokenization

* Update version
  • Loading branch information
asajatovic authored Oct 14, 2019
1 parent 09fb68a commit e12287f
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

setuptools.setup(
name="spacy-udpipe",
version="0.0.3",
version="0.0.4",
description="Use fast UDPipe models directly in spaCy",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
6 changes: 3 additions & 3 deletions spacy_udpipe/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,11 @@ def __call__(self, text):
pos.append(self.vocab.strings.add(token.upostag or ""))
# CoNLL xpostag-s, custom for each UD treebank
tags.append(self.vocab.strings.add(token.xpostag or ""))
deps.append(self.vocab.strings.add(self.__dep(token.deprel) or ""))
deps.append(self.vocab.strings.add(self._dep(token.deprel) or ""))
lemmas.append(self.vocab.strings.add(token.lemma or ""))
offset += len(token.form)
span = text[offset:]
if i == len(tokens) - 1:
if i == len(tokens) - 1 or "SpaceAfter=No" in token.misc:
spaces.append(False)
elif not is_aligned:
spaces.append(True)
Expand All @@ -156,7 +156,7 @@ def __call__(self, text):
doc.is_parsed = True
return doc

def __dep(self, dep):
def _dep(self, dep):
# Ensure labels match with spaCy
return 'ROOT' if dep == 'root' else dep

Expand Down

0 comments on commit e12287f

Please sign in to comment.