Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wrong prep #62

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 60 additions & 3 deletions src/reynir_correct/pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class IcelandicPlaces:
# "fjörður": "á", # Skip this since 'í *firði' is also common
"eyri": "á",
"vogur": "í",
"brekka": "í",
"brekka": "í", # Ath. algeng ending á bæjarheitum sem taka með sér 'á'
"staðir": "á",
# "höfn": "á", # Skip this since 'í *höfn' is also common
"eyjar": "í",
Expand Down Expand Up @@ -125,6 +125,39 @@ def includes(cls, place: str) -> bool:
return place in cls.ICELOC_PREP


class Countries:

    """Wraps a dictionary of country names with their
    associated prepositions"""

    # Cached mapping from country name to preposition; None until first use
    COUNTRIES_PREP: Optional[Dict[str, str]] = None
    # Path of the JSON resource that the mapping is loaded from
    COUNTRIES_PREP_JSONPATH = os.path.join(
        os.path.dirname(__file__), "resources", "countries_prep.json"
    )

    @classmethod
    def _load_json(cls) -> None:
        """Load the country name dictionary from the JSON file into memory"""
        with open(cls.COUNTRIES_PREP_JSONPATH, encoding="utf-8") as f:
            cls.COUNTRIES_PREP = json.load(f)

    @classmethod
    def _table(cls) -> Dict[str, str]:
        """Return the country dictionary, loading it from disk on first use.

        Raises ValueError if the JSON file yields no data (JSON null)."""
        if cls.COUNTRIES_PREP is None:
            cls._load_json()
        table = cls.COUNTRIES_PREP
        if table is None:
            # Explicit check instead of an assert, which is stripped under -O
            raise ValueError(
                f"No country data found in {cls.COUNTRIES_PREP_JSONPATH}"
            )
        return table

    @classmethod
    def lookup_preposition(cls, place: str) -> Optional[str]:
        """Look up the correct preposition to use with a country name,
        or None if the country name is not known"""
        return cls._table().get(place)

    @classmethod
    def includes(cls, place: str) -> bool:
        """Return True if the given place is found in the dictionary"""
        return place in cls._table()


class PatternMatcher:

"""Class to match parse trees with patterns to find probable usage errors"""
Expand Down Expand Up @@ -188,6 +221,28 @@ def get_wordform(word: str, lemma: str, cat: str, variants: Iterable[str]) -> st
# Can be many possible word forms; we want the first one in most cases
return wordforms[0].bmynd

def get_wordform(self, lemma: str, cat: str, variants: Iterable[str]) -> str:
    """Get the correct wordform from BinPackage, given a set of variants.

    Looks up the word forms of `lemma` in category `cat` that match the
    filtered variants, and returns the first one found, or an empty
    string if the lookup yields nothing.
    """
    # Get rid of argument variants in verbs (purely numeric entries) and
    # of any variant listed in SKIPVARS, in a single pass
    realvars = [v for v in variants if not v.isdigit() and v not in SKIPVARS]
    wordforms = BIN.lookup_variants(lemma, cat, realvars)
    if not wordforms:
        return ""
    # Can be many possible word forms; we want the first one in most cases
    return wordforms[0].bmynd

def wrong_preposition_af(self, match: SimpleTree) -> None:
"""Handle a match of a suspect preposition pattern"""
# Find the offending verb phrase
Expand Down Expand Up @@ -1104,8 +1159,10 @@ def check_pp_with_place(self, match: SimpleTree) -> None:
place = match.NP.lemma
correct_preposition = IcelandicPlaces.lookup_preposition(place)
if correct_preposition is None:
# This is not a known or likely place name
return
correct_preposition = Countries.lookup_preposition(place)
if correct_preposition is None:
# This is not a known or likely place name
return
preposition = match.P.lemma
if correct_preposition == preposition:
# Correct: return
Expand Down
Loading