-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Check if word exists #24
base: master
Are you sure you want to change the base?
Changes from 7 commits
bbae407
e500864
9ef46a1
a21ce00
de1c37f
c180782
0b80fe6
26e6f0d
0481ab4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
import the_hat_game.nltk_setup # noqa: F401 | ||
from the_hat_game.loggers import c_handler, logger | ||
from the_hat_game.players import RemotePlayer | ||
from data.utils import corpus_to_words | ||
|
||
|
||
class Game: | ||
|
@@ -24,8 +25,23 @@ def __init__( | |
n_rounds, | ||
n_explain_words, | ||
n_guessing_words, | ||
corpus_path=None, | ||
vocab_path=None, | ||
random_state=None, | ||
): | ||
"""Main class for Game. | ||
params: | ||
- players: list of AbstractPlayer - players in the game | ||
- words: list of str - all used words in the game | ||
- criteria: 'hard' of 'soft' - game criteria | ||
- n_rounds: int - number of rounds | ||
- n_explain_words: int - number of words for explaining | ||
- n_guessing_words: int - number of words for guessing | ||
- corpus_path: str - path for the corpus to create vocabulary (for criteria='hard') | ||
- vocab_path: str - path for vocabulary (for criteria='hard') | ||
NOTE: only corpus_path or vocab_path must be defined | ||
NOTE: if vocabulary is not defined nltk.wordnet will be used for filter not existing words | ||
""" | ||
assert len(players) >= 2 | ||
assert criteria in ("hard", "soft") | ||
self.players = players | ||
|
@@ -36,6 +52,15 @@ def __init__( | |
self.n_guessing_words = n_guessing_words | ||
self.random_state = random_state | ||
self.stemmer = SnowballStemmer("english") | ||
if corpus_path is not None: | ||
assert vocab_path is None, "corpus and vocabulary cannot be defined at the same time" | ||
self.used_words = corpus_to_words(corpus_path) | ||
elif vocab_path is not None: | ||
with open(vocab_path, encoding="utf-8") as f: | ||
words = f.readlines() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Let's use another variable instead of `words`.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Fixed in 26e6f0d. |
||
self.used_words = [word.strip() for word in words] | ||
else: | ||
self.used_words = None | ||
|
||
def remove_repeated_words(self, words): | ||
unique_words = [] | ||
|
@@ -54,9 +79,11 @@ def remove_same_rooted_words(self, word, word_list): | |
cleared_word_list = [w for w in word_list if self.stemmer.stem(w) != root] | ||
return cleared_word_list | ||
|
||
@staticmethod | ||
def remove_non_existing_words(words): | ||
existing_words = [w for w in words if len(wordnet.synsets(w)) > 0] | ||
def remove_non_existing_words(self, words): | ||
if self.used_words is not None: | ||
existing_words = [w for w in words if w in self.used_words] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There could be a situation when initial There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe this will fix this strange behaviour, but I would check if that's not There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Oh, sure. Now it seems like a bug. Thank you! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
added in 26e6f0d There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
yes, adding words for guessing to whitelist fixed this strange behaviour
|
||
else: | ||
existing_words = [w for w in words if len(wordnet.synsets(w)) > 0] | ||
return existing_words | ||
|
||
def create_word_list(self, player, word, n_words): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`data/` migrated to the `server/` folder. The idea was to split the logic into two parts: the-hat-game (only game logic, which doesn't know anything about the server implementation) and server (everything about working with the cloud, uploading texts, etc.).
So maybe we should move some data functions to `the-hat-game/data.py` — those which can be used in the-hat-game and aren't directly related to the server. Namely, `sent_2_words`, `corpus_to_words`, and maybe `save_words` (in this case we should remove the default `vocab_path` arg value, because it's related to the server implementation).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in 0481ab4: `sent_2_words` and `corpus_to_words` were added; `save_words` was not (it is not used anywhere).