Skip to content

Commit

Permalink
standard read of dutch_entities.csv
Browse files Browse the repository at this point in the history
  • Loading branch information
eriktks committed Oct 1, 2024
1 parent b31c1d8 commit 3eeaa4e
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 25 deletions.
12 changes: 0 additions & 12 deletions orangecontrib/storynavigation/modules/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@
NL_FALSE_POSITIVE_VERB_FILENAME = "false_positive_verbs_dutch.txt"
# filename from which to retrieve a list of dutch pronouns
NL_PRONOUNS_FILENAME = "dutchpronouns.txt"
# filename from which to retrieve a list of dutch entities
NL_ENTITIES_FILENAME = "dutch_entities.csv"
# filename from which to retrieve a list of dutch time words
NL_TIME_WORDS_FILENAME = "dutch_time_words.csv"

Expand All @@ -53,8 +51,6 @@
EN_FALSE_POSITIVE_VERB_FILENAME = "false_positive_verbs_english.txt"
# filename from which to retrieve a list of english pronouns
EN_PRONOUNS_FILENAME = "englishpronouns.txt"
# filename from which to retrieve a list of english entities
EN_ENTITIES_FILENAME = "english_entities.csv"
# filename from which to retrieve a list of english time words
EN_TIME_WORDS_FILENAME = "english_time_words.csv"

Expand Down Expand Up @@ -103,14 +99,6 @@
PKG / RESOURCES_SUBPACKAGE / EN_FALSE_POSITIVE_VERB_FILENAME
)

NL_ENTITIES_FILE = (
PKG / RESOURCES_SUBPACKAGE / NL_ENTITIES_FILENAME
)

EN_ENTITIES_FILE = (
PKG / RESOURCES_SUBPACKAGE / EN_ENTITIES_FILENAME
)

NL_TIME_WORDS_FILE = (
PKG / RESOURCES_SUBPACKAGE / NL_TIME_WORDS_FILENAME
)
Expand Down
2 changes: 0 additions & 2 deletions orangecontrib/storynavigation/modules/settinganalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,9 @@ def __setup_required_nlp_resources(self, language):
"""
if language == constants.NL:
self.model = constants.NL_SPACY_MODEL
#self.entity_list = constants.NL_ENTITIES_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
self.time_words = constants.NL_TIME_WORDS_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
elif language == constants.EN:
self.model = constants.EN_SPACY_MODEL
#self.entity_list = constants.EN_ENTITIES_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
self.time_words = constants.EN_TIME_WORDS_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
else:
raise ValueError(f"settingsanalysis.py: unknown language {language}")
Expand Down
27 changes: 16 additions & 11 deletions orangecontrib/storynavigation/widgets/OWSNSettingAnalysis.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import pathlib
import re

from Orange.data import Table
Expand Down Expand Up @@ -32,10 +33,11 @@ class OWSNSettingAnalysis(OWWidget, ConcurrentWidgetMixin):
autocommit = Setting(True)
language = 'nl'
n_segments = 1
user_defined_entities_file = os.path.join(os.getcwd(),
"orangecontrib/storynavigation/resources",
user_defined_entities_file_name = os.path.join(
str(constants.PKG),
str(constants.RESOURCES_SUBPACKAGE),
("dutch" if language == "nl" else "english") + "_entities.csv")
recent_files = [user_defined_entities_file]
recent_files = [user_defined_entities_file_name]
ENTITIES_FILE_YES = "yes: use this file"
ENTITIES_FILE_NO = "no: skip this file"
entity_colors = { "DATE": "lightblue",
Expand Down Expand Up @@ -68,7 +70,7 @@ def __init__(self):
self.controlArea.setSizePolicy(size_policy)
self.user_defined_entities = {}
self.use_user_defined_entities_file = self.ENTITIES_FILE_YES
self.read_entities_file(self.user_defined_entities_file)
self.read_entities_file(self.user_defined_entities_file_name)

self.__make_language_selection_menu()
self.__make_entities_file_dialog()
Expand Down Expand Up @@ -171,20 +173,23 @@ def refresh_search(self):
self.__visualize_text_data()


def read_entities_file(self, user_defined_entities_file):
self.user_defined_entities_file = user_defined_entities_file
def read_entities_file(self, user_defined_entities_file_name):
self.user_defined_entities_file_name = user_defined_entities_file_name
self.user_defined_entities = {}
if self.use_user_defined_entities_file == self.ENTITIES_FILE_YES:
with open(user_defined_entities_file) as file:
for line in file:
fields = line.strip().split(",")
self.user_defined_entities[fields[1]] = fields[0]
user_defined_entities_lines = pathlib.Path(user_defined_entities_file_name).read_text(encoding="utf-8").strip().split("\n")
for line in user_defined_entities_lines:
try:
entity_class, entity_token = line.strip().split(",")
self.user_defined_entities[entity_token] = entity_class
except:
pass
if self.story_elements:
self.reset_story_elements(self.story_elements)


def __process_use_user_defined_entities_file_change(self):
self.read_entities_file(self.user_defined_entities_file)
self.read_entities_file(self.user_defined_entities_file_name)


def get_selected_indexes(self) -> Set[int]:
Expand Down

0 comments on commit 3eeaa4e

Please sign in to comment.