diff --git a/doc/doc.md b/doc/doc.md index 2eeecb868..7c57433d2 100644 --- a/doc/doc.md +++ b/doc/doc.md @@ -609,7 +609,7 @@ You can generate line charts or word clouds for patterns of colligation using an > [!NOTE] > Renamed from **Keyword** to **Keyword Extractor** in *Wordless* 2.2 -In *Keyword Extractor*, you can search for candidates of potential keywords (tokens that have far more or far less frequency in the observed file than in the reference file) in different files given a reference corpus, conduct different tests of statistical significance on each keyword and calculate the Bayes factor and effect size for each keyword using different measures. You can adjust the settings for the generated data via **Generation Settings**. You can disable the calculation of statistical significance and/or Bayes factor and/or effect size by setting **Generation Settings → Test of Statistical Significance / Measures of Bayes Factor / Measure of Effect Size** to **None**. +In *Keyword Extractor*, you can search for candidates of potential keywords (tokens that have far more or far less frequency in the observed corpus than in the reference corpus) in different files given a reference corpus, conduct different tests of statistical significance on each keyword and calculate the Bayes factor and effect size for each keyword using different measures. You can adjust the settings for the generated data via **Generation Settings**. You can disable the calculation of statistical significance and/or Bayes factor and/or effect size by setting **Generation Settings → Test of Statistical Significance / Measures of Bayes Factor / Measure of Effect Size** to **None**. You can filter the results by clicking **Filter results** or search in *Data Table* for parts that might be of interest to you by clicking **Search in results**. @@ -621,11 +621,11 @@ You can generate line charts or word clouds for keywords using any statistics. Y - **11.2 Keyword**
The potential keyword. You can specify what should be counted as a "token" via **Token Settings**. -- **11.3 Frequency (in Reference File)**
- The number of occurrences of the keyword in the reference file. +- **11.3 Frequency (in Reference Corpora)**
+ The number of occurrences of the keyword in the reference corpora. -- **11.4 Frequency (in Observed Files)**
- The number of occurrences of the keyword in each observed file. +- **11.4 Frequency (in Observed Corpus)**
+ The number of occurrences of the keyword in each observed corpus. - **11.5 Test Statistic**
The test statistic of the significance test conducted on the keyword in each file. You can change the test of statistical significance used via **Generation Settings → Test of Statistical Significance**. See section [12.4.4 Tests of Statistical Significance, Measures of Bayes Factor, & Measures of Effect Size](#doc-12-4-4) for more details. diff --git a/wordless/wl_file_area.py b/wordless/wl_file_area.py index 4848e4be6..b920c9ff7 100644 --- a/wordless/wl_file_area.py +++ b/wordless/wl_file_area.py @@ -83,8 +83,10 @@ def __init__(self, main, file_type = 'observed'): # Suffix for settings if self.file_type == 'observed': + self.tab = 'corpora_observed' self.settings_suffix = '' elif self.file_type == 'ref': + self.tab = 'corpora_ref' self.settings_suffix = '_ref' # Table @@ -237,17 +239,33 @@ def __init__(self, parent): self.clicked.connect(self.item_clicked) # Menu - self.main.action_file_open_files.triggered.connect(lambda: self.check_file_area(self.open_files)) - self.main.action_file_reopen.triggered.connect(lambda: self.check_file_area(self.reopen)) + self.main.action_file_open_files.triggered.connect( + lambda: self.check_file_area(self.open_files) + ) + self.main.action_file_reopen.triggered.connect( + lambda: self.check_file_area(self.reopen) + ) - self.main.action_file_select_all.triggered.connect(lambda: self.check_file_area(self.horizontalHeader().select_all)) - self.main.action_file_deselect_all.triggered.connect(lambda: self.check_file_area(self.horizontalHeader().deselect_all)) - self.main.action_file_invert_selection.triggered.connect(lambda: self.check_file_area(self.horizontalHeader().invert_selection)) + self.main.action_file_select_all.triggered.connect( + lambda: self.check_file_area(self.horizontalHeader().select_all) + ) + self.main.action_file_deselect_all.triggered.connect( + lambda: self.check_file_area(self.horizontalHeader().deselect_all) + ) + self.main.action_file_invert_selection.triggered.connect( + lambda: 
self.check_file_area(self.horizontalHeader().invert_selection) + ) - self.main.action_file_close_selected.triggered.connect(lambda: self.check_file_area(self.close_selected)) - self.main.action_file_close_all.triggered.connect(lambda: self.check_file_area(self.close_all)) + self.main.action_file_close_selected.triggered.connect( + lambda: self.check_file_area(self.close_selected) + ) + self.main.action_file_close_all.triggered.connect( + lambda: self.check_file_area(self.close_all) + ) - self.main.tabs_file_area.currentChanged.connect(lambda: self.check_file_area(self.model().itemChanged.emit, self.model().item(0, 0))) + self.main.tabs_file_area.currentChanged.connect( + lambda: self.check_file_area(self.model().itemChanged.emit, self.model().item(0, 0)) + ) def item_changed(self): super().item_changed() @@ -391,10 +409,10 @@ def check_file_area(self, op, *args, **kwargs): if ( ( self.file_type == 'observed' - and self.main.tabs_file_area.tabText(self.main.tabs_file_area.currentIndex()) == self.tr('Observed Files') + and self.main.tabs_file_area.tabText(self.main.tabs_file_area.currentIndex()) == self.tr('Observed Corpora') ) or ( self.file_type == 'ref' - and self.main.tabs_file_area.tabText(self.main.tabs_file_area.currentIndex()) == self.tr('Reference Files') + and self.main.tabs_file_area.tabText(self.main.tabs_file_area.currentIndex()) == self.tr('Reference Corpora') ) ): return op(*args, **kwargs) diff --git a/wordless/wl_keyword_extractor.py b/wordless/wl_keyword_extractor.py index 2aa79989b..a89032472 100644 --- a/wordless/wl_keyword_extractor.py +++ b/wordless/wl_keyword_extractor.py @@ -364,7 +364,7 @@ def __init__(self, parent): self.main.wl_file_area_ref.table_files.model().itemChanged.connect(self.file_changed) - # Enable the buttons and prompt the user if there are only observed files or only reference files + # Enable the buttons and prompt the user if there are only observed corpora or only reference corpora def file_changed(self): if 
list(self.main.wl_file_area.get_selected_files()) or list(self.main.wl_file_area_ref.get_selected_files()): self.button_generate_table.setEnabled(True) @@ -373,29 +373,29 @@ def file_changed(self): self.button_generate_table.setEnabled(False) self.button_generate_fig.setEnabled(False) - def wl_msg_box_missing_files_observed(self): + def wl_msg_box_missing_corpus_observed(self): wl_msg_boxes.Wl_Msg_Box_Warning( self.main, - title = self.tr('Missing Observed Files'), + title = self.tr('Missing Observed Corpus'), text = self.tr(''' -
You have not specified any observed files yet.
+
You have not specified any observed corpus yet.
''') ).open() - def wl_msg_box_missing_files_ref(self): + def wl_msg_box_missing_corpus_ref(self): wl_msg_boxes.Wl_Msg_Box_Warning( self.main, - title = self.tr('Missing Reference Files'), + title = self.tr('Missing Reference Corpus'), text = self.tr(''' -
You have not specified any reference files yet.
+
You have not specified any reference corpus yet.
''') ).open() - def wl_status_bar_msg_missing_files_observed(self): - self.main.statusBar().showMessage(self.tr('Missing observed files!')) + def wl_status_bar_msg_missing_corpus_observed(self): + self.main.statusBar().showMessage(self.tr('Missing observed corpus!')) - def wl_status_bar_msg_missing_files_ref(self): - self.main.statusBar().showMessage(self.tr('Missing reference files!')) + def wl_status_bar_msg_missing_corpus_ref(self): + self.main.statusBar().showMessage(self.tr('Missing reference corpus!')) @wl_misc.log_time def generate_table(self): @@ -421,11 +421,11 @@ def generate_table(self): wl_threading.Wl_Thread(worker_keyword_extractor_table).start_worker() else: if not files_observed: - self.wl_msg_box_missing_files_observed() - self.wl_status_bar_msg_missing_files_observed() + self.wl_msg_box_missing_corpus_observed() + self.wl_status_bar_msg_missing_corpus_observed() elif not files_ref: - self.wl_msg_box_missing_files_ref() - self.wl_status_bar_msg_missing_files_ref() + self.wl_msg_box_missing_corpus_ref() + self.wl_status_bar_msg_missing_corpus_ref() def update_gui_table(self, err_msg, keywords_freq_files, keywords_stats_files): if wl_checks_work_area.check_results(self.main, err_msg, keywords_freq_files): @@ -447,12 +447,12 @@ def update_gui_table(self, err_msg, keywords_freq_files, keywords_stats_files): # Insert columns self.ins_header_hor( self.model().columnCount() - 2, - self.tr('[Reference Files]\nFrequency'), + self.tr('[Reference Corpora]\nFrequency'), is_int = True, is_cum = True ) self.ins_header_hor( self.model().columnCount() - 2, - self.tr('[Reference Files]\nFrequency %'), + self.tr('[Reference Corpora]\nFrequency %'), is_pct = True, is_cum = True ) @@ -507,25 +507,25 @@ def update_gui_table(self, err_msg, keywords_freq_files, keywords_stats_files): is_breakdown_file = is_breakdown_file ) - # Sort by p-value of the first observed file + # Sort by p-value of the first observed corpus if test_statistical_significance != 'none': 
self.horizontalHeader().setSortIndicator( self.find_header_hor(self.tr('[{}]\np-value').format(files_observed[0]['name'])), Qt.AscendingOrder ) - # Sort by bayes factor of the first observed file + # Sort by bayes factor of the first observed corpus elif measure_bayes_factor != 'none': self.horizontalHeader().setSortIndicator( self.find_header_hor(self.tr('[{}]\nBayes Factor').format(files_observed[0]['name'])), Qt.DescendingOrder ) - # Sort by effect size of the first observed file + # Sort by effect size of the first observed corpus elif measure_effect_size != 'none': self.horizontalHeader().setSortIndicator( self.find_header_hor(f"[{files_observed[0]['name']}]\n{col_text_effect_size}"), Qt.DescendingOrder ) - # Otherwise sort by frequency of the first observed file + # Otherwise sort by frequency of the first observed corpus else: self.horizontalHeader().setSortIndicator( self.find_header_hor(self.tr('[{}]\nFrequency').format(files_observed[0]['name'])), @@ -624,11 +624,11 @@ def generate_fig(self): wl_threading.Wl_Thread(self.worker_keyword_extractor_fig).start_worker() else: if not files_observed: - self.wl_msg_box_missing_files_observed() - self.wl_status_bar_msg_missing_files_observed() + self.wl_msg_box_missing_corpus_observed() + self.wl_status_bar_msg_missing_corpus_observed() elif not files_ref: - self.wl_msg_box_missing_files_ref() - self.wl_status_bar_msg_missing_files_ref() + self.wl_msg_box_missing_corpus_ref() + self.wl_status_bar_msg_missing_corpus_ref() def update_gui_fig(self, err_msg, keywords_freq_files, keywords_stats_files): if wl_checks_work_area.check_results(self.main, err_msg, keywords_freq_files): @@ -700,7 +700,7 @@ def run(self): files_observed = list(self.main.wl_file_area.get_selected_files()) files_ref = list(self.main.wl_file_area_ref.get_selected_files()) - # Frequency (Reference files) + # Frequency (Reference Corpora) self.keywords_freq_files.append(collections.Counter()) tokens_ref = [] @@ -720,7 +720,7 @@ def run(self): 
len_tokens_ref = len(tokens_ref) - # Frequency (Observed files) + # Frequency (Observed Corpus) for file_observed in files_observed: text = wl_token_processing.wl_process_tokens_ngram_generator( self.main, file_observed['text'], @@ -741,7 +741,7 @@ def run(self): self.keywords_freq_files.append(sum(self.keywords_freq_files[1:], collections.Counter())) - # Remove tokens that do not appear in any of the observed files + # Remove tokens that do not appear in any observed corpus self.keywords_freq_files[0] = { token: freq for token, freq in self.keywords_freq_files[0].items() diff --git a/wordless/wl_main.py b/wordless/wl_main.py index 12fdc099a..a5163e99e 100644 --- a/wordless/wl_main.py +++ b/wordless/wl_main.py @@ -264,7 +264,7 @@ def __init__(self, loading_window): # Menu self.init_menu() - # Work area & File area + # Work Area & File Area self.init_central_widget() # Status bar @@ -425,13 +425,13 @@ def init_central_widget(self): self.wl_file_area = wl_file_area.Wrapper_File_Area(self) self.wl_file_area_ref = wl_file_area.Wrapper_File_Area(self, file_type = 'ref') - # File area - self.tabs_file_area.addTab(self.wl_file_area, self.tr('Observed Files')) - self.tabs_file_area.addTab(self.wl_file_area_ref, self.tr('Reference Files')) + # File Area + self.tabs_file_area.addTab(self.wl_file_area, self.tr('Observed Corpora')) + self.tabs_file_area.addTab(self.wl_file_area_ref, self.tr('Reference Corpora')) self.tabs_file_area.currentChanged.connect(self.file_area_changed) - # Work area + # Work Area self.init_work_area() # Splitter @@ -518,18 +518,18 @@ def load_settings(self): # Layouts self.centralWidget().setSizes(settings['menu']['prefs']['layouts']['central_widget']) - # File area + # File Area for i in range(self.tabs_file_area.count()): - if self.tabs_file_area.tabText(i) == self.settings_custom['file_area_cur']: + if self.tabs_file_area.widget(i).tab == self.settings_custom['tab_file_area']: self.tabs_file_area.setCurrentIndex(i) break 
self.tabs_file_area.currentWidget().table_files.model().itemChanged.emit(QStandardItem()) - # Work area + # Work Area for i in range(self.wl_work_area.count()): - if self.wl_work_area.widget(i).tab == self.settings_custom['work_area_cur']: + if self.wl_work_area.widget(i).tab == self.settings_custom['tab_work_area']: self.wl_work_area.setCurrentIndex(i) break @@ -538,14 +538,14 @@ def load_settings(self): def file_area_changed(self): # Current tab - self.settings_custom['file_area_cur'] = self.tabs_file_area.tabText(self.tabs_file_area.currentIndex()) + self.settings_custom['tab_file_area'] = self.tabs_file_area.currentWidget().tab def work_area_changed(self): # Current tab - self.settings_custom['work_area_cur'] = self.wl_work_area.currentWidget().tab + self.settings_custom['tab_work_area'] = self.wl_work_area.currentWidget().tab # File Area - if self.settings_custom['work_area_cur'] == 'keyword_extractor': + if self.settings_custom['tab_work_area'] == 'keyword_extractor': self.tabs_file_area.tabBar().show() else: self.tabs_file_area.setCurrentIndex(0) diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py index 3ced157fd..046d36cf7 100644 --- a/wordless/wl_settings/wl_settings_default.py +++ b/wordless/wl_settings/wl_settings_default.py @@ -95,8 +95,8 @@ def init_settings_default(main): settings_default = { '1st_startup': True, - 'file_area_cur': _tr('wl_settings_default', 'Observed Files'), - 'work_area_cur': 'profiler', + 'tab_file_area': 'corpora_observed', + 'tab_work_area': 'profiler', 'menu': { 'prefs': {