From fe3100f59075ee4842f2ba4ef5060aa640cffcee Mon Sep 17 00:00:00 2001 From: Severin Simmler Date: Sat, 28 Apr 2018 18:05:30 +0200 Subject: [PATCH 1/4] Fix bug --- application/modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/modeling.py b/application/modeling.py index 053cc90..6c84309 100644 --- a/application/modeling.py +++ b/application/modeling.py @@ -87,7 +87,7 @@ def workflow(tempdir, archive_dir): cleaning = "removed the {0} most frequent words, based on a threshold".format(len(stopwords)) except KeyError: stopwords = user_input['stopwords'].read().decode('utf-8') - stopwords = dariah_topics.preprocessing.tokenize(stopwords) + stopwords = list(dariah_topics.preprocessing.tokenize(stopwords)) cleaning = "removed the {0} most frequent words, based on an external stopwords list".format(len(stopwords)) hapax_legomena = dariah_topics.preprocessing.find_hapax_legomena(document_term_matrix) features = set(stopwords).union(hapax_legomena) From 7e95ca411eeed879befde1cce704e7343ae9feb1 Mon Sep 17 00:00:00 2001 From: Severin Simmler Date: Sat, 28 Apr 2018 18:18:00 +0200 Subject: [PATCH 2/4] Bugfix --- application/templates/model.html | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/application/templates/model.html b/application/templates/model.html index a4b61fe..27d76f9 100755 --- a/application/templates/model.html +++ b/application/templates/model.html @@ -62,6 +62,10 @@

2.2. Topics and Documents

2.3. Distribution of Topics

In the following graphic you can access one dimension of the information displayed in the heatmap above. This might be a clearer approach if you are interested in a specific topic, or, more precisely, how the topic is distributed over the documents of your corpus. You can use the widget to select a specific topic.

+
+ + FYI: The proportions you can see here by default are those of the first topic: {{ first_topic|safe }}. But you can of course take a closer look at each topic by using the widget. +
{{ topics_div|safe }}
@@ -71,15 +75,14 @@

2.3. Distribution of Topics

Watch out! The autocompletion is still a bit buggy and serves more as a writing aid. If you click on the suggestion, not much happens yet. However, the text field must contain the complete name and you have to press enter, otherwise it will not work. Sorry. But we're working on it.
{% endif %} -
- - FYI: The proportions you can see here by default are those of the first topic: {{ first_topic|safe }}. But you can of course take a closer look at each topic by using the widget. -
-

2.4. Distribution of Documents

Similar to the above barchart, you can access the other dimension displayed in the heatmap. So, if you are interested in a specific document, you have the ability to select it via the widget and inspect its proportions.

+
+ + FYI: The proportions you can see here by default are those of the first document: {{ first_document|safe }}. Here you can also have a closer look at the distribution of the topics for each document using the widget. +
{{ documents_div|safe }}
@@ -89,11 +92,6 @@

2.4. Distribution of Documents

Watch out! The autocompletion is still a bit buggy and serves more as a writing aid. If you click on the suggestion, not much happens yet. However, the text field must contain the complete name and you have to press enter, otherwise it will not work. Sorry. But we're working on it.
{% endif %} -
- - FYI: The proportions you can see here by default are those of the first document: {{ first_document|safe }}. Here you can also have a closer look at the distribution of the topics for each document using the widget. -
-

2. Delving Deeper into Topic Modeling

We want to introduce users with little or no programming experience to digital methods. If this little insight into the text mining technique topic modeling has aroused your interest, and you want to delve deeper into the technical parts, we provide the same convenient, modular workflow which can be entirely controlled from within a well documented Jupyter notebook, integrating a total of three popular LDA implementations.

From 6c502960d0b42df628a9009291f68f0d4766abc9 Mon Sep 17 00:00:00 2001 From: severinsimmler Date: Sun, 29 Apr 2018 00:07:26 +0200 Subject: [PATCH 3/4] Improve UI --- application/templates/model.html | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/application/templates/model.html b/application/templates/model.html index 27d76f9..eb6c3c2 100755 --- a/application/templates/model.html +++ b/application/templates/model.html @@ -66,15 +66,15 @@

2.3. Distribution of Topics

FYI: The proportions you can see here by default are those of the first topic: {{ first_topic|safe }}. But you can of course take a closer look at each topic by using the widget. -
- {{ topics_div|safe }} -
{% if autocomplete_warning_t|safe == "include" %}
Watch out! The autocompletion is still a bit buggy and serves more as a writing aid. If you click on the suggestion, not much happens yet. However, the text field must contain the complete name and you have to press enter, otherwise it will not work. Sorry. But we're working on it.
{% endif %} +
+ {{ topics_div|safe }} +

2.4. Distribution of Documents

Similar to the above barchart, you can access the other dimension displayed in the heatmap. So, if you are interested in a specific document, you have the ability to select it via the widget and inspect its proportions. @@ -83,15 +83,15 @@

2.4. Distribution of Documents

FYI: The proportions you can see here by default are those of the first document: {{ first_document|safe }}. Here you can also have a closer look at the distribution of the topics for each document using the widget.
-
- {{ documents_div|safe }} -
{% if autocomplete_warning_d|safe == "include" %}
Watch out! The autocompletion is still a bit buggy and serves more as a writing aid. If you click on the suggestion, not much happens yet. However, the text field must contain the complete name and you have to press enter, otherwise it will not work. Sorry. But we're working on it.
{% endif %} +
+ {{ documents_div|safe }} +

2. Delving Deeper into Topic Modeling

We want to introduce users with little or no programming experience to digital methods. If this little insight into the text mining technique topic modeling has aroused your interest, and you want to delve deeper into the technical parts, we provide the same convenient, modular workflow which can be entirely controlled from within a well documented Jupyter notebook, integrating a total of three popular LDA implementations.

From ede7da968896ada4e7482f7940970c54918327ca Mon Sep 17 00:00:00 2001 From: severinsimmler Date: Sun, 29 Apr 2018 00:08:02 +0200 Subject: [PATCH 4/4] Fix Windows bug --- application/web.py | 1 - 1 file changed, 1 deletion(-) diff --git a/application/web.py b/application/web.py index 11a695d..30a6310 100755 --- a/application/web.py +++ b/application/web.py @@ -67,7 +67,6 @@ def model(): Loads the dumped data, deletes the temporary data, and renders the model page. """ data = application.utils.load_data(TEMPDIR) - shutil.rmtree(TEMPDIR) # Removing the tempdir return flask.render_template('model.html', **data)