From 60d7284774ef8afa2ab8b01827d515a5005a4db6 Mon Sep 17 00:00:00 2001 From: Andrew Tavis Date: Wed, 17 Feb 2021 22:02:47 +0100 Subject: [PATCH] Further model and visual tests - version up for stable release --- CHANGELOG.md | 16 +++++++++++++++ docs/source/conf.py | 2 +- kwx/visuals.py | 26 ++---------------------- setup.py | 4 ++-- tests/test_model.py | 45 ++++++++++++++++++++++++++++++------------ tests/test_visuals.py | 46 ++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 98 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac4fb3c..669d195 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,19 @@ +### kwx 0.1.0 (Feb 17, 2021) + +First stable release of kwx + +Additions include: + +- Full documentation of the package + +- Virtual environment files + +- Bug fixes + +- Extensive testing of all modules with GH Actions and Codecov + +- Code of conduct and contribution guidelines + ### kwx 0.0.2.2 (Jan 31, 2021) The minimum viable product of kwx: diff --git a/docs/source/conf.py b/docs/source/conf.py index 2afe7f1..61f26c6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,7 +24,7 @@ author = "kwx developers" # The full version, including alpha/beta/rc tags -release = "0.0.2.2" +release = "0.1.0" # -- General configuration --------------------------------------------------- diff --git a/kwx/visuals.py b/kwx/visuals.py index d8e660c..9d506b2 100644 --- a/kwx/visuals.py +++ b/kwx/visuals.py @@ -456,30 +456,8 @@ def gen_word_cloud( plt.imshow(wordcloud, interpolation="bilinear") plt.axis("off") - if save_file == True: - plt.savefig( - "word_cloud_{}.png".format(time.strftime("%Y%m%d-%H%M%S")), - bbox_inches="tight", - dpi=300, - ) - elif type(save_file) == str: # a save path has been provided - if save_file[-4:] == ".zip": - with zipfile.ZipFile(save_file, mode="a") as zf: - plt.plot([0, 0]) - buf = io.BytesIO() - plt.savefig(buf, bbox_inches="tight", dpi=300) - plt.close() - zf.writestr(zinfo_or_arcname="word_cloud.png", data=buf.getvalue()) - zf.close() - else: - if os.path.exists(save_file): - plt.savefig(save_file + "/word_cloud.png", bbox_inches="tight", dpi=300) - else: - plt.savefig( - "word_cloud_{}.png".format(time.strftime("%Y%m%d-%H%M%S")), - bbox_inches="tight", - dpi=300, - ) + # Save file if directed to + save_vis(vis=plt, save_file=save_file, file_name="word_cloud") plt.show() diff --git a/setup.py b/setup.py index a79f8c9..dc70094 100644 --- a/setup.py +++ b/setup.py @@ -10,11 +10,11 @@ setup_args = dict( name="kwx", - version="0.0.2.2", + version="0.1.0", author="Andrew Tavis McAllister", author_email="andrew.t.mcallister@gmail.com", classifiers=[ - "Development Status :: 4 - Beta", + "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Science/Research", diff --git a/tests/test_model.py b/tests/test_model.py index 5052809..da9ceca 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -3,6 +3,7 @@ ----------- """ +import os from io import StringIO from kwx import model @@ -20,6 +21,19 @@ def test_extract_frequent_kws(long_text_corpus): assert type(kws[0]) == str +def test_translate_kw_output(long_text_corpus): + kws = model.extract_kws( + method="frequency", + text_corpus=long_text_corpus, + input_language="english", + output_language="german", + num_keywords=10, + prompt_remove_words=False, + ) + assert len(kws) == 10 + assert type(kws[0]) == str + + def test_extract_TFIDF_kws(long_text_corpus): kws = model.extract_kws( method="TFIDF", @@ -87,16 +101,21 @@ def test_extract_lda_BERT_kws(long_text_corpus): assert type(kws[0]) == str -def test_gen_files(): - # model.gen_filesgen_files( - # method="lda", - # text_corpus=None, - # input_language="english", - # num_keywords=10, - # topic_nums_to_compare=[10, 11], - # prompt_remove_words=False, - # org_by_pos=True, - # incl_visuals=True, - # zip_results=True, - # ) - assert True +def test_gen_files(monkeypatch, long_text_corpus): + monkeypatch.setattr("sys.stdin", StringIO("y\nword\nn\n")) + + model.gen_files( + method="lda", + text_corpus=long_text_corpus, + input_language="english", + num_keywords=10, + topic_nums_to_compare=[10, 11], + prompt_remove_words=True, + verbose=False, + incl_most_freq=True, + org_by_pos=True, + incl_visuals=True, + zip_results=True, + ) + + os.remove("text_corpus_kws.zip") diff --git a/tests/test_visuals.py b/tests/test_visuals.py index 613e434..bbc09df 100644 --- a/tests/test_visuals.py +++ b/tests/test_visuals.py @@ -28,6 +28,24 @@ def test_graph_topic_num_evals(monkeypatch, long_text_corpus): ) +def test_return_ideal_metrics(long_text_corpus): + assert ( + type( + visuals.graph_topic_num_evals( + method=["lda"], + text_corpus=long_text_corpus, + input_language="english", + num_keywords=10, + topic_nums_to_compare=[9, 10], + save_file=False, + return_ideal_metrics=True, + verbose=False, + )[1] + ) + == int + ) + + def test_gen_word_cloud(monkeypatch, long_text_corpus): monkeypatch.setattr(plt, "show", lambda: None) visuals.gen_word_cloud( @@ -36,7 +54,17 @@ def test_gen_word_cloud(monkeypatch, long_text_corpus): ignore_words="word", save_file=False, ) - assert True + + +def test_gen_word_cloud_zip(monkeypatch, long_text_corpus): + monkeypatch.setattr(plt, "show", lambda: None) + visuals.gen_word_cloud( + text_corpus=long_text_corpus, + input_language="english", + ignore_words="word", + save_file="tests/test.zip", + ) + os.remove("tests/test.zip") def test_pyLDAvis_topics(long_text_corpus): @@ -55,6 +83,22 @@ def test_pyLDAvis_topics(long_text_corpus): os.remove("tests/lda_topics.html") +def test_pyLDAvis_topics_zip(long_text_corpus): + visuals.pyLDAvis_topics( + method="lda", + text_corpus=long_text_corpus, + input_language="english", + num_topics=10, + min_freq=2, + min_word_len=3, + sample_size=1, + save_file="tests/test.zip", + display_ipython=False, + ) + + os.remove("tests/test.zip") + + def test_t_sne(monkeypatch, long_text_corpus): monkeypatch.setattr(plt, "show", lambda: None) visuals.t_sne(