diff --git a/.gitignore b/.gitignore index b4c5dcc..80d4dec 100644 --- a/.gitignore +++ b/.gitignore @@ -13,7 +13,11 @@ venv/ .ipynb_checkpoints */.ipynb_checkpoints/* -# Testing +# Unit test / coverage reports +htmlcov/ +.coverage +.coverage.* +coverage.xml .pytest_cache/ # PyHelpers @@ -22,4 +26,5 @@ tests/* !tests/data/ !tests/documents/ !tests/images/ +!tests/*.py .pypirc diff --git a/pyhelpers/ops.py b/pyhelpers/ops.py index d49fb4c..5dec1ec 100644 --- a/pyhelpers/ops.py +++ b/pyhelpers/ops.py @@ -943,6 +943,7 @@ def merge_dicts(*dicts): >>> dict_1 = merged_dict >>> dict_2 = {'b': 2, 'c': 4, 'd': [5, 6]} >>> merged_dict = merge_dicts(dict_1, dict_2) + >>> merged_dict {'a': 1, 'b': 2, 'c': [[3, 4], 4], 'd': [5, 6]} """ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/csr_mat.npz b/tests/data/csr_mat.npz index 442e2fb..72db673 100644 Binary files a/tests/data/csr_mat.npz and b/tests/data/csr_mat.npz differ diff --git a/tests/data/dat.feather b/tests/data/dat.feather index 0c3db79..cb8f8b7 100644 Binary files a/tests/data/dat.feather and b/tests/data/dat.feather differ diff --git a/tests/data/dat.xlsx b/tests/data/dat.xlsx index 6d1fa36..37aca32 100644 Binary files a/tests/data/dat.xlsx and b/tests/data/dat.xlsx differ diff --git a/tests/images/store-save_fig-demo.svg b/tests/images/store-save_fig-demo.svg index 2b0a7dd..d5593a4 100644 --- a/tests/images/store-save_fig-demo.svg +++ b/tests/images/store-save_fig-demo.svg @@ -6,11 +6,11 @@ - 2022-05-02T19:22:05.899496 + 2022-08-15T12:13:37.703681 image/svg+xml - Matplotlib v3.5.1, https://matplotlib.org/ + Matplotlib v3.5.3, https://matplotlib.org/ @@ -41,12 +41,12 @@ z - - + @@ -121,7 +121,7 @@ z - + @@ -163,7 +163,7 @@ z - + @@ -199,7 +199,7 @@ z - + @@ -248,7 +248,7 @@ z - + @@ -305,7 +305,7 @@ z - + @@ -322,12 +322,12 @@ z - - + @@ -342,7 +342,7 @@ L -3.5 0 - + @@ -357,7 +357,7 @@ L -3.5 0 - + @@ -372,7 +372,7 @@ L -3.5 0 - + @@ -387,7 +387,7 @@ L -3.5 0 - + @@ -402,7 +402,7 @@ L -3.5 0 - + @@ -418,7 +418,7 @@ L -3.5 0 +" clip-path="url(#p5958cec24f)" style="fill: none; stroke: #1f77b4; stroke-width: 1.5; stroke-linecap: square"/> + diff --git a/tests/test_ops.py b/tests/test_ops.py new file mode 100644 index 0000000..45cdb0a --- /dev/null +++ b/tests/test_ops.py @@ -0,0 +1,79 @@ +"""Test ops.py""" + +import datetime + +import pytest + + +def test_eval_dtype(capfd): + from pyhelpers.ops import eval_dtype + + val_1 = '1' + origin_val = eval_dtype(val_1) + assert origin_val == 1 + + val_2 = '1.1.1' + origin_val = eval_dtype(val_2) + assert origin_val == '1.1.1' + + +def test_gps_to_utc(): + from pyhelpers.ops import gps_to_utc + + utc_dt = gps_to_utc(gps_time=1271398985.7822514) + + assert utc_dt == datetime.datetime(2020, 4, 20, 6, 23, 5, 782251) + + +def test_parse_size(): + from pyhelpers.ops import parse_size + + assert parse_size(size='123.45 MB') == 129446707 + assert parse_size(size='123.45 MB', binary=False) == 123450000 + assert parse_size(size='123.45 MiB', binary=True) == 129446707 + assert parse_size(size='123.45 MiB', binary=False) == 129446707 + assert parse_size(size=129446707, precision=2) == '123.45 MiB' + assert parse_size(size=129446707, binary=False, precision=2) == '129.45 MB' + + +def test_update_dict_keys(): + from pyhelpers.ops import update_dict_keys + + source_dict = {'a': 1, 'b': 2, 'c': 3} + + upd_dict = update_dict_keys(source_dict, replacements=None) + assert upd_dict == {'a': 1, 'b': 2, 'c': 3} + + repl_keys = {'a': 'd', 'c': 'e'} + upd_dict = update_dict_keys(source_dict, replacements=repl_keys) + assert upd_dict == {'d': 1, 'b': 2, 'e': 3} + + source_dict = {'a': 1, 'b': 2, 'c': {'d': 3, 'e': {'f': 4, 'g': 5}}} + + repl_keys = {'d': 3, 'f': 4} + upd_dict = update_dict_keys(source_dict, replacements=repl_keys) + assert upd_dict == {'a': 1, 'b': 2, 'c': {3: 3, 'e': {4: 4, 'g': 5}}} + + +def test_merge_dicts(): + from pyhelpers.ops import merge_dicts + + dict_a = {'a': 1} + dict_b = {'b': 2} + dict_c = {'c': 3} + + merged_dict = merge_dicts(dict_a, dict_b, dict_c) + assert merged_dict == {'a': 1, 'b': 2, 'c': 3} + + dict_c_ = {'c': 4} + merged_dict = merge_dicts(merged_dict, dict_c_) + assert merged_dict == {'a': 1, 'b': 2, 'c': [3, 4]} + + dict_1 = merged_dict + dict_2 = {'b': 2, 'c': 4, 'd': [5, 6]} + merged_dict = merge_dicts(dict_1, dict_2) + assert merged_dict == {'a': 1, 'b': 2, 'c': [[3, 4], 4], 'd': [5, 6]} + + +if __name__ == '__main__': + pytest.main() diff --git a/tests/test_store.py b/tests/test_store.py new file mode 100644 index 0000000..4df3e30 --- /dev/null +++ b/tests/test_store.py @@ -0,0 +1,314 @@ +"""Test store.py""" + +import json +import os + +import numpy as np +import pytest + +from pyhelpers._cache import example_dataframe +from pyhelpers.dirs import cd + + +def test__check_path_to_file(capfd): + from pyhelpers.store import _check_path_to_file + + file_path = os.getcwd() + try: + _check_path_to_file(file_path, verbose=True) + except AssertionError as e: + print(e) + + out, err = capfd.readouterr() + assert out == "The input for `path_to_file` may not be a file path.\n" + + file_path = cd("pyhelpers.pdf") + _check_path_to_file(file_path, verbose=True) + print("Passed.") + + out, err = capfd.readouterr() + assert out == "Saving \"pyhelpers.pdf\" ... Passed.\n" + + file_path = cd("tests\\documents", "pyhelpers.pdf") + _check_path_to_file(file_path, verbose=True) + print("Passed.") + + out, err = capfd.readouterr() + assert out == "Updating \"pyhelpers.pdf\" at \"tests\\documents\\\" ... Passed.\n" + + +def test__check_loading_path(capfd): + from pyhelpers.store import _check_loading_path + + file_path = cd("tests\\documents", "pyhelpers.pdf") + _check_loading_path(file_path, verbose=True) + print("Passed.") + + out, err = capfd.readouterr() + assert out == "Loading \"tests\\documents\\pyhelpers.pdf\" ... Passed.\n" + + +def test__check_exe_pathname(): + from pyhelpers.store import _check_exe_pathname + + possibilities = ["C:\\Python39\\python.exe", "C:\\Program Files\\Python39\\python.exe"] + + python_exists, path_to_exe = _check_exe_pathname("python.exe", None, possibilities) + assert path_to_exe.endswith('python.exe') + assert os.path.basename(path_to_exe) == 'python.exe' + + +def test__set_index(): + from pyhelpers.store import _set_index + + example_df = example_dataframe() + + assert example_df.equals(_set_index(example_df)) + + example_df_ = _set_index(example_df, index=0) + assert example_df.iloc[:, 0].to_list() == example_df_.index.to_list() + + +def test_save_pickle(capfd): + from pyhelpers.store import save_pickle + + pickle_dat = 1 + + pickle_pathname = cd("tests\\data", "dat.pickle") + save_pickle(pickle_dat, pickle_pathname, verbose=True) + + assert os.path.exists(pickle_pathname) + + out, err = capfd.readouterr() + assert out == 'Updating "dat.pickle" at "tests\\data\\" ... Done.\n' + + pickle_dat = example_dataframe() + save_pickle(pickle_dat, pickle_pathname, verbose=True) + + out, err = capfd.readouterr() + assert out == 'Updating "dat.pickle" at "tests\\data\\" ... Done.\n' + + +def test_save_spreadsheet(capfd): + from pyhelpers.store import save_spreadsheet + + spreadsheet_dat = example_dataframe() + + spreadsheet_pathname = cd("tests\\data", "dat.csv") + save_spreadsheet(spreadsheet_dat, spreadsheet_pathname, index=True, verbose=True) + + out, err = capfd.readouterr() + assert out == 'Updating "dat.csv" at "tests\\data\\" ... Done.\n' + + spreadsheet_pathname = cd("tests\\data", "dat.xlsx") + save_spreadsheet(spreadsheet_dat, spreadsheet_pathname, index=True, verbose=True) + + out, err = capfd.readouterr() + assert out == 'Updating "dat.xlsx" at "tests\\data\\" ... Done.\n' + + +def test_save_spreadsheets(capfd): + from pyhelpers.store import save_spreadsheets + + dat1 = example_dataframe() # Get an example dataframe + + dat2 = dat1.T + + dat = [dat1, dat2] + dat_sheets = ['TestSheet1', 'TestSheet2'] + dat_pathname = cd("tests\\data", "dat.xlsx") + + save_spreadsheets(dat, dat_sheets, dat_pathname, index=True, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.xlsx" at "tests\\data\\" ... \n' \ + '\t\'TestSheet1\' ... Done.\n' \ + '\t\'TestSheet2\' ... Done.\n' + + save_spreadsheets( + dat, dat_sheets, dat_pathname, mode='a', index=True, verbose=True, + confirmation_required=False, if_sheet_exists='new') + out, err = capfd.readouterr() + assert out == 'Updating "dat.xlsx" at "tests\\data\\" ... \n' \ + '\t\'TestSheet1\' ... saved as \'TestSheet11\' ... Done.\n' \ + '\t\'TestSheet2\' ... saved as \'TestSheet21\' ... Done.\n' + + save_spreadsheets( + dat, dat_sheets, dat_pathname, mode='a', index=True, confirmation_required=False, + verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.xlsx" at "tests\\data\\" ... \n' \ + '\t\'TestSheet1\' ... Failed. ' \ + 'Sheet \'TestSheet1\' already exists and if_sheet_exists is set to \'error\'.\n' \ + '\t\'TestSheet2\' ... Failed. ' \ + 'Sheet \'TestSheet2\' already exists and if_sheet_exists is set to \'error\'.\n' + + save_spreadsheets( + dat, dat_sheets, dat_pathname, mode='a', index=True, if_sheet_exists='replace', + confirmation_required=False, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.xlsx" at "tests\\data\\" ... \n' \ + '\t\'TestSheet1\' ... Done.\n' \ + '\t\'TestSheet2\' ... Done.\n' + + +def test_save_json(capfd): + from pyhelpers.store import save_json + + json_pathname = cd("tests\\data", "dat.json") + + json_dat = {'a': 1, 'b': 2, 'c': 3, 'd': ['a', 'b', 'c']} + save_json(json_dat, json_pathname, indent=4, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.json" at "tests\\data\\" ... Done.\n' + + example_df = example_dataframe() + json_dat = json.loads(example_df.to_json(orient='index')) + + save_json(json_dat, json_pathname, indent=4, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.json" at "tests\\data\\" ... Done.\n' + + save_json(json_dat, json_pathname, engine='orjson', verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.json" at "tests\\data\\" ... Done.\n' + + save_json(json_dat, json_pathname, engine='ujson', indent=4, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.json" at "tests\\data\\" ... Done.\n' + + save_json(json_dat, json_pathname, engine='rapidjson', indent=4, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.json" at "tests\\data\\" ... Done.\n' + + +def test_save_joblib(capfd): + from pyhelpers.store import save_joblib + + joblib_pathname = cd("tests\\data", "dat.joblib") + + joblib_dat = example_dataframe().to_numpy() + + save_joblib(joblib_dat, joblib_pathname, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.joblib" at "tests\\data\\" ... Done.\n' + + np.random.seed(0) + joblib_dat = np.random.rand(100, 100) + save_joblib(joblib_dat, joblib_pathname, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.joblib" at "tests\\data\\" ... Done.\n' + + +def test_save_feather(capfd): + from pyhelpers.store import save_feather + + feather_dat = example_dataframe() + + feather_pathname = cd("tests\\data", "dat.feather") + + save_feather(feather_dat, feather_pathname, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.feather" at "tests\\data\\" ... Done.\n' + + save_feather(feather_dat, feather_pathname, index=True, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "dat.feather" at "tests\\data\\" ... Done.\n' + + +def test_save_svg_as_emf(capfd): + from pyhelpers.store import save_svg_as_emf + from pyhelpers.settings import mpl_preferences + import matplotlib.pyplot as plt + + img_dir = cd("tests\\images") + + mpl_preferences() + + x, y = (1, 1), (2, 2) + plt.figure() + plt.plot([x[0], y[0]], [x[1], y[1]]) + # plt.show() + + svg_file_pathname = cd(img_dir, "store-save_fig-demo.svg") + plt.savefig(svg_file_pathname) # Save the figure as a .svg file + + emf_file_pathname = cd(img_dir, "store-save_fig-demo.emf") + save_svg_as_emf(svg_file_pathname, emf_file_pathname, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "store-save_fig-demo.emf" at "tests\\images\\" ... Done.\n' + + plt.close() + + +def test_load_data(capfd): + from pyhelpers.store import load_data + + data_dir = cd("tests\\data") + + dat_pathname = cd(data_dir, "dat.pickle") + pickle_dat = load_data(path_to_file=dat_pathname, verbose=True) + out, err = capfd.readouterr() + assert out == 'Loading "tests\\data\\dat.pickle" ... Done.\n' + assert pickle_dat.equals(example_dataframe()) + + dat_pathname = cd(data_dir, "dat.csv") + csv_dat = load_data(path_to_file=dat_pathname, index=0, verbose=True) + out, err = capfd.readouterr() + assert out == 'Loading "tests\\data\\dat.csv" ... Done.\n' + assert csv_dat.astype('float').equals(example_dataframe()) + + dat_pathname = cd(data_dir, "dat.json") + json_dat = load_data(path_to_file=dat_pathname, verbose=True) + out, err = capfd.readouterr() + assert out == 'Loading "tests\\data\\dat.json" ... Done.\n' + assert list(json_dat.keys()) == example_dataframe().index.to_list() + + dat_pathname = cd(data_dir, "dat.feather") + feather_dat = load_data(path_to_file=dat_pathname, index=0, verbose=True) + out, err = capfd.readouterr() + assert out == 'Loading "tests\\data\\dat.feather" ... Done.\n' + assert feather_dat.equals(example_dataframe()) + + dat_pathname = cd(data_dir, "dat.joblib") + joblib_dat = load_data(path_to_file=dat_pathname, verbose=True) + out, err = capfd.readouterr() + assert out == 'Loading "tests\\data\\dat.joblib" ... Done.\n' + np.random.seed(0) + assert np.array_equal(joblib_dat, np.random.rand(100, 100)) + + +def test_markdown_to_rst(capfd): + from pyhelpers.store import markdown_to_rst + + dat_dir = cd("tests\\documents") + + path_to_md_file = cd(dat_dir, "readme.md") + path_to_rst_file = cd(dat_dir, "readme.rst") + + markdown_to_rst(path_to_md_file, path_to_rst_file, engine='pypandoc', verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "readme.rst" at "tests\\documents\\" ... Done.\n' + + markdown_to_rst(path_to_md_file, path_to_rst_file, verbose=True) + out, err = capfd.readouterr() + assert out == 'Updating "readme.rst" at "tests\\documents\\" ... Done.\n' + + +def test_xlsx_to_csv(capfd): + from pyhelpers.store import xlsx_to_csv, load_csv + + path_to_test_xlsx = cd("tests\\data", "dat.xlsx") + + path_to_temp_csv = xlsx_to_csv(path_to_test_xlsx) + assert os.path.exists(path_to_temp_csv) + + dat = load_csv(path_to_temp_csv, index=0) + assert dat.astype('float16').equals(example_dataframe().astype('float16')) + + path_to_temp_csv = xlsx_to_csv(path_to_test_xlsx, engine='xlsx2csv') + dat = load_csv(path_to_temp_csv, index=0) + assert dat.astype('float16').equals(example_dataframe().astype('float16')) + + +if __name__ == '__main__': + pytest.main() diff --git a/tests/test_text.py b/tests/test_text.py new file mode 100644 index 0000000..891ed84 --- /dev/null +++ b/tests/test_text.py @@ -0,0 +1,259 @@ +"""Test text.py""" + +import pytest + + +def test_get_acronym(): + from pyhelpers.text import get_acronym + + text_a = 'This is an apple.' + assert get_acronym(text_a) == 'TIAA' + + text_b = "I'm at the University of Birmingham." + assert get_acronym(text_b, only_capitals=True) == 'IUB' + + text_c = 'There is a "ConnectionError"!' + assert get_acronym(text_c, capitals_in_words=True) == 'TCE' + + +def test_remove_punctuation(): + from pyhelpers.text import remove_punctuation + + raw_text = 'Hello world!\tThis is a test. :-)' + + result_1 = remove_punctuation(raw_text) + assert result_1 == 'Hello world This is a test' + + result_2 = remove_punctuation(raw_text, rm_whitespace=False) + assert result_2 == 'Hello world \tThis is a test' + + +def test_extract_words1upper(): + from pyhelpers.text import extract_words1upper + + x1 = 'Network_Waymarks' + assert extract_words1upper(x1) == ['Network', 'Waymarks'] + + x2 = 'NetworkRailRetainingWall' + assert extract_words1upper(x2, join_with=' ') == 'Network Rail Retaining Wall' + + +def test_numeral_english_to_arabic(): + from pyhelpers.text import numeral_english_to_arabic + + assert numeral_english_to_arabic('one') == 1 + + assert numeral_english_to_arabic('one hundred and one') == 101 + + assert numeral_english_to_arabic('a thousand two hundred and three') == 1203 + + assert numeral_english_to_arabic('200 and five') == 205 + + with pytest.raises(Exception, match='Illegal word: "fivety"'): + numeral_english_to_arabic('Two hundred and fivety') # Two hundred and fifty + + +def test_count_words(): + from pyhelpers.text import count_words, remove_punctuation + + raw_text = 'This is an apple. That is a pear. Hello world!' + + assert count_words(raw_text) == { + 'This': 1, + 'is': 2, + 'an': 1, + 'apple': 1, + '.': 2, + 'That': 1, + 'a': 1, + 'pear': 1, + 'Hello': 1, + 'world': 1, + '!': 1, + } + + assert count_words(remove_punctuation(raw_text)) == { + 'This': 1, + 'is': 2, + 'an': 1, + 'apple': 1, + 'That': 1, + 'a': 1, + 'pear': 1, + 'Hello': 1, + 'world': 1, + } + + +def test_calculate_idf(): + from pyhelpers.text import calculate_idf + + raw_doc = [ + 'This is an apple.', + 'That is a pear.', + 'It is human being.', + 'Hello world!'] + + docs_tf_, corpus_idf_ = calculate_idf(raw_doc, rm_punc=False) + assert docs_tf_ == [ + {'This': 1, 'is': 1, 'an': 1, 'apple': 1, '.': 1}, + {'That': 1, 'is': 1, 'a': 1, 'pear': 1, '.': 1}, + {'It': 1, 'is': 1, 'human': 1, 'being': 1, '.': 1}, + {'Hello': 1, 'world': 1, '!': 1}] + assert corpus_idf_ == { + 'This': 0.6931471805599453, + 'is': 0.0, + 'an': 0.6931471805599453, + 'apple': 0.6931471805599453, + '.': 0.0, + 'That': 0.6931471805599453, + 'a': 0.6931471805599453, + 'pear': 0.6931471805599453, + 'It': 0.6931471805599453, + 'human': 0.6931471805599453, + 'being': 0.6931471805599453, + 'Hello': 0.6931471805599453, + 'world': 0.6931471805599453, + '!': 0.6931471805599453} + + docs_tf_, corpus_idf_ = calculate_idf(raw_doc, rm_punc=True) + assert docs_tf_ == [ + {'This': 1, 'is': 1, 'an': 1, 'apple': 1}, + {'That': 1, 'is': 1, 'a': 1, 'pear': 1}, + {'It': 1, 'is': 1, 'human': 1, 'being': 1}, + {'Hello': 1, 'world': 1}] + assert corpus_idf_ == { + 'This': 0.6931471805599453, + 'is': 0.0, + 'an': 0.6931471805599453, + 'apple': 0.6931471805599453, + 'That': 0.6931471805599453, + 'a': 0.6931471805599453, + 'pear': 0.6931471805599453, + 'It': 0.6931471805599453, + 'human': 0.6931471805599453, + 'being': 0.6931471805599453, + 'Hello': 0.6931471805599453, + 'world': 0.6931471805599453} + + +def test_calculate_tf_idf(): + from pyhelpers.text import calculate_tf_idf + + raw_doc = [ + 'This is an apple.', + 'That is a pear.', + 'It is human being.', + 'Hello world!'] + + docs_tf_idf_ = calculate_tf_idf(raw_documents=raw_doc) + assert docs_tf_idf_ == { + 'This': 0.6931471805599453, + 'is': 0.0, + 'an': 0.6931471805599453, + 'apple': 0.6931471805599453, + '.': 0.0, + 'That': 0.6931471805599453, + 'a': 0.6931471805599453, + 'pear': 0.6931471805599453, + 'It': 0.6931471805599453, + 'human': 0.6931471805599453, + 'being': 0.6931471805599453, + 'Hello': 0.6931471805599453, + 'world': 0.6931471805599453, + '!': 0.6931471805599453} + + docs_tf_idf_ = calculate_tf_idf(raw_documents=raw_doc, rm_punc=True) + assert docs_tf_idf_ == { + 'This': 0.6931471805599453, + 'is': 0.0, + 'an': 0.6931471805599453, + 'apple': 0.6931471805599453, + 'That': 0.6931471805599453, + 'a': 0.6931471805599453, + 'pear': 0.6931471805599453, + 'It': 0.6931471805599453, + 'human': 0.6931471805599453, + 'being': 0.6931471805599453, + 'Hello': 0.6931471805599453, + 'world': 0.6931471805599453} + + +def test_euclidean_distance_between_texts(): + from pyhelpers.text import euclidean_distance_between_texts + + txt_1, txt_2 = 'This is an apple.', 'That is a pear.' + + euclidean_distance = euclidean_distance_between_texts(txt_1, txt_2) + assert euclidean_distance == 2.449489742783178 + + +def test_cosine_similarity_between_texts(): + from pyhelpers.text import cosine_similarity_between_texts + + txt_1, txt_2 = 'This is an apple.', 'That is a pear.' + + cos_sim = cosine_similarity_between_texts(txt_1, txt_2) + assert cos_sim == 0.25 + + cos_dist = cosine_similarity_between_texts(txt_1, txt_2, cosine_distance=True) # 1 - cos_sim + assert cos_dist == 0.75 + + +def test_find_matched_str(): + from pyhelpers.text import find_matched_str + + lookup_lst = ['abc', 'aapl', 'app', 'ap', 'ape', 'apex', 'apel'] + res = find_matched_str('apple', lookup_lst) + assert list(res) == [] + + lookup_lst += ['apple'] + assert lookup_lst == ['abc', 'aapl', 'app', 'ap', 'ape', 'apex', 'apel', 'apple'] + + res = find_matched_str('apple', lookup_lst) + assert list(res) == ['apple'] + + res = find_matched_str(r'app(le)?', lookup_lst) + assert list(res) == ['app', 'apple'] + + +def test_find_similar_str(): + from pyhelpers.text import find_similar_str + + lookup_lst = [ + 'Anglia', + 'East Coast', + 'East Midlands', + 'North and East', + 'London North Western', + 'Scotland', + 'South East', + 'Wales', + 'Wessex', + 'Western'] + + y = find_similar_str(x='angle', lookup_list=lookup_lst) + assert y == 'Anglia' + y = find_similar_str(x='angle', lookup_list=lookup_lst, n=2) + assert y == ['Anglia', 'Wales'] + + y = find_similar_str(x='angle', lookup_list=lookup_lst, engine='fuzzywuzzy') + assert y == 'Anglia' + y = find_similar_str('angle', lookup_lst, n=2, engine='fuzzywuzzy') + assert y == ['Anglia', 'Wales'] + + y = find_similar_str(x='x', lookup_list=lookup_lst) + assert y is None + y = find_similar_str(x='x', lookup_list=lookup_lst, cutoff=0.25) + assert y == 'Wessex' + y = find_similar_str(x='x', lookup_list=lookup_lst, n=2, cutoff=0.25) + assert y == 'Wessex' + + y = find_similar_str(x='x', lookup_list=lookup_lst, engine='fuzzywuzzy') + assert y == 'Wessex' + y = find_similar_str(x='x', lookup_list=lookup_lst, n=2, engine='fuzzywuzzy') + assert y == ['Wessex', 'Western'] + + +if __name__ == '__main__': + pytest.main()