diff --git a/CHANGELOG.md b/CHANGELOG.md index 96ee3186..81f6c5f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,14 @@ Emojis for the following are chosen based on [gitmoji](https://gitmoji.dev/). - Scribe-Data now outputs an SQLite table that has keys for target languages for each base language. --> +## Scribe-Data 3.2.1 + +### ♻️ Code Refactoring + +- The docs and tests were grafted into the package using `MANIFEST.in`. +- Minor fixes to file and function docstrings and documentation files. +- `include_package_data=True` is used in `setup.py` to hopefully include all files in the package distribution. + ## Scribe-Data 3.2.0 ### ✨ Features diff --git a/MANIFEST.in b/MANIFEST.in index d254fabb..7b2724b8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,7 @@ -include CHANGELOG.* LICENSE.* +include CHANGELOG.* CONTRIBUTING.* LICENSE.* graft src +graft docs +prune docs/build +graft tests global-exclude *.py[cod] global-exclude .DS_Store diff --git a/docs/source/conf.py b/docs/source/conf.py index 0b194dab..0d3a3f7b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -27,7 +27,7 @@ author = "Scribe-Data developers" # The full version, including alpha/beta/rc tags -release = "3.2.0" +release = "3.2.1" # -- General configuration --------------------------------------------------- diff --git a/docs/source/utils.rst b/docs/source/utils.rst index 0606fca6..e016b8eb 100644 --- a/docs/source/utils.rst +++ b/docs/source/utils.rst @@ -1,8 +1,6 @@ utils ===== -The :py:mod:`utils` module provides utility functions for data extraction, formatting and loading. - .. 
automodule:: scribe_data.utils :members: :private-members: diff --git a/setup.py b/setup.py index 30aaa311..80c601da 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ name="scribe-data", packages=find_packages(where="src"), package_dir={"": "src"}, - version="3.2.0", + version="3.2.1", author="Andrew Tavis McAllister", author_email="andrew.t.mcallister@gmail.com", classifiers=[ @@ -42,6 +42,7 @@ python_requires=">=3.9", install_requires=requirements, package_data={"": ["2021_ranked.tsv"]}, + include_package_data=True, description="Wikidata and Wikipedia language data extraction", long_description=long_description, long_description_content_type="text/markdown", diff --git a/src/scribe_data/checkquery.py b/src/scribe_data/checkquery.py index 1714bcd9..c9949443 100755 --- a/src/scribe_data/checkquery.py +++ b/src/scribe_data/checkquery.py @@ -4,10 +4,10 @@ Command line tool for testing SPARQL queries against an endpoint. Contents: - QueryFile Class: + QueryFile Class load, __repr__, - QueryExecutionException: + QueryExecutionException Class __init__, __str__, ping, @@ -20,7 +20,7 @@ check_timeout, main, error_report, - success_report, + success_report """ import argparse @@ -103,7 +103,7 @@ def ping(url: str, timeout: int) -> bool: Test if a URL is reachable. Parameters - --------- + ---------- url : str The URL to test. 
diff --git a/src/scribe_data/extract_transform/update_words_to_translate.py b/src/scribe_data/extract_transform/update_words_to_translate.py index 8fed013f..ca238904 100644 --- a/src/scribe_data/extract_transform/update_words_to_translate.py +++ b/src/scribe_data/extract_transform/update_words_to_translate.py @@ -17,6 +17,7 @@ import json import os import sys +import urllib from SPARQLWrapper import JSON, POST, SPARQLWrapper from tqdm.auto import tqdm @@ -25,7 +26,7 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import ( +from scribe_data.utils import ( # noqa: E402 check_and_return_command_line_args, get_language_qid, get_scribe_languages, @@ -47,37 +48,37 @@ if languages is None: languages = get_scribe_languages() -for l in tqdm( +for lang in tqdm( languages, desc="Data updated", unit="languages", ): - print(f"Querying words for {l}...") + print(f"Querying words for {lang}...") # First format the lines into a multi-line string and then pass this to SPARQLWrapper. with open("query_words_to_translate.sparql", encoding="utf-8") as file: query_lines = file.readlines() - query = "".join(query_lines).replace("LANGUAGE_QID", get_language_qid(l)) + query = "".join(query_lines).replace("LANGUAGE_QID", get_language_qid(lang)) sparql.setQuery(query) results = None try: results = sparql.query().convert() - except HTTPError as err: - print(f"HTTPError with query_words_to_translate.sparql for {l}: {err}") + except urllib.error.HTTPError as err: + print(f"HTTPError with query_words_to_translate.sparql for {lang}: {err}") if results is None: print( - f"Nothing returned by the WDQS server for query_words_to_translate.sparql for {l}" + f"Nothing returned by the WDQS server for query_words_to_translate.sparql for {lang}" ) # Allow for a query to be rerun up to two times. 
- if languages.count(l) < 3: - languages.append(l) + if languages.count(lang) < 3: + languages.append(lang) else: # Subset the returned JSON and the individual results before saving. - print(f"Success! Formatting {l} words...") + print(f"Success! Formatting {lang} words...") query_results = results["results"]["bindings"] results_formatted = [] @@ -87,11 +88,11 @@ results_formatted.append(r_dict) with open( - f"{PATH_TO_ET_FILES}{l}/translations/words_to_translate.json", + f"{PATH_TO_ET_FILES}{lang}/translations/words_to_translate.json", "w", encoding="utf-8", ) as f: json.dump(results_formatted, f, ensure_ascii=False, indent=0) print( - f"Wrote the words to translate to {PATH_TO_ET_FILES}{l}/translations/words_to_translate.json" + f"Wrote the words to translate to {PATH_TO_ET_FILES}{lang}/translations/words_to_translate.json" ) diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py index e52b753e..fcbbdac8 100644 --- a/src/scribe_data/utils.py +++ b/src/scribe_data/utils.py @@ -1,7 +1,4 @@ """ -Update Utils ------------- - Utility functions for data extraction, formatting and loading. Contents: @@ -37,7 +34,8 @@ def _load_json(package_path: str, file_name: str, root: str): - """Loads a JSON resource from a package into a python entity. + """ + Loads a JSON resource from a package into a python entity. Parameters ---------- @@ -54,8 +52,7 @@ def _load_json(package_path: str, file_name: str, root: str): ------- A python entity starting at 'root'. """ - # add 'Scribe-Data/src' to PYTHONPATH so that resources.files() - # can find 'package_path' + # Add 'Scribe-Data/src' to PYTHONPATH so that resources.files() can find 'package_path'. 
parts = Path(__file__).resolve().parts prj_root_idx = parts.index(PROJECT_ROOT) package_root = str(Path(*parts[: prj_root_idx + 1], "src")) @@ -71,7 +68,7 @@ def _load_json(package_path: str, file_name: str, root: str): _languages = _load_json( - package_path="scribe_data.resources", + package_path="scribe_data/resources", file_name="language_meta_data.json", root="languages", )