
Commit

Package changes to hopefully include non-.py files
andrewtavis committed Feb 24, 2024
1 parent 883d098 commit d55bf2a
Showing 8 changed files with 36 additions and 28 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -16,6 +16,14 @@ Emojis for the following are chosen based on [gitmoji](https://gitmoji.dev/).
 - Scribe-Data now outputs an SQLite table that has keys for target languages for each base language. -->
 <!-- - English has been added to the data ETL process. -->
 
+## Scribe-Data 3.2.1
+
+### ♻️ Code Refactoring
+
+- The docs and tests were grafted into the package using `MANIFEST.in`.
+- Minor fixes to file and function docstrings and documentation files.
+- `include_package_data=True` is used in `setup.py` to hopefully include all files in the package distribution.
+
 ## Scribe-Data 3.2.0
 
 ### ✨ Features
5 changes: 4 additions & 1 deletion MANIFEST.in
@@ -1,4 +1,7 @@
-include CHANGELOG.* LICENSE.*
+include CHANGELOG.* CONTRIBUTING.* LICENSE.*
 graft src
+graft docs
+prune docs/build
+graft tests
 global-exclude *.py[cod]
 global-exclude .DS_Store
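
With `graft`, whole directory trees (here `src`, `docs`, and `tests`) are copied into the source distribution, while `prune` and `global-exclude` drop build output and byte-compiled files again. One way to check the result is to build an sdist and list its contents. The snippet below is a minimal sketch, assuming an sdist has already been built (for example with `python -m build`) and using a hypothetical archive name.

```python
# Minimal sketch: list an sdist's contents to confirm that docs/, tests/ and
# CHANGELOG.md were grafted in. The archive path is hypothetical.
import tarfile

sdist_path = "dist/scribe-data-3.2.1.tar.gz"  # assumed output of `python -m build`

with tarfile.open(sdist_path, "r:gz") as sdist:
    names = sdist.getnames()

for wanted in ("docs/", "tests/", "CHANGELOG.md"):
    matches = [n for n in names if wanted in n]
    print(f"{wanted}: {len(matches)} file(s) included")
```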
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -27,7 +27,7 @@
 author = "Scribe-Data developers"
 
 # The full version, including alpha/beta/rc tags
-release = "3.2.0"
+release = "3.2.1"
 
 
 # -- General configuration ---------------------------------------------------
2 changes: 0 additions & 2 deletions docs/source/utils.rst
@@ -1,8 +1,6 @@
 utils
 =====
 
-The :py:mod:`utils` module provides utility functions for data extraction, formatting and loading.
-
 .. automodule:: scribe_data.utils
 :members:
 :private-members:
3 changes: 2 additions & 1 deletion setup.py
@@ -24,7 +24,7 @@
 name="scribe-data",
 packages=find_packages(where="src"),
 package_dir={"": "src"},
-version="3.2.0",
+version="3.2.1",
 author="Andrew Tavis McAllister",
 author_email="[email protected]",
 classifiers=[
@@ -42,6 +42,7 @@
 python_requires=">=3.9",
 install_requires=requirements,
 package_data={"": ["2021_ranked.tsv"]},
+include_package_data=True,
 description="Wikidata and Wikipedia language data extraction",
 long_description=long_description,
 long_description_content_type="text/markdown",
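
Setting `include_package_data=True` tells setuptools to also ship the non-`.py` files picked up by `MANIFEST.in` that live inside package directories, alongside the explicit `package_data` entry for `2021_ranked.tsv`. The sketch below shows one way such a bundled file could be read at runtime with `importlib.resources`; the file's exact location inside the installed package is an assumption for illustration.

```python
# Minimal sketch: access a data file bundled with the package via importlib.resources.
# The resource path used here is hypothetical; the real file may live in a subpackage.
from importlib import resources

ranked_words = resources.files("scribe_data").joinpath("2021_ranked.tsv")

if ranked_words.is_file():
    lines = ranked_words.read_text(encoding="utf-8").splitlines()
    print(f"Loaded {len(lines)} ranked entries")
else:
    print("2021_ranked.tsv not found at this path; check the package layout")
```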
8 changes: 4 additions & 4 deletions src/scribe_data/checkquery.py
@@ -4,10 +4,10 @@
 Command line tool for testing SPARQl queries against an endpoint.
 Contents:
-QueryFile Class:
+QueryFile Class
 load,
 __repr__,
-QueryExecutionException:
+QueryExecutionException Class
 __init__,
 __str__,
 ping,
@@ -20,7 +20,7 @@
 check_timeout,
 main,
 error_report,
-success_report,
+success_report
 """
 
 import argparse
@@ -103,7 +103,7 @@ def ping(url: str, timeout: int) -> bool:
 Test if a URL is reachable.
 Parameters
----------
+----------
 url : str
 The URL to test.
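
The `ping` helper documented here returns a boolean indicating whether an endpoint URL answers within a timeout. Below is a minimal sketch of such a check using only the standard library; it illustrates the idea and is not the project's actual implementation.

```python
# Minimal reachability check in the spirit of ping(url, timeout);
# checkquery.py's real implementation may differ.
import urllib.error
import urllib.request


def ping(url: str, timeout: int) -> bool:
    """Return True if the URL answers an HTTP request within the timeout."""
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.status < 400
    except (urllib.error.URLError, ValueError):
        return False
```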
25 changes: 13 additions & 12 deletions src/scribe_data/extract_transform/update_words_to_translate.py
@@ -17,6 +17,7 @@
 import json
 import os
 import sys
+import urllib
 
 from SPARQLWrapper import JSON, POST, SPARQLWrapper
 from tqdm.auto import tqdm
@@ -25,7 +26,7 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import (
+from scribe_data.utils import ( # noqa: E402
 check_and_return_command_line_args,
 get_language_qid,
 get_scribe_languages,
@@ -47,37 +48,37 @@
 if languages is None:
 languages = get_scribe_languages()
 
-for l in tqdm(
+for lang in tqdm(
 languages,
 desc="Data updated",
 unit="languages",
 ):
-print(f"Querying words for {l}...")
+print(f"Querying words for {lang}...")
 # First format the lines into a multi-line string and then pass this to SPARQLWrapper.
 with open("query_words_to_translate.sparql", encoding="utf-8") as file:
 query_lines = file.readlines()
 
-query = "".join(query_lines).replace("LANGUAGE_QID", get_language_qid(l))
+query = "".join(query_lines).replace("LANGUAGE_QID", get_language_qid(lang))
 sparql.setQuery(query)
 
 results = None
 try:
 results = sparql.query().convert()
-except HTTPError as err:
-print(f"HTTPError with query_words_to_translate.sparql for {l}: {err}")
+except urllib.error.HTTPError as err:
+print(f"HTTPError with query_words_to_translate.sparql for {lang}: {err}")
 
 if results is None:
 print(
-f"Nothing returned by the WDQS server for query_words_to_translate.sparql for {l}"
+f"Nothing returned by the WDQS server for query_words_to_translate.sparql for {lang}"
 )
 
 # Allow for a query to be reran up to two times.
-if languages.count(l) < 3:
-languages.append(l)
+if languages.count(lang) < 3:
+languages.append(lang)
 
 else:
 # Subset the returned JSON and the individual results before saving.
-print(f"Success! Formatting {l} words...")
+print(f"Success! Formatting {lang} words...")
 query_results = results["results"]["bindings"]
 
 results_formatted = []
@@ -87,11 +88,11 @@
 results_formatted.append(r_dict)
 
 with open(
-f"{PATH_TO_ET_FILES}{l}/translations/words_to_translate.json",
+f"{PATH_TO_ET_FILES}{lang}/translations/words_to_translate.json",
 "w",
 encoding="utf-8",
 ) as f:
 json.dump(results_formatted, f, ensure_ascii=False, indent=0)
 print(
-f"Wrote the words to translate to {PATH_TO_ET_FILES}{l}/translations/words_to_translate.json"
+f"Wrote the words to translate to {PATH_TO_ET_FILES}{lang}/translations/words_to_translate.json"
 )
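
Besides renaming the single-letter loop variable `l` to `lang`, the loop above retries a failed Wikidata query by appending the language back onto the list being iterated and capping attempts with `list.count`. The standalone sketch below isolates that count-based retry pattern; `run_query` is a hypothetical stand-in for the SPARQL call.

```python
# Standalone sketch of the count-based retry used above: a failed item is appended
# back onto the list being iterated, and list.count() caps it at three attempts.
import random
from typing import Optional


def run_query(lang: str) -> Optional[dict]:
    """Hypothetical stand-in for the SPARQL query; fails half of the time."""
    return {"lang": lang} if random.random() < 0.5 else None


languages = ["German", "Spanish", "French"]

for lang in languages:  # the list grows during iteration, so retries are picked up
    results = run_query(lang)
    if results is not None:
        print(f"Success for {lang}")
    elif languages.count(lang) < 3:
        languages.append(lang)  # allow up to two re-runs per language
    else:
        print(f"Giving up on {lang} after three attempts")
```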
11 changes: 4 additions & 7 deletions src/scribe_data/utils.py
@@ -1,7 +1,4 @@
"""
Update Utils
------------
Utility functions for data extraction, formatting and loading.
Contents:
@@ -37,7 +34,8 @@
 
 
 def _load_json(package_path: str, file_name: str, root: str):
-"""Loads a JSON resource from a package into a python entity.
+"""
+Loads a JSON resource from a package into a python entity.
 Parameters
 ----------
@@ -54,8 +52,7 @@ def _load_json(package_path: str, file_name: str, root: str):
 -------
 A python entity starting at 'root'.
 """
-# add 'Scribe-Data/src' to PYTHONPATH so that resources.files()
-# can find 'package_path'
+# Add 'Scribe-Data/src' to PYTHONPATH so that resources.files() can find 'package_path'.
 parts = Path(__file__).resolve().parts
 prj_root_idx = parts.index(PROJECT_ROOT)
 package_root = str(Path(*parts[: prj_root_idx + 1], "src"))
@@ -71,7 +68,7 @@ def _load_json(package_path: str, file_name: str, root: str):
 
 
 _languages = _load_json(
-package_path="scribe_data.resources",
+package_path="scribe_data/resources",
 file_name="language_meta_data.json",
 root="languages",
 )
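
`_load_json` reads a JSON resource that ships inside the package and returns the subtree under `root`, which is how the `language_meta_data.json` metadata above is loaded. The sketch below is a simplified stand-in using `importlib.resources` directly, with a dotted package name as the anchor (an assumption; the project's helper resolves `package_path` its own way).

```python
# Simplified stand-in for _load_json: read a bundled JSON file and return the
# subtree at `root`. Assumes scribe_data.resources is an importable package.
import json
from importlib import resources


def load_json_resource(package: str, file_name: str, root: str):
    """Load file_name from package and return the entity stored under root."""
    resource = resources.files(package).joinpath(file_name)
    with resource.open("r", encoding="utf-8") as f:
        return json.load(f)[root]


# Hypothetical usage mirroring the call above.
languages = load_json_resource("scribe_data.resources", "language_meta_data.json", "languages")
```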

