
Commit

Package changes to hopefully include non-.py files
andrewtavis committed Feb 24, 2024
1 parent 883d098 commit d55bf2a
Showing 8 changed files with 36 additions and 28 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -16,6 +16,14 @@ Emojis for the following are chosen based on [gitmoji](https://gitmoji.dev/).
 - Scribe-Data now outputs an SQLite table that has keys for target languages for each base language. -->
 <!-- - English has been added to the data ETL process. -->
 
+## Scribe-Data 3.2.1
+
+### ♻️ Code Refactoring
+
+- The docs and tests were grafted into the package using `MANIFEST.in`.
+- Minor fixes to file and function docstrings and documentation files.
+- `include_package_data=True` is used in `setup.py` to hopefully include all files in the package distribution.
+
 ## Scribe-Data 3.2.0
 
 ### ✨ Features
5 changes: 4 additions & 1 deletion MANIFEST.in
@@ -1,4 +1,7 @@
-include CHANGELOG.* LICENSE.*
+include CHANGELOG.* CONTRIBUTING.* LICENSE.*
 graft src
+graft docs
+prune docs/build
+graft tests
 global-exclude *.py[cod]
 global-exclude .DS_Store
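
With `graft`, whole directory trees (here `src`, `docs`, and `tests`) are copied into the source distribution, while `prune` and `global-exclude` drop build output and byte-compiled files again. One way to check the result is to build an sdist and list its contents. The snippet below is a minimal sketch, assuming an sdist has already been built (for example with `python -m build`) and using a hypothetical archive name.

```python
# Minimal sketch: list an sdist's contents to confirm that docs/, tests/ and
# CHANGELOG.md were grafted in. The archive path is hypothetical.
import tarfile

sdist_path = "dist/scribe-data-3.2.1.tar.gz"  # assumed output of `python -m build`

with tarfile.open(sdist_path, "r:gz") as sdist:
    names = sdist.getnames()

for wanted in ("docs/", "tests/", "CHANGELOG.md"):
    matches = [n for n in names if wanted in n]
    print(f"{wanted}: {len(matches)} file(s) included")
```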
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -27,7 +27,7 @@
 author = "Scribe-Data developers"
 
 # The full version, including alpha/beta/rc tags
-release = "3.2.0"
+release = "3.2.1"
 
 
 # -- General configuration ---------------------------------------------------
2 changes: 0 additions & 2 deletions docs/source/utils.rst
@@ -1,8 +1,6 @@
 utils
 =====
 
-The :py:mod:`utils` module provides utility functions for data extraction, formatting and loading.
-
 .. automodule:: scribe_data.utils
 :members:
 :private-members:
3 changes: 2 additions & 1 deletion setup.py
@@ -24,7 +24,7 @@
 name="scribe-data",
 packages=find_packages(where="src"),
 package_dir={"": "src"},
-version="3.2.0",
+version="3.2.1",
 author="Andrew Tavis McAllister",
 author_email="[email protected]",
 classifiers=[
@@ -42,6 +42,7 @@
 python_requires=">=3.9",
 install_requires=requirements,
 package_data={"": ["2021_ranked.tsv"]},
+include_package_data=True,
 description="Wikidata and Wikipedia language data extraction",
 long_description=long_description,
 long_description_content_type="text/markdown",
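
Setting `include_package_data=True` tells setuptools to also ship the non-`.py` files picked up by `MANIFEST.in` that live inside package directories, alongside the explicit `package_data` entry for `2021_ranked.tsv`. The sketch below shows one way such a bundled file could be read at runtime with `importlib.resources`; the file's exact location inside the installed package is an assumption for illustration.

```python
# Minimal sketch: access a data file bundled with the package via importlib.resources.
# The resource path used here is hypothetical; the real file may live in a subpackage.
from importlib import resources

ranked_words = resources.files("scribe_data").joinpath("2021_ranked.tsv")

if ranked_words.is_file():
    lines = ranked_words.read_text(encoding="utf-8").splitlines()
    print(f"Loaded {len(lines)} ranked entries")
else:
    print("2021_ranked.tsv not found at this path; check the package layout")
```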
8 changes: 4 additions & 4 deletions src/scribe_data/checkquery.py
@@ -4,10 +4,10 @@
 Command line tool for testing SPARQl queries against an endpoint.
 Contents:
-QueryFile Class:
+QueryFile Class
 load,
 __repr__,
-QueryExecutionException:
+QueryExecutionException Class
 __init__,
 __str__,
 ping,
@@ -20,7 +20,7 @@
 check_timeout,
 main,
 error_report,
-success_report,
+success_report
 """
 
 import argparse
@@ -103,7 +103,7 @@ def ping(url: str, timeout: int) -> bool:
 Test if a URL is reachable.
 Parameters
----------
+----------
 url : str
 The URL to test.
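
The `ping` helper documented here returns a boolean indicating whether an endpoint URL answers within a timeout. Below is a minimal sketch of such a check using only the standard library; it illustrates the idea and is not the project's actual implementation.

```python
# Minimal reachability check in the spirit of ping(url, timeout);
# checkquery.py's real implementation may differ.
import urllib.error
import urllib.request


def ping(url: str, timeout: int) -> bool:
    """Return True if the URL answers an HTTP request within the timeout."""
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.status < 400
    except (urllib.error.URLError, ValueError):
        return False
```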
25 changes: 13 additions & 12 deletions src/scribe_data/extract_transform/update_words_to_translate.py
@@ -17,6 +17,7 @@
 import json
 import os
 import sys
+import urllib
 
 from SPARQLWrapper import JSON, POST, SPARQLWrapper
 from tqdm.auto import tqdm
@@ -25,7 +26,7 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import (
+from scribe_data.utils import ( # noqa: E402
 check_and_return_command_line_args,
 get_language_qid,
 get_scribe_languages,
@@ -47,37 +48,37 @@
 if languages is None:
 languages = get_scribe_languages()
 
-for l in tqdm(
+for lang in tqdm(
 languages,
 desc="Data updated",
 unit="languages",
 ):
-print(f"Querying words for {l}...")
+print(f"Querying words for {lang}...")
 # First format the lines into a multi-line string and then pass this to SPARQLWrapper.
 with open("query_words_to_translate.sparql", encoding="utf-8") as file:
 query_lines = file.readlines()
 
-query = "".join(query_lines).replace("LANGUAGE_QID", get_language_qid(l))
+query = "".join(query_lines).replace("LANGUAGE_QID", get_language_qid(lang))
 sparql.setQuery(query)
 
 results = None
 try:
 results = sparql.query().convert()
-except HTTPError as err:
-print(f"HTTPError with query_words_to_translate.sparql for {l}: {err}")
+except urllib.error.HTTPError as err:
+print(f"HTTPError with query_words_to_translate.sparql for {lang}: {err}")
 
 if results is None:
 print(
-f"Nothing returned by the WDQS server for query_words_to_translate.sparql for {l}"
+f"Nothing returned by the WDQS server for query_words_to_translate.sparql for {lang}"
 )
 
 # Allow for a query to be reran up to two times.
-if languages.count(l) < 3:
-languages.append(l)
+if languages.count(lang) < 3:
+languages.append(lang)
 
 else:
 # Subset the returned JSON and the individual results before saving.
-print(f"Success! Formatting {l} words...")
+print(f"Success! Formatting {lang} words...")
 query_results = results["results"]["bindings"]
 
 results_formatted = []
@@ -87,11 +88,11 @@
 results_formatted.append(r_dict)
 
 with open(
-f"{PATH_TO_ET_FILES}{l}/translations/words_to_translate.json",
+f"{PATH_TO_ET_FILES}{lang}/translations/words_to_translate.json",
 "w",
 encoding="utf-8",
 ) as f:
 json.dump(results_formatted, f, ensure_ascii=False, indent=0)
 print(
-f"Wrote the words to translate to {PATH_TO_ET_FILES}{l}/translations/words_to_translate.json"
+f"Wrote the words to translate to {PATH_TO_ET_FILES}{lang}/translations/words_to_translate.json"
 )
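
Besides renaming the single-letter loop variable `l` to `lang`, the loop above retries a failed Wikidata query by appending the language back onto the list being iterated and capping attempts with `list.count`. The standalone sketch below isolates that count-based retry pattern; `run_query` is a hypothetical stand-in for the SPARQL call.

```python
# Standalone sketch of the count-based retry used above: a failed item is appended
# back onto the list being iterated, and list.count() caps it at three attempts.
import random
from typing import Optional


def run_query(lang: str) -> Optional[dict]:
    """Hypothetical stand-in for the SPARQL query; fails half of the time."""
    return {"lang": lang} if random.random() < 0.5 else None


languages = ["German", "Spanish", "French"]

for lang in languages:  # the list grows during iteration, so retries are picked up
    results = run_query(lang)
    if results is not None:
        print(f"Success for {lang}")
    elif languages.count(lang) < 3:
        languages.append(lang)  # allow up to two re-runs per language
    else:
        print(f"Giving up on {lang} after three attempts")
```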
11 changes: 4 additions & 7 deletions src/scribe_data/utils.py
@@ -1,7 +1,4 @@
"""
Update Utils
------------
Utility functions for data extraction, formatting and loading.
Contents:
@@ -37,7 +34,8 @@
 
 
 def _load_json(package_path: str, file_name: str, root: str):
-"""Loads a JSON resource from a package into a python entity.
+"""
+Loads a JSON resource from a package into a python entity.
 Parameters
 ----------
@@ -54,8 +52,7 @@ def _load_json(package_path: str, file_name: str, root: str):
 -------
 A python entity starting at 'root'.
 """
-# add 'Scribe-Data/src' to PYTHONPATH so that resources.files()
-# can find 'package_path'
+# Add 'Scribe-Data/src' to PYTHONPATH so that resources.files() can find 'package_path'.
 parts = Path(__file__).resolve().parts
 prj_root_idx = parts.index(PROJECT_ROOT)
 package_root = str(Path(*parts[: prj_root_idx + 1], "src"))
@@ -71,7 +68,7 @@ def _load_json(package_path: str, file_name: str, root: str):
 
 
 _languages = _load_json(
-package_path="scribe_data.resources",
+package_path="scribe_data/resources",
 file_name="language_meta_data.json",
 root="languages",
 )
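
`_load_json` reads a JSON resource that ships inside the package and returns the subtree under `root`, which is how the `language_meta_data.json` metadata above is loaded. The sketch below is a simplified stand-in using `importlib.resources` directly, with a dotted package name as the anchor (an assumption; the project's helper resolves `package_path` its own way).

```python
# Simplified stand-in for _load_json: read a bundled JSON file and return the
# subtree at `root`. Assumes scribe_data.resources is an importable package.
import json
from importlib import resources


def load_json_resource(package: str, file_name: str, root: str):
    """Load file_name from package and return the entity stored under root."""
    resource = resources.files(package).joinpath(file_name)
    with resource.open("r", encoding="utf-8") as f:
        return json.load(f)[root]


# Hypothetical usage mirroring the call above.
languages = load_json_resource("scribe_data.resources", "language_meta_data.json", "languages")
```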

