From 69e5df87789b7adc25968703461d7c08790f8773 Mon Sep 17 00:00:00 2001 From: Jan-Christoph Klie Date: Thu, 21 Apr 2022 10:10:49 +0200 Subject: [PATCH 1/7] No issue - Fix test It broke because the Bavarian Wiki got updated. --- tests/test_mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_mapper.py b/tests/test_mapper.py index f5de261..205c643 100644 --- a/tests/test_mapper.py +++ b/tests/test_mapper.py @@ -12,7 +12,7 @@ pytest.param("Quadrátkilometa", "Q25343", id="Has redirect"), pytest.param("D'_boarische_Woocha", "Q20616808", id="Has special character"), pytest.param("I am not in the Wiki", None, id="Title not in the wiki"), - pytest.param("Pergentinus_und_Laurentinus", None, id="In the index, but not mapped"), + pytest.param("tungsten", None, id="In the index, but not mapped"), ] From 1d042151796d55e3bfc8efcd8285a528f97485b6 Mon Sep 17 00:00:00 2001 From: Jan-Christoph Klie Date: Thu, 21 Apr 2022 10:14:56 +0200 Subject: [PATCH 2/7] #7 - Avoiding Overflow Error when Building Index on Windows On Windows, the C long that csv.field_size_limit accepts is 32 bits wide, while on Linux it is more often than not 64 bits, so passing sys.maxsize overflows on Windows. Therefore, we use a ctypes-based hack to determine the largest value that fits. 
--- wikimapper/processor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wikimapper/processor.py b/wikimapper/processor.py index 024ddd0..b1f5179 100644 --- a/wikimapper/processor.py +++ b/wikimapper/processor.py @@ -7,7 +7,7 @@ import logging import os import sqlite3 -import sys +import ctypes as ct _logger = logging.getLogger(__name__) @@ -114,7 +114,8 @@ def create_index(dumpname: str, path_to_dumps: str, path_to_db: str = None) -> s page_props_dump = os.path.join(path_to_dumps, dumpname + "-page_props.sql.gz") redirects_dump = os.path.join(path_to_dumps, dumpname + "-redirect.sql.gz") - csv.field_size_limit(sys.maxsize) + # https://stackoverflow.com/a/54517228 + csv.field_size_limit(int(ct.c_ulong(-1).value // 2)) # (Re)Create the database file try: From 552d8f5d6b2dc1b892c31b392a31317b5df267ae Mon Sep 17 00:00:00 2001 From: Jan-Christoph Klie Date: Thu, 21 Apr 2022 10:17:02 +0200 Subject: [PATCH 3/7] No issue - Move fixtures to conftest We then do not need to import it, as pytest does that magically for us. 
--- tests/{fixtures.py => conftest.py} | 0 tests/test_mapper.py | 1 - tests/test_processor.py | 1 - 3 files changed, 2 deletions(-) rename tests/{fixtures.py => conftest.py} (100%) diff --git a/tests/fixtures.py b/tests/conftest.py similarity index 100% rename from tests/fixtures.py rename to tests/conftest.py diff --git a/tests/test_mapper.py b/tests/test_mapper.py index 205c643..18f6693 100644 --- a/tests/test_mapper.py +++ b/tests/test_mapper.py @@ -1,6 +1,5 @@ import pytest -from tests.fixtures import * BAVARIAN_PARAMS = [ pytest.param("Stoaboog", "Q168327"), diff --git a/tests/test_processor.py b/tests/test_processor.py index 30109be..6071b37 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -3,7 +3,6 @@ from wikimapper import create_index -from tests.fixtures import * def test_create_index(tmpdir, bavarian_wiki_dump): From 1ea7debb90550ce319cc3da6cb45222c7532453c Mon Sep 17 00:00:00 2001 From: Jan-Christoph Klie Date: Thu, 21 Apr 2022 10:19:19 +0200 Subject: [PATCH 4/7] No issue - Format files and isort --- Makefile | 5 +++++ setup.py | 1 + tests/conftest.py | 6 ++---- tests/test_mapper.py | 1 - tests/test_processor.py | 1 - wikimapper/cli.py | 6 +++--- wikimapper/download.py | 4 ++-- wikimapper/mapper.py | 8 ++++---- wikimapper/processor.py | 4 ++-- 9 files changed, 19 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index ea328d1..eefab82 100644 --- a/Makefile +++ b/Makefile @@ -5,5 +5,10 @@ black: black -l 100 wikimapper/ black -l 100 tests/ +isort: + isort --profile black wikimapper/ tests/ + +format: black isort + html: cd docs && make html \ No newline at end of file diff --git a/setup.py b/setup.py index ad26501..993fe8a 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ dev_dependencies = [ "black", + "isort", "twine", "pygments", "wheel" diff --git a/tests/conftest.py b/tests/conftest.py index 4c2ce38..0a08a3a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,17 +1,15 @@ from collections import 
namedtuple -import os import pytest -from wikimapper import download_wikidumps, create_index, WikiMapper - +from wikimapper import WikiMapper, create_index, download_wikidumps Wiki = namedtuple("Wiki", ["dumpname", "path"]) @pytest.fixture(scope="package") def bavarian_wiki_dump(tmpdir_factory) -> Wiki: - """ We download the Bavarian Wiki, as it is quite small. """ + """We download the Bavarian Wiki, as it is quite small.""" dumpname = "barwiki-latest" path = tmpdir_factory.mktemp("dumps").strpath diff --git a/tests/test_mapper.py b/tests/test_mapper.py index 18f6693..866d0f3 100644 --- a/tests/test_mapper.py +++ b/tests/test_mapper.py @@ -1,6 +1,5 @@ import pytest - BAVARIAN_PARAMS = [ pytest.param("Stoaboog", "Q168327"), pytest.param("Wechslkrod", "Q243242"), diff --git a/tests/test_processor.py b/tests/test_processor.py index 6071b37..aaa5274 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -4,7 +4,6 @@ from wikimapper import create_index - def test_create_index(tmpdir, bavarian_wiki_dump): path_to_db = tmpdir.mkdir("processor").join("index_test.db").strpath diff --git a/wikimapper/cli.py b/wikimapper/cli.py index 1c43d06..6624b76 100644 --- a/wikimapper/cli.py +++ b/wikimapper/cli.py @@ -2,8 +2,8 @@ import logging import os +from wikimapper import WikiMapper, create_index, download_wikidumps from wikimapper.__version__ import __version__ -from wikimapper import download_wikidumps, create_index, WikiMapper def main(): @@ -126,7 +126,7 @@ def main(): def _dir_path(path) -> str: - """ Checks whether `path` is a valid path to a directory. """ + """Checks whether `path` is a valid path to a directory.""" if os.path.isdir(path): return path else: @@ -134,7 +134,7 @@ def _dir_path(path) -> str: def _dump_name(name) -> str: - """ Checks whether `name` is a valid Wikipedia dump name. 
""" + """Checks whether `name` is a valid Wikipedia dump name.""" parts = name.split("-") err = lambda: argparse.ArgumentTypeError(f"dumpname: [{name}] is not a valid dump name") diff --git a/wikimapper/download.py b/wikimapper/download.py index b9b5405..28766fb 100644 --- a/wikimapper/download.py +++ b/wikimapper/download.py @@ -14,7 +14,7 @@ def _report_hook(count: int, block_size: int, total_size: int): def _download_file(url: str, target: str, overwrite: bool): - """ Downloads the content identified by `url` and saves it in `target`.""" + """Downloads the content identified by `url` and saves it in `target`.""" if os.path.exists(target) and not overwrite: _logger.info("[%s] already exists, skipping downloading [%s]!", target, url) return @@ -29,7 +29,7 @@ def _download_file(url: str, target: str, overwrite: bool): def download_wikidumps( dumpname: str, path: str, mirror: str = "https://dumps.wikimedia.org/", overwrite: bool = False ): - """ Downloads pages, page props and redirect SQL dumps for the dump + """Downloads pages, page props and redirect SQL dumps for the dump specified by `dumpname` to the folder `path`. If `overwrite` is true, then it is downloaded again even if the files already exist. diff --git a/wikimapper/mapper.py b/wikimapper/mapper.py index 3572d09..ae296e5 100644 --- a/wikimapper/mapper.py +++ b/wikimapper/mapper.py @@ -3,13 +3,13 @@ class WikiMapper: - """ Uses a precomputed database created by `create_wikipedia_wikidata_mapping_db`. """ + """Uses a precomputed database created by `create_wikipedia_wikidata_mapping_db`.""" def __init__(self, path_to_db: str): self._path_to_db = path_to_db def title_to_id(self, page_title: str) -> Optional[str]: - """ Given a Wikipedia page title, returns the corresponding Wikidata ID. + """Given a Wikipedia page title, returns the corresponding Wikidata ID. The page title is the last part of a Wikipedia url **unescaped** and spaces replaced by underscores , e.g. 
for `https://en.wikipedia.org/wiki/Fermat%27s_Last_Theorem`, @@ -35,7 +35,7 @@ def title_to_id(self, page_title: str) -> Optional[str]: return None def url_to_id(self, wiki_url: str) -> Optional[str]: - """ Given an URL to a Wikipedia page, returns the corresponding Wikidata ID. + """Given an URL to a Wikipedia page, returns the corresponding Wikidata ID. This is just a convenience function. It is not checked whether the index and URL are from the same dump. @@ -53,7 +53,7 @@ def url_to_id(self, wiki_url: str) -> Optional[str]: return self.title_to_id(title) def id_to_titles(self, wikidata_id: str) -> List[str]: - """ Given a Wikidata ID, return a list of corresponding pages that are linked to it. + """Given a Wikidata ID, return a list of corresponding pages that are linked to it. Due to redirects, the mapping from Wikidata ID to Wikipedia title is not unique. diff --git a/wikimapper/processor.py b/wikimapper/processor.py index b1f5179..e7dee2b 100644 --- a/wikimapper/processor.py +++ b/wikimapper/processor.py @@ -3,11 +3,11 @@ """ import csv +import ctypes as ct import gzip import logging import os import sqlite3 -import ctypes as ct _logger = logging.getLogger(__name__) @@ -91,7 +91,7 @@ def _parse_values(values): def create_index(dumpname: str, path_to_dumps: str, path_to_db: str = None) -> str: - """ Creates an index mapping Wikipedia page titles to Wikidata IDs and vice versa. + """Creates an index mapping Wikipedia page titles to Wikidata IDs and vice versa. This requires a previously downloaded dump `dumpname` in `path_to_dumps`. 
Args: From 8aa10478bc79c2e37320cd76ffa33cedaeace189 Mon Sep 17 00:00:00 2001 From: Jan-Christoph Klie Date: Thu, 21 Apr 2022 10:26:46 +0200 Subject: [PATCH 5/7] No issue - Remove travis, add Github actions --- .coveragerc | 7 ------- .github/workflows/run_tests.yml | 29 +++++++++++++++++++++++++++++ .travis.yml | 14 -------------- tox.ini | 9 --------- 4 files changed, 29 insertions(+), 30 deletions(-) delete mode 100644 .coveragerc create mode 100644 .github/workflows/run_tests.yml delete mode 100644 .travis.yml delete mode 100644 tox.ini diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 5f23af4..0000000 --- a/.coveragerc +++ /dev/null @@ -1,7 +0,0 @@ -[run] -source=wikimapper -omit= - setup.py - wikimapper/__init__.py - wikimapper/__version__.py - wikimapper/cli.py \ No newline at end of file diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 0000000..4eedc0e --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,29 @@ +name: Run Tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest] + python-version: [3.7, 3.8, 3.9, 3.10] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + pip install --upgrade -e .[test] + - name: Run tests + run: | + pytest diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 58a0c1b..0000000 --- a/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -language: python - -python: - - "3.5" - - "3.6" - - "3.7-dev" - -install: - - pip install --upgrade -e .[test] -script: - - pytest --cov=./ - -after_success: - - codecov \ No newline at end of file diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 809383f..0000000 --- a/tox.ini +++ /dev/null @@ -1,9 
+0,0 @@ -[tox] -skipsdist = True -skip_missing_interpreters = True -envlist = py35, py36, py37, py38dev - -[testenv] -whitelist_externals=make -commands = make test -deps = .[test] \ No newline at end of file From 7f2648899d7a5fd04f073eebf08271c2ab0b86af Mon Sep 17 00:00:00 2001 From: Jan-Christoph Klie Date: Thu, 21 Apr 2022 10:28:12 +0200 Subject: [PATCH 6/7] No issue - Remove travis, add Github actions --- .github/workflows/run_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 4eedc0e..7cb3c3e 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -2,9 +2,9 @@ name: Run Tests on: push: - branches: [ main ] + branches: [ master ] pull_request: - branches: [ main ] + branches: [ master ] jobs: build: From 056e73bd358ebbb781a6eecaa15f37516c0cc2fe Mon Sep 17 00:00:00 2001 From: Jan-Christoph Klie Date: Thu, 21 Apr 2022 10:29:50 +0200 Subject: [PATCH 7/7] No issue - Remove unneeded dependencies --- .github/workflows/run_tests.yml | 2 +- setup.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 7cb3c3e..1eeaa02 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest] - python-version: [3.7, 3.8, 3.9, 3.10] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 diff --git a/setup.py b/setup.py index 993fe8a..946daef 100644 --- a/setup.py +++ b/setup.py @@ -22,10 +22,7 @@ install_requires=[] test_dependencies = [ - "tox", "pytest", - "codecov", - "pytest-cov", ] dev_dependencies = [