-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
more tests for download/parse/extract
- Loading branch information
1 parent a53b9d9, commit 0610aa7
Showing 29 changed files with 470 additions and 19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import logging | ||
|
||
import pytest | ||
from bs4 import BeautifulSoup | ||
from wpextract.parse.translations import extract_translations | ||
from wpextract.parse.translations._pickers import PolylangCustomDropdown, PolylangWidget | ||
|
||
|
||
class FaultyDummyPicker(PolylangWidget): | ||
def extract(self) -> None: | ||
raise self._build_extraction_fail_err(".dummy") | ||
|
||
|
||
@pytest.fixture() | ||
def parsed_page(shared_datadir): | ||
soup = BeautifulSoup((shared_datadir / "polylang_widget.html").read_text(), "lxml") | ||
return soup | ||
|
||
|
||
def test_extract_translations(parsed_page): | ||
res = extract_translations( | ||
parsed_page, "https://example.org/current-lang-page/", translation_pickers=None | ||
) | ||
|
||
assert str(res.iloc[0]) == "en-US" | ||
assert len(res.iloc[1]) == 1 | ||
|
||
|
||
def test_none_matching(caplog, parsed_page): | ||
with caplog.at_level(logging.DEBUG): | ||
res = extract_translations( | ||
parsed_page, | ||
"https://example.org/current-lang-page/", | ||
translation_pickers=[PolylangCustomDropdown], | ||
) | ||
assert res.iloc[0] is None | ||
assert res.iloc[1] == [] | ||
|
||
assert "No translation pickers matched" in caplog.text | ||
|
||
|
||
def test_error_extracting(caplog, parsed_page): | ||
res = extract_translations( | ||
parsed_page, | ||
"https://example.org/current-lang-page/", | ||
translation_pickers=[FaultyDummyPicker], | ||
) | ||
|
||
assert res.iloc[0] is None | ||
assert "but failed to select element with: .dummy" in caplog.text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from typing import Union | ||
|
||
import pytest | ||
from langcodes import Language | ||
from wpextract.parse.translations import TranslationLink | ||
|
||
|
||
@pytest.mark.parametrize( | ||
("input_lang", "expected_language"), | ||
[ | ||
("en", "en"), | ||
("en-GB", "en-GB"), | ||
("fr-FR", "fr-FR"), | ||
(Language.get("en-GB"), "en-GB"), | ||
("zho", "zh"), | ||
], | ||
) | ||
def test_translation_link_lang( | ||
input_lang: Union[str, Language], expected_language: str | ||
): | ||
link = TranslationLink(text=None, href=None, destination=None, lang=input_lang) | ||
|
||
assert str(link.language) == expected_language |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"found": {}, "failed": [], "version": 1} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"found": {"fr/an-example-post-translation/index.html": "https://example.org/fr/an-example-post-translation/", "an-example-post/index.html": "https://example.org/an-example-post/", "an-unrelated-post/index.html": "https://example.org/an-unrelated-post/"}, "failed": ["no-self-url.html"], "version": 2} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8" /> | ||
<meta | ||
name="viewport" | ||
content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0" | ||
/> | ||
<meta http-equiv="X-UA-Compatible" content="ie=edge" /> | ||
<title>An Example Post - example.org</title> | ||
<link rel="canonical" href="https://example.org/an-example-post/" /> | ||
<meta name="post_id_for_mock" content="1" /> | ||
</head> | ||
<body> | ||
<p>This is an example post</p> | ||
<p>It has two paragraphs.</p> | ||
<img src="https://example.org/wp-content/uploads/2022/12/test-image.jpg" alt="an image"> | ||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8" /> | ||
<meta | ||
name="viewport" | ||
content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0" | ||
/> | ||
<meta http-equiv="X-UA-Compatible" content="ie=edge" /> | ||
<title>An Unrelated Post - example.org</title> | ||
<link rel="canonical" href="https://example.org/an-unrelated-post/" /> | ||
<meta name="post_id_for_mock" content="3" /> | ||
</head> | ||
<body> | ||
<p>This is an unrelated post.</p> | ||
<p>It has two paragraphs.</p> | ||
</body> | ||
</html> |
tests/scrape/data/scrape/fr/an-example-post-translation/index.html (21 changes: 21 additions & 0 deletions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8" /> | ||
<meta | ||
name="viewport" | ||
content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0" | ||
/> | ||
<meta http-equiv="X-UA-Compatible" content="ie=edge" /> | ||
<title>An Example Post Translation - example.org</title> | ||
<link | ||
rel="canonical" | ||
href="https://example.org/fr/an-example-post-translation/" | ||
/> | ||
<meta name="post_id_for_mock" content="2" /> | ||
</head> | ||
<body> | ||
<p>This is a translation of the post.</p> | ||
<p>It has two paragraphs.</p> | ||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
<!doctype html> | ||
<html> | ||
<head> | ||
<title>Test Document</title> | ||
</head> | ||
<body> | ||
<h1>test document</h1> | ||
</body> | ||
</html> |
Oops, something went wrong.