Skip to content

Commit b42a9ed

Browse files
authored
Playwright (#69)
* Playwright * cleanup * fix bandit issues
1 parent 8ac0050 commit b42a9ed

File tree

10 files changed

+182
-128
lines changed

10 files changed

+182
-128
lines changed

.github/workflows/cicd.yml

+3
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,9 @@ jobs:
202202
- name: Install Pytest
203203
run: pip install pytest pytest-mock
204204

205+
- name: Setup PlayWright
206+
run: playwright install && playwright install-deps
207+
205208
- name: Run Pytest
206209
run: pytest --no-header -vv
207210

.github/workflows/generate-release.yml

+3
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ jobs:
7676
- name: Install Pytest
7777
run: pip install pytest pytest-mock
7878

79+
- name: Setup PlayWright
80+
run: playwright install && playwright install-deps
81+
7982
- name: Run Pytest
8083
run: pytest --no-header -vv
8184

.github/workflows/generate-test-release.yml

+3
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ jobs:
7575
- name: Install Pytest
7676
run: pip install pytest pytest-mock
7777

78+
- name: Setup PlayWright
79+
run: playwright install && playwright install-deps
80+
7881
- name: Run Pytest
7982
run: pytest --no-header -vv
8083

CITATION.cff

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ message: If you use this software, please cite it using these metadata.
33
title: PyPi Extractor
44
abstract: Extract package information for a given user in PyPi.
55
type: software
6-
version: 0.1.2
7-
date-released: 2024-06-26
6+
version: 0.1.3
7+
date-released: 2024-12-12
88
repository-code: https://github.com/DevelopersToolbox/pypi-extractor-package
99
keywords:
1010
- "Wolf Software"

README.md

+27-1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,22 @@ PyPI Extractor is a Python package designed to fetch and process detailed inform
4444
Python Package Index (PyPI). This package is particularly useful for users who want to retrieve and analyze metadata for packages
4545
maintained by a specific PyPI user.
4646

47+
## Significant Update From 0.1.3
48+
49+
pypi.org no longer allows you to scrape details using the requests package, or any package that does not support JavaScript. To resolve this we have
50+
updated this package to utilise [Playwright](https://pypi.org/project/playwright/) when retrieving a list of packages for a given user. While we have
51+
attempted to automate as much as possible, you might want to do some of the work manually.
52+
53+
Playwright needs two commands to be run in order for it to function correctly:
54+
55+
```
56+
playwright install
57+
playwright install-deps
58+
```
59+
60+
We have added an `auto_install` option to the main class so that you can instruct the package to do the install for you. This helps when installing the
61+
package in a fully automated way, e.g. Puppet or similar.
62+
4763
## Features
4864

4965
- Retrieve a list of packages maintained by a specific PyPI user.
@@ -116,11 +132,13 @@ print(package_details)
116132

117133
A class to fetch and process package details for a given PyPI user.
118134

119-
##### `__init__(self, username: str)`
135+
##### `__init__(self, username: str, verbose: bool, auto_install: bool)`
120136

121137
- Initializes the `PyPiExtractor` with a username.
122138
- Parameters:
123139
- `username` (str): The PyPI username.
140+
- `verbose` (bool): Verbose output (Default: False)
141+
- `auto_install` (bool): Auto install Playwright dependencies (Default: False)
124142
- Raises:
125143
- `PyPiExtractorError`: If the username is not provided.
126144

@@ -132,6 +150,14 @@ A class to fetch and process package details for a given PyPI user.
132150
- Raises:
133151
- `PyPiExtractorError`: If the username is not provided.
134152

153+
##### `enable_verbose(self)`
154+
155+
- Enable verbose mode.
156+
157+
##### `enable_auto_install(self)`
158+
159+
- Enable auto install.
160+
135161
##### `get_user_packages(self) -> list`
136162

137163
- Fetches the list of packages for the given PyPI user.

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
requests==2.32.3
22
beautifulsoup4==4.12.3
3+
playwright==1.49.1

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
setup(
1414
name='wolfsoftware.pypi-extractor',
15-
version='0.1.2',
15+
version='0.1.3',
1616
author='Wolf Software',
1717
author_email='[email protected]',
1818
description='Extract package information for a given user in PyPi.',

tests/testconf.py renamed to tests/conftest.py

+72-77
Original file line numberDiff line numberDiff line change
@@ -16,32 +16,56 @@
1616
import requests
1717

1818

19+
def raise_error(*args, **kwargs):
20+
"""Raise an error if the real playwright gets used."""
21+
raise RuntimeError("Real Playwright should not be invoked!")
22+
23+
1924
@pytest.fixture
20-
def mock_get_user_packages_success() -> Generator[Union[MagicMock, AsyncMock], Any, None]:
21-
"""Fixture to mock requests.get for get_user_packages success case."""
22-
with patch('requests.get') as mock_get:
23-
mock_response = Mock()
24-
mock_response.raise_for_status.return_value = None
25-
mock_response.text = '''
26-
<a class="package-snippet">
27-
<h3 class="package-snippet__title">Package1</h3>
28-
<p class="package-snippet__description">Description1</p>
29-
</a>
30-
<a class="package-snippet">
31-
<h3 class="package-snippet__title">Package2</h3>
32-
<p class="package-snippet__description">Description2</p>
33-
</a>
34-
'''
35-
mock_get.return_value = mock_response
36-
yield mock_get
25+
def mock_playwright() -> Generator[MagicMock, None, None]:
26+
"""Mock the Playwright sync API."""
27+
with patch('wolfsoftware.pypi_extractor.pypi.sync_playwright') as mock_sync_playwright:
28+
mock_playwright_instance = MagicMock()
29+
mock_browser = MagicMock()
30+
mock_context = MagicMock()
31+
mock_page = MagicMock()
32+
33+
# Mock page.goto() and page.wait_for_selector()
34+
mock_page.goto.return_value = None
35+
mock_page.wait_for_selector.return_value = None
36+
37+
# Mock page.query_selector_all() to return simulated package elements
38+
def mock_query_selector_all(selector):
39+
"""Handle mocking the right data."""
40+
if selector == 'a.package-snippet':
41+
return [
42+
MagicMock(query_selector=MagicMock(side_effect=[
43+
MagicMock(inner_text=MagicMock(return_value="Package1")),
44+
MagicMock(inner_text=MagicMock(return_value="Description1")),
45+
])),
46+
MagicMock(query_selector=MagicMock(side_effect=[
47+
MagicMock(inner_text=MagicMock(return_value="Package2")),
48+
MagicMock(inner_text=MagicMock(return_value="Description2")),
49+
])),
50+
]
51+
return []
52+
mock_page.query_selector_all.side_effect = mock_query_selector_all
53+
54+
mock_context.new_page.return_value = mock_page
55+
mock_browser.new_context.return_value = mock_context
56+
mock_playwright_instance.chromium.launch.return_value = mock_browser
57+
mock_sync_playwright.return_value.__enter__.return_value = mock_playwright_instance
58+
yield mock_sync_playwright
3759

3860

3961
@pytest.fixture
40-
def mock_get_user_packages_error() -> Generator[Union[MagicMock, AsyncMock], Any, None]:
41-
"""Fixture to mock requests.get for get_user_packages error case."""
42-
with patch('requests.get') as mock_get:
43-
mock_get.side_effect = requests.RequestException("Request error")
44-
yield mock_get
62+
def mock_playwright_error() -> Generator[MagicMock, None, None]:
63+
"""Fixture to mock Playwright with an error scenario."""
64+
with patch('wolfsoftware.pypi_extractor.pypi.sync_playwright') as mock_sync_playwright:
65+
mock_playwright_instance = MagicMock()
66+
mock_playwright_instance.chromium.launch.side_effect = Exception("Playwright error")
67+
mock_sync_playwright.return_value.__enter__.return_value = mock_playwright_instance
68+
yield mock_sync_playwright
4569

4670

4771
@pytest.fixture
@@ -155,24 +179,13 @@ def mock_get_package_details_error() -> Generator[Union[MagicMock, AsyncMock], A
155179

156180

157181
@pytest.fixture
158-
def mock_get_all_packages_details_success() -> Generator[Union[MagicMock, AsyncMock], Any, None]:
159-
"""Fixture to mock requests.get for get_all_packages_details success case."""
182+
def mock_get_all_packages_details_success() -> Generator[MagicMock, None, None]:
183+
"""Mock requests.get for get_all_packages_details success case."""
160184
with patch('requests.get') as mock_get:
161-
mock_response_user = Mock()
185+
# Mock response for the user packages API
186+
mock_response_user = MagicMock()
162187
mock_response_user.raise_for_status.return_value = None
163-
mock_response_user.text = '''
164-
<a class="package-snippet">
165-
<h3 class="package-snippet__title">Package1</h3>
166-
<p class="package-snippet__description">Description1</p>
167-
</a>
168-
<a class="package-snippet">
169-
<h3 class="package-snippet__title">Package2</h3>
170-
<p class="package-snippet__description">Description2</p>
171-
</a>
172-
'''
173-
mock_response_package1 = Mock()
174-
mock_response_package1.raise_for_status.return_value = None
175-
mock_response_package1.json.return_value = {
188+
mock_response_user.json.return_value = {
176189
'info': {
177190
'name': 'Package1',
178191
'version': '1.0.0',
@@ -186,37 +199,30 @@ def mock_get_all_packages_details_success() -> Generator[Union[MagicMock, AsyncM
186199
'requires_python': '>=3.6',
187200
},
188201
'releases': {
189-
'0.9.0': [
190-
{
191-
'upload_time': '2021-01-01T00:00:00',
192-
'upload_time_iso_8601': '2021-01-01T00:00:00Z',
193-
'python_version': 'py3',
194-
'url': 'https://example.com',
195-
'filename': 'package-0.9.0.tar.gz',
196-
'packagetype': 'sdist',
197-
'md5_digest': 'abc123',
198-
'digests': {'sha256': 'def456'},
199-
'size': 12345
200-
}
201-
],
202202
'1.0.0': [
203203
{
204204
'upload_time': '2021-06-01T00:00:00',
205205
'upload_time_iso_8601': '2021-06-01T00:00:00Z',
206206
'python_version': 'py3',
207-
'url': 'https://example.com',
207+
'url': 'https://example.com/package-1.0.0.tar.gz',
208208
'filename': 'package-1.0.0.tar.gz',
209209
'packagetype': 'sdist',
210-
'md5_digest': 'ghi789',
211-
'digests': {'sha256': 'jkl012'},
212-
'size': 23456
210+
'md5_digest': 'abc123',
211+
'digests': {'sha256': 'def456'},
212+
'size': 12345
213213
}
214-
],
214+
]
215215
},
216216
'requires_dist': ['requests', 'beautifulsoup4'],
217217
'urls': [{'url': 'https://example.com/package-1.0.0.tar.gz'}],
218218
}
219-
mock_response_package2 = Mock()
219+
220+
# Simulate two different package details responses
221+
mock_response_package1 = MagicMock()
222+
mock_response_package1.raise_for_status.return_value = None
223+
mock_response_package1.json.return_value = mock_response_user.json.return_value
224+
225+
mock_response_package2 = MagicMock()
220226
mock_response_package2.raise_for_status.return_value = None
221227
mock_response_package2.json.return_value = {
222228
'info': {
@@ -226,41 +232,30 @@ def mock_get_all_packages_details_success() -> Generator[Union[MagicMock, AsyncM
226232
'author': 'Author2',
227233
'author_email': '[email protected]',
228234
'license': 'MIT',
229-
'home_page': 'https://example.com',
230-
'keywords': 'example, package',
235+
'home_page': 'https://example.com/package2',
236+
'keywords': 'example, package2',
231237
'classifiers': ['Development Status :: 5 - Production/Stable'],
232238
'requires_python': '>=3.6',
233239
},
234240
'releases': {
235-
'1.0.0': [
236-
{
237-
'upload_time': '2021-01-01T00:00:00',
238-
'upload_time_iso_8601': '2021-01-01T00:00:00Z',
239-
'python_version': 'py3',
240-
'url': 'https://example.com',
241-
'filename': 'package-1.0.0.tar.gz',
242-
'packagetype': 'sdist',
243-
'md5_digest': 'abc123',
244-
'digests': {'sha256': 'def456'},
245-
'size': 12345
246-
}
247-
],
248241
'2.0.0': [
249242
{
250-
'upload_time': '2021-06-01T00:00:00',
251-
'upload_time_iso_8601': '2021-06-01T00:00:00Z',
243+
'upload_time': '2022-06-01T00:00:00',
244+
'upload_time_iso_8601': '2022-06-01T00:00:00Z',
252245
'python_version': 'py3',
253-
'url': 'https://example.com',
246+
'url': 'https://example.com/package-2.0.0.tar.gz',
254247
'filename': 'package-2.0.0.tar.gz',
255248
'packagetype': 'sdist',
256249
'md5_digest': 'ghi789',
257250
'digests': {'sha256': 'jkl012'},
258251
'size': 23456
259252
}
260-
],
253+
]
261254
},
262255
'requires_dist': ['requests', 'beautifulsoup4'],
263256
'urls': [{'url': 'https://example.com/package-2.0.0.tar.gz'}],
264257
}
265-
mock_get.side_effect = [mock_response_user, mock_response_package1, mock_response_package2]
258+
259+
# Simulate the sequence of requests
260+
mock_get.side_effect = [mock_response_package1, mock_response_package2]
266261
yield mock_get

0 commit comments

Comments
 (0)