Skip to content

Commit

Permalink
Add Poetry for dependency and packaging management (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
vdusek authored Jan 30, 2024
1 parent 2273aed commit f4e8abe
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 69 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ __pycache__
.envrc
.python-version

poetry.toml
poetry.lock

*.egg-info/
*.egg
dist/
Expand Down
37 changes: 36 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,41 @@ Docusaurus and published to GitHub pages.

## Release process

Publishing new versions to [PyPI](https://pypi.org/project/apify) happens automatically through GitHub Actions.
Currently, releases to [PyPI](https://pypi.org/project/crawlee) are published manually.
To release a new version, follow these steps:

First, update the version number under `tool.poetry.version` in `pyproject.toml`.

```toml
[tool.poetry]
name = "crawlee"
version = "x.y.z"
```

Generate the distribution archives for the package using Poetry.

```shell
poetry build
```

Set up the PyPI API token for authentication. Replace `YOUR_API_TOKEN` with the actual PyPI token.

```shell
poetry config pypi-token.pypi YOUR_API_TOKEN
```

Upload the package to PyPI.

```shell
poetry publish
```

<!--
TODO: update this section once publishing via CI is supported
Original text:
Publishing new versions to [PyPI](https://pypi.org/project/crawlee) happens automatically through GitHub Actions.
On each commit to the `master` branch, a new beta release is published, taking the version number from `pyproject.toml`
and automatically incrementing the beta version suffix by 1 from the last beta release published to PyPI.
Expand All @@ -77,6 +111,7 @@ If there is already a stable version with the same version number as in `pyproje
process fails, so don't forget to update the version number before releasing a new version. The release process also
fails when the released version is not described in `CHANGELOG.md`, so don't forget to describe the changes in
the new version there.
-->

### Beta release checklist

Expand Down
24 changes: 7 additions & 17 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,30 +1,23 @@
.PHONY: clean install-dev build publish twine-check lint unit-tests integration-tests type-check check-code format check-version-availability check-changelog-entry
.PHONY: clean install-dev lint type-check unit-tests unit-tests-cov integration-tests check-code format check-version-availability check-changelog-entry

# Directories handed to the lint and type-check targets.
DIRS_WITH_CODE = src tests scripts

# Number of parallel pytest workers used by the integration-tests target.
# This is the default for local testing; GitHub workflows override it with a higher value in CI.
INTEGRATION_TESTS_CONCURRENCY = 1

clean:
rm -rf build dist .mypy_cache .pytest_cache src/*.egg-info __pycache__
rm -rf .mypy_cache .pytest_cache .ruff_cache build dist htmlcov .coverage

install-dev:
python3 -m pip install --upgrade pip
pip install --no-cache-dir -e ".[dev]"
pre-commit install

build:
python3 -m build

publish:
python3 -m twine upload dist/*

twine-check:
python3 -m twine check dist/*
poetry install
# Install the git hooks through `poetry run` instead of `source .venv/bin/activate`:
# `source` is a bashism that fails when Make's default /bin/sh is a strict POSIX shell,
# and `poetry run` works regardless of where Poetry created the virtual environment.
poetry run pre-commit install

# Run the Ruff linter over every directory listed in DIRS_WITH_CODE.
lint:
python3 -m ruff check $(DIRS_WITH_CODE)

# Run the mypy static type checker over every directory listed in DIRS_WITH_CODE.
type-check:
python3 -m mypy $(DIRS_WITH_CODE)

# Run the unit tests under tests/unit with pytest. `-n auto` lets pytest-xdist pick the number
# of workers, `-ra` prints a short summary of all non-passing tests, and coverage is collected
# for src/crawlee.
unit-tests:
python3 -m pytest -n auto -ra tests/unit --cov=src/crawlee

Expand All @@ -34,9 +27,6 @@ unit-tests-cov:
# Run the integration tests under tests/integration with pytest, using
# INTEGRATION_TESTS_CONCURRENCY parallel workers (1 by default).
integration-tests:
python3 -m pytest -n $(INTEGRATION_TESTS_CONCURRENCY) -ra tests/integration

type-check:
python3 -m mypy $(DIRS_WITH_CODE)

# Aggregate target with no recipe of its own: requests the lint, type-check and unit-tests targets.
check-code: lint type-check unit-tests

format:
Expand Down
100 changes: 49 additions & 51 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
[project]
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "crawlee"
version = "0.0.1"
description = "Crawlee for Python"
authors = ["Apify Technologies s.r.o. <support@apify.com>"]
license = "Apache-2.0"
readme = "README.md"
license = { text = "Apache Software License" }
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
keywords = ["apify", "automation", "chrome", "crawler", "headless", "scraper", "scraping"]

packages = [{ include = "crawlee", from = "src" }]
classifiers = [
"Development Status :: 2 - Pre-Alpha",
"Intended Audience :: Developers",
Expand All @@ -18,56 +21,51 @@ classifiers = [
"Programming Language :: Python :: 3.12",
"Topic :: Software Development :: Libraries",
]

requires-python = ">=3.9"

# We use inclusive ordered comparison clause for non-Apify packages intentionally in order to enhance the Apify SDK's
# compatibility with a wide range of external packages. This decision was discussed in detail in the following PR:
# https://github.com/apify/apify-sdk-python/pull/154
dependencies = [
"colorama >= 0.4.6",
"more_itertools >= 10.2.0",
"pyee >= 11.1.0",
"typing-extensions >= 4.1.0",
]

[project.optional-dependencies]
dev = [
"build ~= 1.0.3",
"filelock ~= 3.13.1",
"mypy ~= 1.8.0",
"pre-commit ~= 3.4.0",
"pydoc-markdown ~= 4.8.2",
"pytest ~= 7.4.4",
"pytest-asyncio ~= 0.23.4",
"pytest-cov ~= 4.1.0",
"pytest-only ~= 2.0.0",
"pytest-timeout ~= 2.2.0",
"pytest-xdist ~= 3.5.0",
"respx ~= 0.20.1",
"ruff ~= 0.1.13",
"twine ~= 4.0.2",
"types-colorama ~= 0.4.15.20240106",
keywords = [
"apify",
"automation",
"chrome",
"crawlee",
"crawler",
"headless",
"scraper",
"scraping",
]

[project.urls]
[tool.poetry.urls]
"Homepage" = "https://todo.com/"
"Documentation" = "https://todo.com/"
"Source" = "https://github.com/apify/crawlee-py"
"Issue tracker" = "https://github.com/apify/crawlee-py/issues"
"Changelog" = "https://github.com/apify/crawlee-py/blob/master/CHANGELOG.md"
"Apify Homepage" = "https://apify.com"

[build-system]
requires = ["setuptools>=64.0.0", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
where = ["src"]
include = ["crawlee*"]

[tool.setuptools.package-data]
crawlee = ["py.typed"]
"Changelog" = "https://github.com/apify/crawlee-py/blob/master/CHANGELOG.md"
"Documentation" = "https://todo.com/"
"Issue Tracker" = "https://github.com/apify/crawlee-py/issues"
"Repository" = "https://github.com/apify/crawlee-py"

# We use inclusive ordered comparison clauses (`>=`) for external packages intentionally in order to
# enhance Crawlee's compatibility with external packages. This decision was discussed in detail in
# the following PR: https://github.com/apify/apify-sdk-python/pull/154.
#
# Poetry's caret operator adds an implicit upper bound (e.g. `^10.2.0` means `>=10.2.0,<11.0.0`),
# which would contradict the policy above, so it is used only for the Python version itself.
[tool.poetry.dependencies]
python = "^3.9"
colorama = ">=0.4.6"
more_itertools = ">=10.2.0"
pyee = ">=11.1.0"
typing-extensions = ">=4.1.0"

[tool.poetry.group.dev.dependencies]
build = "~1.0.3"
filelock = "~3.13.1"
mypy = "~1.8.0"
pre-commit = "~3.4.0"
pydoc-markdown = "~4.8.2"
pytest = "~7.4.4"
pytest-asyncio = "~0.23.4"
pytest-cov = "~4.1.0"
pytest-only = "~2.0.0"
pytest-timeout = "~2.2.0"
pytest-xdist = "~3.5.0"
respx = "~0.20.1"
ruff = "~0.1.13"
twine = "~4.0.2"
types-colorama = "~0.4.15.20240106"

[tool.ruff]
line-length = 120
Expand Down

0 comments on commit f4e8abe

Please sign in to comment.