diff --git a/README.md b/README.md
index 42d9bc5..c6b796e 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
# WPextract - WordPress Site Extractor
+
+
+
**WPextract is a tool to create datasets from WordPress sites.**
- Archives posts, pages, tags, categories, media (including files), comments, and users
@@ -7,12 +10,10 @@
- Resolves internal links and media to IDs
- Automatically parses multilingual sites to create parallel datasets
-> [!NOTE]
-> This software was developed for our EMNLP 2023 paper [_Analysing State-Backed Propaganda Websites: a New Dataset and Linguistic Study_](https://aclanthology.org/2023.emnlp-main.349/). The code has been updated since the paper was written; for archival purposes, the precise version used for the study is [available on Zenodo](https://zenodo.org/records/10008086).
## Quickstart
-See the [complete documentation](#) for more detailed usage.
+See the [complete documentation](https://gatenlp.github.io/wordpress-site-extractor/) for more detailed usage.
1. Install with `pipx`
```shell-session
@@ -37,6 +38,9 @@ Available under the Apache 2.0 license. See [LICENSE](LICENSE) for more informat
## Citing
+> [!NOTE]
+> This software was developed for our EMNLP 2023 paper [_Analysing State-Backed Propaganda Websites: a New Dataset and Linguistic Study_](https://aclanthology.org/2023.emnlp-main.349/). The code has been updated since the paper was written; for archival purposes, the precise version used for the study is [available on Zenodo](https://zenodo.org/records/10008086).
+
We'd love to hear about your use of our tool; you can [email us](mailto:frheppell1@sheffield.ac.uk) to let us know! Feel free to create issues and/or pull requests for new features or bugs.
If you use this tool in published work, please cite [our EMNLP paper](https://aclanthology.org/2023.emnlp-main.349/):
diff --git a/poetry.lock b/poetry.lock
index c107442..998c4bd 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1671,13 +1671,13 @@ reference = "pypi-public"
[[package]]
name = "setuptools"
-version = "70.2.0"
+version = "70.3.0"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
optional = false
python-versions = ">=3.8"
files = [
- {file = "setuptools-70.2.0-py3-none-any.whl", hash = "sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05"},
- {file = "setuptools-70.2.0.tar.gz", hash = "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1"},
+ {file = "setuptools-70.3.0-py3-none-any.whl", hash = "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc"},
+ {file = "setuptools-70.3.0.tar.gz", hash = "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5"},
]
[package.extras]
@@ -1888,4 +1888,4 @@ reference = "pypi-public"
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.13"
-content-hash = "ace72c716a8e50e5d331cc0e619e0574561ffe2aed7f4dc427f8ce36139d9df7"
+content-hash = "4839f696fac9b937ce620d57bf8345bd9c0a57d2890a03d308b5544d759a7d6e"
diff --git a/pyproject.toml b/pyproject.toml
index 38fa7ca..223cf59 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,13 +1,15 @@
[tool.poetry]
name="wpextract"
-version="0.0.1"
+version="1.0.0a0"
description="Create a dataset from the WordPress API"
authors=["Freddy Heppell "]
packages=[
{ include = "extractor", from = "src"}
]
+homepage="https://gatenlp.github.io/wordpress-site-extractor/"
repository="https://github.com/GateNLP/wordpress-site-extractor"
license="Apache-2.0"
+readme = "README.md"
[tool.poetry.scripts]
wpextract = "extractor.cli.cli:main"
@@ -19,13 +21,13 @@ url = "https://pypi.org/simple/"
[tool.poetry.dependencies]
python = ">=3.9.0,<3.13"
-beautifulsoup4 = ">=4.12.3"
-langcodes = ">=3.4.0"
-lxml = ">=5.2.2"
-numpy = ">=1.26.4"
-pandas = ">=2.2.2"
-tqdm = ">=4.66.4"
-requests = "^2.32.3"
+beautifulsoup4 = ">=4.12.0"
+langcodes = ">=3.3.0"
+lxml = ">=5.0.0"
+numpy = ">=1.23.0"
+pandas = ">=1.5.2"
+tqdm = ">=4.65.0"
+requests = ">=2.32.3"
[tool.poetry.group.dev.dependencies]
build = "==0.9.*,>=0.9.0"
@@ -35,6 +37,8 @@ pytest-mock = "~3.14.0"
ruff = "^0.5.0"
+[tool.poetry.group.docs]
+optional = true
[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.28"
mkdocstrings = "^0.25.1"