diff --git a/README.md b/README.md index 42d9bc5..c6b796e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # WPextract - WordPress Site Extractor +PyPI - Version +Conda Version + **WPextract is a tool to create datasets from WordPress sites.** - Archives posts, pages, tags, categories, media (including files), comments, and users @@ -7,12 +10,10 @@ - Resolves internal links and media to IDs - Automatically parses multilingual sites to create parallel datasets -> [!NOTE] -> This software was developed for our EMNLP 2023 paper [_Analysing State-Backed Propaganda Websites: a New Dataset and Linguistic Study_](https://aclanthology.org/2023.emnlp-main.349/). The code has been updated since the paper was written; for archival purposes, the precise version used for the study is [available on Zenodo](https://zenodo.org/records/10008086). ## Quickstart -See the [complete documentation](#) for more detailed usage. +See the [complete documentation](https://gatenlp.github.io/wordpress-site-extractor/) for more detailed usage. 1. Install with `pipx` ```shell-session @@ -37,6 +38,9 @@ Available under the Apache 2.0 license. See [LICENSE](LICENSE) for more informat ## Citing +> [!NOTE] +> This software was developed for our EMNLP 2023 paper [_Analysing State-Backed Propaganda Websites: a New Dataset and Linguistic Study_](https://aclanthology.org/2023.emnlp-main.349/). The code has been updated since the paper was written; for archival purposes, the precise version used for the study is [available on Zenodo](https://zenodo.org/records/10008086). + We'd love to hear about your use of our tool, you can [email us](mailto:frheppell1@sheffield.ac.uk) to let us know! Feel free to create issues and/or pull requests for new features or bugs. If you use this tool in published work, please cite [our EMNLP paper](https://aclanthology.org/2023.emnlp-main.349/): diff --git a/poetry.lock b/poetry.lock index c107442..998c4bd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1671,13 +1671,13 @@ reference = "pypi-public" [[package]] name = "setuptools" -version = "70.2.0" +version = "70.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-70.2.0-py3-none-any.whl", hash = "sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05"}, - {file = "setuptools-70.2.0.tar.gz", hash = "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1"}, + {file = "setuptools-70.3.0-py3-none-any.whl", hash = "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc"}, + {file = "setuptools-70.3.0.tar.gz", hash = "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5"}, ] [package.extras] @@ -1888,4 +1888,4 @@ reference = "pypi-public" [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.13" -content-hash = "ace72c716a8e50e5d331cc0e619e0574561ffe2aed7f4dc427f8ce36139d9df7" +content-hash = "4839f696fac9b937ce620d57bf8345bd9c0a57d2890a03d308b5544d759a7d6e" diff --git a/pyproject.toml b/pyproject.toml index 38fa7ca..223cf59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,15 @@ [tool.poetry] name="wpextract" -version="0.0.1" +version="1.0.0a0" description="Create a dataset from the WordPress API" authors=["Freddy Heppell "] packages=[ { include = "extractor", from = "src"} ] +homepage="https://gatenlp.github.io/wordpress-site-extractor/" repository="https://github.com/GateNLP/wordpress-site-extractor" license="Apache-2.0" +readme = "README.md" [tool.poetry.scripts] wpextract = "extractor.cli.cli:main" @@ -19,13 +21,13 @@ url = "https://pypi.org/simple/" [tool.poetry.dependencies] python = ">=3.9.0,<3.13" -beautifulsoup4 = ">=4.12.3" -langcodes = ">=3.4.0" -lxml = ">=5.2.2" -numpy = ">=1.26.4" -pandas = ">=2.2.2" -tqdm = ">=4.66.4" -requests = "^2.32.3" +beautifulsoup4 = ">=4.12.0" +langcodes = ">=3.3.0" +lxml = ">=5.0.0" +numpy = ">=1.23.0" +pandas = ">=1.5.2" +tqdm = ">=4.65.0" +requests = ">=2.32.3" [tool.poetry.group.dev.dependencies] build = "==0.9.*,>=0.9.0" @@ -35,6 +37,8 @@ pytest-mock = "~3.14.0" ruff = "^0.5.0" +[tool.poetry.group.docs] +optional = true [tool.poetry.group.docs.dependencies] mkdocs-material = "^9.5.28" mkdocstrings = "^0.25.1"