diff --git a/.env.template b/.env.template index a260b2c..e417cd6 100644 --- a/.env.template +++ b/.env.template @@ -9,15 +9,9 @@ GOOGLE_PLACES_API_KEY= OPEN_AI_API_KEY= -DB_USER= -DB_PASSWORD= -DB_CONNECTION= - -FACEBOOK_APP_ID= -FACEBOOK_APP_SECRET= -OPEN_AI_API_KEY= - +# Need to be set when 'DATABASE_TYPE' is 'S3' AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= +# Choose between 'Local' and 'S3' DATABASE_TYPE= diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 0000000..ab325af --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2023 Berkay Bozkurt + +name: documentation + +on: [push, pull_request, workflow_dispatch] + +permissions: + contents: write + +jobs: + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pipenv + # if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pipenv install --dev + - name: Generate Sphinx + run: | + cd src/docs + pipenv run sphinx-apidoc -o . .. 
+ pipenv run make clean + pipenv run make html + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + with: + publish_branch: gh-pages + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: src/docs/_build/html/ + force_orphan: true diff --git a/.gitignore b/.gitignore index 202c3dc..2662d39 100644 --- a/.gitignore +++ b/.gitignore @@ -53,9 +53,17 @@ bin/ !**/data/merged_geo.geojson **/data/reviews/*.json **/data/gpt-results/*.json -**/data/models/* +**/data/models/*.pkl +**/data/models/*.joblib **/data/classification_reports/* +**/docs/* +!**/docs/conf.py +!**/docs/index.rst +!**/docs/make.bat +!**/docs/Makefile +!**/docs/readme_link.md + # Env files *.env @@ -70,3 +78,6 @@ report.pdf **/cache/* !.gitkeep + +# testing +.coverage diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 328ddbe..0000000 --- a/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Felix Zailskas - -FROM python:3.10-slim - -WORKDIR /app - -ADD Pipfile . -RUN pip install pipenv -RUN pipenv install - -ADD src . -ADD .env . 
- -ENTRYPOINT [ "pipenv", "run" ] -CMD [ "python", "main.py" ] diff --git a/Documentation/SBOM_generator.md b/Documentation/SBOM_generator.md new file mode 100644 index 0000000..65d5d4b --- /dev/null +++ b/Documentation/SBOM_generator.md @@ -0,0 +1,61 @@ +# Automatic SBOM generation + +```console +pipenv install +pipenv shell + +pip install pipreqs +pip install cyclonedx-bom +pip install pip-licenses + +# Create the SBOM (cyclonedx-bom) based on (pipreqs) requirements that are actually imported in the .py files + +$sbom = pipreqs --print | cyclonedx-py -r -pb -o - -i - + +# Create an XmlDocument object +$xml = New-Object System.Xml.XmlDocument + +# Load XML content into the XmlDocument +$xml.LoadXml($sbom) + + +# Create an empty CSV file +$csvPath = "SBOM.csv" + +# Initialize an empty array to store rows +$result = @() + +# Iterate through the XML nodes and create rows for each node +$xml.SelectNodes("//*[local-name()='component']") | ForEach-Object { + + $row = @{ + "Version" = $_.Version + "Context" = $_.Purl + "Name" = if ($_.Name -eq 'scikit_learn') { 'scikit-learn' } else { $_.Name } + } + + # Get license information + $match = pip-licenses --from=mixed --format=csv --with-system --packages $row.Name | ConvertFrom-Csv + + # Add license information to the row + $result += [PSCustomObject]@{ + "Context" = $row.Context + "Name" = $row.Name + "Version" = $row.Version + "License" = $match.License + } +} + +# Export the data to the CSV file +$result | Export-Csv -Path $csvPath -NoTypeInformation + +# Create the license file +$licensePath = $csvPath + '.license' +@" +SPDX-License-Identifier: CC-BY-4.0 +SPDX-FileCopyrightText: 2023 Fabian-Paul Utech +"@ | Out-File -FilePath $licensePath + +exit + +``` diff --git a/Documentation/ideas.md b/Documentation/ideas.md new file mode 100644 index 0000000..c9eb7bc --- /dev/null +++ b/Documentation/ideas.md @@ -0,0 +1,41 @@ + + +# Unused Ideas + +This document lists ideas and implementations which have either not been tried yet 
or have been deprecated as they are not used in the current product version but still carry some conceptual value. + +## Deprecated + +The original implementation of the deprecated modules can be found in the `deprecated/` directory. + +### Controller + +**_Note:_** This package has the additional dependency `pydantic==2.4.2` + +The controller module was originally planned to be used as a communication device between EVP and BDC. Whenever the salesperson interface would register a new lead, the controller is supposed to trigger the BDC pipeline to enrich the data of that lead and preprocess it to create a feature vector. The successful completion of the BDC pipeline is then registered at the controller, which will then trigger an inference of the EVP to compute the predicted merchant size and write this back to the lead data. The computed merchant size can then be used to rank the leads and allow the salesperson to decide the value of the leads and which one to call. + +The current implementation of the module supports queueing messages from the BDC and EVP as indicated by their type. Depending on the message type the message is then routed to the corresponding module (EVP or BDC). The actual processing of the messages by the modules is not implemented. All of this is done asynchronously by using the Python threading library. + +### FacebookGraphAPI + +**_Note:_** This package has the additional dependency `facebook-sdk==3.1.0`. Also the environment variables `FACEBOOK_APP_ID` `FACEBOOK_APP_SECRET` need to be set with a valid token. + +This step was supposed to be used for querying lead data from Facebook by using either the business owner's name or the company name. The attempt was deprecated as the cost for the needed API token was evaluated too high and because the usage permissions of the Facebook API were changed. 
Furthermore, it is paramount to check the legal ramifications of querying Facebook for this kind of data as there might be legal consequences of searching for individuals on Facebook instead of their businesses due to data privacy regulations in the EU. + +### ScrapeAddresses + +This step was an early experiment, using only the custom domain from an email address. We check if there's a live website running +for the domain, and then try to parse the main site for a business address using a RegEx pattern. The pattern is not very precise +and calling the website, as well as parsing it, takes quite some time, which accumulates for a lot of entries. The Google Places +step yields better results for the business address and is faster, which is why `scrape_addresses.py` was deprecated. + +## Possible ML improvements + +### Creating data subsets + +The data collected by the BDC pipeline has not been refined to only include semantically valuable data fields. It is possible that some data fields contain no predictive power. This would mean they are practically polluting the dataset with unnecessary information. A proper analysis of the predictive power of all data fields would allow cutting down on the amount of data for each lead, reducing processing time and possibly making predictions more precise. This approach has been explored very briefly by subset 1 as described in `Classifier-Comparison.md`. However, the choice of included features has not been justified by experiments, making them somewhat arbitrary. Additionally, an analysis of this type could give insights on which data fields to expand on and what new data one might want to collect to increase the EVP's performance in predicting merchant sizes. + +Possibly filtering data based on some quality metric could also improve general performance. The regional_atlas_score and google_confidence_score have been tried for this but did not improve performance. 
However, these values are computed somewhat arbitrarily and implementing a more refined quality metric might result in more promising results. diff --git a/Pipfile b/Pipfile index c8c7e99..9c56c91 100644 --- a/Pipfile +++ b/Pipfile @@ -7,53 +7,50 @@ verify_ssl = true name = "pypi" [dev-packages] -pytest = "==7.4.0" coverage = "==7.4.1" -pre-commit = "==3.5.0" flake8 = "==6.0.0" -pytest-env = "==1.0.1" -matplotlib = "==3.8.2" -plotly = "==5.18.0" geopy = "==2.4.1" +matplotlib = "==3.8.2" notebook = "==7.0.6" +plotly = "==5.18.0" +pre-commit = "==3.5.0" +pytest = "==7.4.0" +pytest-env = "==1.0.1" +sphinx = "==7.2.6" +sphinx_rtd_theme = "==2.0.0" +myst_parser = "==2.0.0" [packages] -numpy = "==1.26.1" -requests = "==2.31.0" -scikit-learn = "==1.3.2" -pydantic = "==2.4.2" -email-validator = "==2.1.0.post1" -pandas = "==2.0.3" +autocorrect = "==2.6.1" beautifulsoup4 = "==4.12.2" -tqdm = "==4.65.0" -python-dotenv = "==0.21.0" -googlemaps = "==4.10.0" -phonenumbers = "==8.13.25" -pymongo = "==4.6.0" -facebook-sdk = "==3.1.0" boto3 = "==1.33.1" +colorama = "==0.4.6" +deep-translator = "==1.11.4" +deutschland = "==0.4.0" +email-validator = "==2.1.0.post1" +fsspec = "==2023.12.2" +geopandas = "==0.14.1" +googlemaps = "==4.10.0" +joblib = "==1.3.2" +lightgbm = "==4.3.0" +numpy = "==1.26.1" openai = "==1.3.3" -tiktoken = "==0.5.1" +osmnx = "==1.7.1" +pandas = "==2.0.3" +phonenumbers = "==8.13.25" pylanguagetool = "==0.10.0" +pyspellchecker = "==0.7.2" +python-dotenv = "==0.21.0" reportlab = "==4.0.7" -osmnx = "==1.7.1" -geopandas = "==0.14.1" +requests = "==2.31.0" +s3fs = "==2023.12.2" +scikit-learn = "==1.3.2" shapely = "==2.0.2" -pyspellchecker = "==0.7.2" -autocorrect = "==2.6.1" textblob = "==0.17.1" -deep-translator = "==1.11.4" -fsspec = "2023.12.2" -s3fs = "2023.12.2" -imblearn = "==0.0" -sagemaker = "==2.198.0" -joblib = "1.3.2" +tiktoken = "==0.5.1" +torch = "==2.1.2" +tqdm = "==4.65.0" xgboost = "==2.0.3" -colorama = "==0.4.6" -torch = "2.1.2" -deutschland = "0.4.0" 
-bs4 = "0.0.2" -lightgbm = "==4.3.0" [requires] python_version = "3.10" diff --git a/Pipfile.lock b/Pipfile.lock index 290f0f5..d3f4ad6 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "6ab737c4b2ed7f02c0f9bbd5eaca04ea76853afbc7da60d5416b3a79d3ccb58f" + "sha256": "8d74161673d9b82cb7933149388452406f1efaf7a82db95bfd11997ef8b36d33" }, "pipfile-spec": 6, "requires": { @@ -18,11 +18,11 @@ "default": { "aiobotocore": { "hashes": [ - "sha256:0b095af50da2d6f94e93ca959e2a4876f0f0d84d534b61b21d8e050832d04ab6", - "sha256:904a7ad7cc8671d662cfd596906dafe839118ea2a66332c37908e3dcfdee1e45" + "sha256:487fede588040bfa3a43df945275c28c1c73ca75bf705295adb9fbadd2e89be7", + "sha256:6dd7352248e3523019c5a54a395d2b1c31080697fc80a9ad2672de4eec8c7abd" ], "markers": "python_version >= '3.8'", - "version": "==2.11.1" + "version": "==2.11.2" }, "aiohttp": { "hashes": [ @@ -187,21 +187,13 @@ "markers": "python_version >= '3.7'", "version": "==1.33.13" }, - "bs4": { - "hashes": [ - "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925", - "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc" - ], - "index": "pypi", - "version": "==0.0.2" - }, "certifi": { "hashes": [ - "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1", - "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474" + "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", + "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" ], "markers": "python_version >= '3.6'", - "version": "==2023.11.17" + "version": "==2024.2.2" }, "charset-normalizer": { "hashes": [ @@ -322,14 +314,6 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4'", "version": "==0.7.2" }, - "cloudpickle": { - "hashes": [ - "sha256:61f594d1f4c295fa5cd9014ceb3a1fc4a70b0de1164b94fbc2d854ccba056f9f", - 
"sha256:d89684b8de9e34a2a43b3460fbca07d09d6e25ce858df4d5a44240403b6178f5" - ], - "markers": "python_version >= '3.6'", - "version": "==2.2.1" - }, "colorama": { "hashes": [ "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", @@ -355,14 +339,6 @@ "markers": "python_version >= '3.5'", "version": "==1.7" }, - "contextlib2": { - "hashes": [ - "sha256:3fbdb64466afd23abaf6c977627b75b6139a5a3e8ce38405c5b413aed7a0471f", - "sha256:ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869" - ], - "markers": "python_version >= '3.6'", - "version": "==21.6.0" - }, "dateparser": { "hashes": [ "sha256:0b21ad96534e562920a0083e97fd45fa959882d4162acc358705144520a35830", @@ -509,14 +485,6 @@ "markers": "python_version >= '3.8' and python_version < '4'", "version": "==0.4.0" }, - "dill": { - "hashes": [ - "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", - "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7" - ], - "markers": "python_version >= '3.8'", - "version": "==0.3.8" - }, "distro": { "hashes": [ "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", @@ -550,14 +518,6 @@ "markers": "python_version < '3.11'", "version": "==1.2.0" }, - "facebook-sdk": { - "hashes": [ - "sha256:2e987b3e0f466a6f4ee77b935eb023dba1384134f004a2af21f1cfff7fe0806e", - "sha256:cabcd2e69ea3d9f042919c99b353df7aa1e2be86d040121f6e9f5e63c1cf0f8d" - ], - "index": "pypi", - "version": "==3.1.0" - }, "filelock": { "hashes": [ "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e", @@ -704,14 +664,6 @@ "markers": "python_version >= '3.9'", "version": "==0.14.1" }, - "google-pasta": { - "hashes": [ - "sha256:4612951da876b1a10fe3960d7226f0c7682cf901e16ac06e473b267a5afa8954", - "sha256:b32482794a366b5366a32c92a9a9201b107821889935a02b3e51f6b432ea84ed", - "sha256:c9f2c8dfc8f96d0d5808299920721be30c9eec37f2389f28904f454565c8a16e" - ], - "version": "==0.2.0" - }, "googlemaps": { "hashes": [ 
"sha256:3055fcbb1aa262a9159b589b5e6af762b10e80634ae11c59495bd44867e47d88" @@ -760,29 +712,6 @@ "markers": "python_version >= '3.5'", "version": "==3.6" }, - "imbalanced-learn": { - "hashes": [ - "sha256:02ef5bc9ef046f44aa20353a904366a948f7944155e77b6f09b500a70981fd13", - "sha256:b9ccd9aaa3028699079d43a6d4d9fc9d039f55376733b31f87c7d9b125dcc165" - ], - "version": "==0.12.0" - }, - "imblearn": { - "hashes": [ - "sha256:d42c2d709d22c00d2b9a91e638d57240a8b79b4014122d92181fcd2549a2f79a", - "sha256:d8fbb662919c1b16f438ad91a8256220e53bcf6815c9ad5502c518b798de34f2" - ], - "index": "pypi", - "version": "==0.0" - }, - "importlib-metadata": { - "hashes": [ - "sha256:1231cf92d825c9e03cfc4da076a16de6422c863558229ea0b22b675657463443", - "sha256:f0afba6205ad8f8947c7d338b5342d5db2afbfd82f9cbef7879a9539cc12eb9b" - ], - "markers": "python_version >= '3.8'", - "version": "==6.11.0" - }, "jinja2": { "hashes": [ "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa", @@ -808,22 +737,6 @@ "markers": "python_version >= '3.7'", "version": "==1.3.2" }, - "jsonschema": { - "hashes": [ - "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f", - "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5" - ], - "markers": "python_version >= '3.8'", - "version": "==4.21.1" - }, - "jsonschema-specifications": { - "hashes": [ - "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc", - "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c" - ], - "markers": "python_version >= '3.8'", - "version": "==2023.12.1" - }, "lightgbm": { "hashes": [ "sha256:006f5784a9bcee43e5a7e943dc4f02de1ba2ee7a7af1ee5f190d383f3b6c9ebe", @@ -945,69 +858,69 @@ }, "markupsafe": { "hashes": [ - "sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69", - "sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0", - "sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d", - 
"sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec", - "sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5", - "sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411", - "sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3", - "sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74", - "sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0", - "sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949", - "sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d", - "sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279", - "sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f", - "sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6", - "sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc", - "sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e", - "sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954", - "sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656", - "sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc", - "sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518", - "sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56", - "sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc", - "sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa", - "sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565", - "sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4", - "sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb", - "sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250", - "sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4", - "sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959", 
- "sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc", - "sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474", - "sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863", - "sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8", - "sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f", - "sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2", - "sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e", - "sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e", - "sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb", - "sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f", - "sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a", - "sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26", - "sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d", - "sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2", - "sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131", - "sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789", - "sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6", - "sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a", - "sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858", - "sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e", - "sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb", - "sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e", - "sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84", - "sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7", - "sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea", - 
"sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b", - "sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6", - "sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475", - "sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74", - "sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a", - "sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00" + "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", + "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", + "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", + "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", + "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", + "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", + "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", + "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df", + "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", + "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", + "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", + "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", + "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", + "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371", + "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2", + "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", + "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52", + "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", + "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", + "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", 
+ "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", + "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", + "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", + "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", + "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", + "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", + "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", + "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", + "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", + "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9", + "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", + "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", + "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", + "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", + "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", + "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", + "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a", + "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", + "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", + "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", + "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", + "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", + "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", + "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", + "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", + 
"sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f", + "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50", + "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", + "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", + "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", + "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", + "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", + "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", + "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", + "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf", + "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", + "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", + "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", + "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", + "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68" ], "markers": "python_version >= '3.7'", - "version": "==2.1.4" + "version": "==2.1.5" }, "more-itertools": { "hashes": [ @@ -1026,101 +939,99 @@ }, "multidict": { "hashes": [ - "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9", - "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8", - "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03", - "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710", - "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161", - "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664", - "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569", - "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067", - 
"sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313", - "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706", - "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2", - "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636", - "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49", - "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93", - "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603", - "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0", - "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60", - "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4", - "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e", - "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1", - "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60", - "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951", - "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc", - "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe", - "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95", - "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d", - "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8", - "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed", - "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2", - "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775", - "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87", - "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c", - "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2", - "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98", 
- "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3", - "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe", - "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78", - "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660", - "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176", - "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e", - "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988", - "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c", - "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c", - "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0", - "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449", - "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f", - "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde", - "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5", - "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d", - "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac", - "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a", - "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9", - "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca", - "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11", - "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35", - "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063", - "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b", - "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982", - "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258", - 
"sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1", - "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52", - "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480", - "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7", - "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461", - "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d", - "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc", - "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779", - "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a", - "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547", - "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0", - "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171", - "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf", - "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d", - "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba" + "sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556", + "sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c", + "sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29", + "sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b", + "sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8", + "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7", + "sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd", + "sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40", + "sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6", + "sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3", + "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c", 
+ "sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9", + "sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5", + "sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae", + "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442", + "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9", + "sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc", + "sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c", + "sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea", + "sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5", + "sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50", + "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182", + "sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453", + "sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e", + "sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600", + "sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733", + "sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda", + "sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241", + "sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461", + "sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e", + "sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e", + "sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b", + "sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e", + "sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7", + "sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386", + "sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd", + 
"sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9", + "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf", + "sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee", + "sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5", + "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a", + "sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271", + "sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54", + "sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4", + "sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496", + "sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb", + "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319", + "sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3", + "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f", + "sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527", + "sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed", + "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604", + "sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef", + "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8", + "sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5", + "sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5", + "sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626", + "sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c", + "sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d", + "sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c", + "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc", + "sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc", 
+ "sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b", + "sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38", + "sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450", + "sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1", + "sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f", + "sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3", + "sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755", + "sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226", + "sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a", + "sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046", + "sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf", + "sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479", + "sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e", + "sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1", + "sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a", + "sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83", + "sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929", + "sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93", + "sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a", + "sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c", + "sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44", + "sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89", + "sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba", + "sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e", + "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da", + 
"sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24", + "sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423", + "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef" ], "markers": "python_version >= '3.7'", - "version": "==6.0.4" - }, - "multiprocess": { - "hashes": [ - "sha256:0dfd078c306e08d46d7a8d06fb120313d87aa43af60d66da43ffff40b44d2f41", - "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", - "sha256:37b55f71c07e2d741374998c043b9520b626a8dddc8b3129222ca4f1a06ef67a", - "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee", - "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", - "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", - "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", - "sha256:ba8c31889abf4511c7308a8c52bb4a30b9d590e7f58523302ba00237702ca054", - "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", - "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec", - "sha256:e7b9d0f307cd9bd50851afaac0dba2cb6c44449efff697df7c7645f7d3f2be3a", - "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e" - ], - "markers": "python_version >= '3.8'", - "version": "==0.70.16" + "version": "==6.0.5" }, "networkx": { "hashes": [ @@ -1179,32 +1090,33 @@ }, "onnxruntime": { "hashes": [ - "sha256:00cccc37a5195c8fca5011b9690b349db435986bd508eb44c9fce432da9228a4", - "sha256:04ebcd29c20473596a1412e471524b2fb88d55e6301c40b98dd2407b5911595f", - "sha256:212741b519ee61a4822c79c47147d63a8b0ffde25cd33988d3d7be9fbd51005d", - "sha256:28ff758b17ce3ca6bcad3d936ec53bd7f5482e7630a13f6dcae518eba8f71d85", - "sha256:3bc41f323ac77acfed190be8ffdc47a6a75e4beeb3473fbf55eeb075ccca8df2", - "sha256:3c467eaa3d2429c026b10c3d17b78b7f311f718ef9d2a0d6938e5c3c2611b0cf", - "sha256:3e253e572021563226a86f1c024f8f70cdae28f2fb1cc8c3a9221e8b1ce37db5", - 
"sha256:4137e5d443e2dccebe5e156a47f1d6d66f8077b03587c35f11ee0c7eda98b533", - "sha256:4c2dcf1b70f8434abb1116fe0975c00e740722aaf321997195ea3618cc00558e", - "sha256:5b8f5083f903408238883821dd8c775f8120cb4a604166dbdabe97f4715256d5", - "sha256:5f91f5497fe3df4ceee2f9e66c6148d9bfeb320cd6a71df361c66c5b8bac985a", - "sha256:6829dc2a79d48c911fedaf4c0f01e03c86297d32718a3fdee7a282766dfd282a", - "sha256:76f876c53bfa912c6c242fc38213a6f13f47612d4360bc9d599bd23753e53161", - "sha256:78d81d9af457a1dc90db9a7da0d09f3ccb1288ea1236c6ab19f0ca61f3eee2d3", - "sha256:985a029798744ce4743fcf8442240fed35c8e4d4d30ec7d0c2cdf1388cd44408", - "sha256:9996bab0f202a6435ab867bc55598f15210d0b72794d5de83712b53d564084ae", - "sha256:9aded21fe3d898edd86be8aa2eb995aa375e800ad3dfe4be9f618a20b8ee3630", - "sha256:a225bb683991001d111f75323d355b3590e75e16b5e0f07a0401e741a0143ea1", - "sha256:a82a8f0b4c978d08f9f5c7a6019ae51151bced9fd91e5aaa0c20a9e4ac7a60b6", - "sha256:c56695c1a343c7c008b647fff3df44da63741fbe7b6003ef576758640719be7b", - "sha256:d4a0151e1accd04da6711f6fd89024509602f82c65a754498e960b032359b02d", - "sha256:e8aa5bba78afbd4d8a2654b14ec7462ff3ce4a6aad312a3c2d2c2b65009f2541", - "sha256:ef2b1fc269cabd27f129fb9058917d6fdc89b188c49ed8700f300b945c81f889", - "sha256:f36b56a593b49a3c430be008c2aea6658d91a3030115729609ec1d5ffbaab1b6" - ], - "version": "==1.16.3" + "sha256:16d26badd092c8c257fa57c458bb600d96dc15282c647ccad0ed7b2732e6c03b", + "sha256:1ec485643b93e0a3896c655eb2426decd63e18a278bb7ccebc133b340723624f", + "sha256:461fa0fc7d9c392c352b6cccdedf44d818430f3d6eacd924bb804fdea2dcfd02", + "sha256:4b038324586bc905299e435f7c00007e6242389c856b82fe9357fdc3b1ef2bdc", + "sha256:5632077c3ab8b0cd4f74b0af9c4e924be012b1a7bcd7daa845763c6c6bf14b7d", + "sha256:5a06ab84eaa350bf64b1d747b33ccf10da64221ed1f38f7287f15eccbec81603", + "sha256:5d3d11db2c8242766212a68d0b139745157da7ce53bd96ba349a5c65e5a02357", + "sha256:61a12732cba869b3ad2d4e29ab6cb62c7a96f61b8c213f7fcb961ba412b70b37", + 
"sha256:6f1273bebcdb47ed932d076c85eb9488bc4768fcea16d5f2747ca692fad4f9d3", + "sha256:7466724e809a40e986b1637cba156ad9fc0d1952468bc00f79ef340bc0199552", + "sha256:83c35809cda898c5a11911c69ceac8a2ac3925911854c526f73bad884582f911", + "sha256:90c0890e36f880281c6c698d9bc3de2afbeee2f76512725ec043665c25c67d21", + "sha256:93d39b3fa1ee01f034f098e1c7769a811a21365b4883f05f96c14a2b60c6028b", + "sha256:ac2f286da3494b29b4186ca193c7d4e6a2c1f770c4184c7192c5da142c3dec28", + "sha256:b4c87d83c6f58d1af2675fc99e3dc810f2dbdb844bcefd0c1b7573632661f6fc", + "sha256:b7b337cd0586f7836601623cbd30a443df9528ef23965860d11c753ceeb009f2", + "sha256:bb1bf1ee575c665b8bbc3813ab906e091a645a24ccc210be7932154b8260eca1", + "sha256:cb60fd3c2c1acd684752eb9680e89ae223e9801a9b0e0dc7b28adabe45a2e380", + "sha256:d2b22a25a94109cc983443116da8d9805ced0256eb215c5e6bc6dcbabefeab96", + "sha256:d47bee7557a8b99c8681b6882657a515a4199778d6d5e24e924d2aafcef55b0a", + "sha256:dba55723bf9b835e358f48c98a814b41692c393eb11f51e02ece0625c756b797", + "sha256:ee48422349cc500273beea7607e33c2237909f58468ae1d6cccfc4aecd158565", + "sha256:f34cc46553359293854e38bdae2ab1be59543aad78a6317e7746d30e311110c3", + "sha256:fa464aa4d81df818375239e481887b656e261377d5b6b9a4692466f5f3261edc", + "sha256:fbb9faaf51d01aa2c147ef52524d9326744c852116d8005b9041809a71838878" + ], + "version": "==1.17.0" }, "openai": { "hashes": [ @@ -1264,14 +1176,6 @@ "markers": "python_version >= '3.8'", "version": "==2.0.3" }, - "pathos": { - "hashes": [ - "sha256:4f2a42bc1e10ccf0fe71961e7145fc1437018b6b21bd93b2446abc3983e49a7a", - "sha256:d669275e6eb4b3fbcd2846d7a6d1bba315fe23add0c614445ba1408d8b38bafe" - ], - "markers": "python_version >= '3.8'", - "version": "==0.3.2" - }, "phonenumbers": { "hashes": [ "sha256:4ae2d2e253a4752a269ae1147822b9aa500f14b2506a91f884e68b136901f128", @@ -1354,30 +1258,6 @@ "markers": "python_version >= '3.8'", "version": "==10.2.0" }, - "platformdirs": { - "hashes": [ - 
"sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380", - "sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420" - ], - "markers": "python_version >= '3.8'", - "version": "==4.1.0" - }, - "pox": { - "hashes": [ - "sha256:16e6eca84f1bec3828210b06b052adf04cf2ab20c22fd6fbef5f78320c9a6fed", - "sha256:651b8ae8a7b341b7bfd267f67f63106daeb9805f1ac11f323d5280d2da93fdb6" - ], - "markers": "python_version >= '3.8'", - "version": "==0.3.4" - }, - "ppft": { - "hashes": [ - "sha256:76a429a7d7b74c4d743f6dba8351e58d62b6432ed65df9fe204790160dab996d", - "sha256:de2dd4b1b080923dd9627fbdea52649fd741c752fce4f3cf37e26f785df23d9b" - ], - "markers": "python_version >= '3.8'", - "version": "==1.7.6.8" - }, "protobuf": { "hashes": [ "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62", @@ -1446,124 +1326,96 @@ }, "pydantic": { "hashes": [ - "sha256:94f336138093a5d7f426aac732dcfe7ab4eb4da243c88f891d65deb4a2556ee7", - "sha256:bc3ddf669d234f4220e6e1c4d96b061abe0998185a8d7855c0126782b7abc8c1" + "sha256:1440966574e1b5b99cf75a13bec7b20e3512e8a61b894ae252f56275e2c465ae", + "sha256:ae887bd94eb404b09d86e4d12f93893bdca79d766e738528c6fa1c849f3c6bcf" ], - "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==2.4.2" + "markers": "python_version >= '3.8'", + "version": "==2.6.0" }, "pydantic-core": { "hashes": [ - "sha256:042462d8d6ba707fd3ce9649e7bf268633a41018d6a998fb5fbacb7e928a183e", - "sha256:0523aeb76e03f753b58be33b26540880bac5aa54422e4462404c432230543f33", - "sha256:05560ab976012bf40f25d5225a58bfa649bb897b87192a36c6fef1ab132540d7", - "sha256:0675ba5d22de54d07bccde38997e780044dcfa9a71aac9fd7d4d7a1d2e3e65f7", - "sha256:073d4a470b195d2b2245d0343569aac7e979d3a0dcce6c7d2af6d8a920ad0bea", - "sha256:07ec6d7d929ae9c68f716195ce15e745b3e8fa122fc67698ac6498d802ed0fa4", - "sha256:0880e239827b4b5b3e2ce05e6b766a7414e5f5aedc4523be6b68cfbc7f61c5d0", - "sha256:0c27f38dc4fbf07b358b2bc90edf35e82d1703e22ff2efa4af4ad5de1b3833e7", - 
"sha256:0d8a8adef23d86d8eceed3e32e9cca8879c7481c183f84ed1a8edc7df073af94", - "sha256:0e2a35baa428181cb2270a15864ec6286822d3576f2ed0f4cd7f0c1708472aff", - "sha256:0f8682dbdd2f67f8e1edddcbffcc29f60a6182b4901c367fc8c1c40d30bb0a82", - "sha256:0fa467fd300a6f046bdb248d40cd015b21b7576c168a6bb20aa22e595c8ffcdd", - "sha256:128552af70a64660f21cb0eb4876cbdadf1a1f9d5de820fed6421fa8de07c893", - "sha256:1396e81b83516b9d5c9e26a924fa69164156c148c717131f54f586485ac3c15e", - "sha256:149b8a07712f45b332faee1a2258d8ef1fb4a36f88c0c17cb687f205c5dc6e7d", - "sha256:14ac492c686defc8e6133e3a2d9eaf5261b3df26b8ae97450c1647286750b901", - "sha256:14cfbb00959259e15d684505263d5a21732b31248a5dd4941f73a3be233865b9", - "sha256:14e09ff0b8fe6e46b93d36a878f6e4a3a98ba5303c76bb8e716f4878a3bee92c", - "sha256:154ea7c52e32dce13065dbb20a4a6f0cc012b4f667ac90d648d36b12007fa9f7", - "sha256:15d6bca84ffc966cc9976b09a18cf9543ed4d4ecbd97e7086f9ce9327ea48891", - "sha256:1d40f55222b233e98e3921df7811c27567f0e1a4411b93d4c5c0f4ce131bc42f", - "sha256:25bd966103890ccfa028841a8f30cebcf5875eeac8c4bde4fe221364c92f0c9a", - "sha256:2cf5bb4dd67f20f3bbc1209ef572a259027c49e5ff694fa56bed62959b41e1f9", - "sha256:2e0e2959ef5d5b8dc9ef21e1a305a21a36e254e6a34432d00c72a92fdc5ecda5", - "sha256:320f14bd4542a04ab23747ff2c8a778bde727158b606e2661349557f0770711e", - "sha256:3625578b6010c65964d177626fde80cf60d7f2e297d56b925cb5cdeda6e9925a", - "sha256:39215d809470f4c8d1881758575b2abfb80174a9e8daf8f33b1d4379357e417c", - "sha256:3f0ac9fb8608dbc6eaf17956bf623c9119b4db7dbb511650910a82e261e6600f", - "sha256:417243bf599ba1f1fef2bb8c543ceb918676954734e2dcb82bf162ae9d7bd514", - "sha256:420a692b547736a8d8703c39ea935ab5d8f0d2573f8f123b0a294e49a73f214b", - "sha256:443fed67d33aa85357464f297e3d26e570267d1af6fef1c21ca50921d2976302", - "sha256:48525933fea744a3e7464c19bfede85df4aba79ce90c60b94d8b6e1eddd67096", - "sha256:485a91abe3a07c3a8d1e082ba29254eea3e2bb13cbbd4351ea4e5a21912cc9b0", - "sha256:4a5be350f922430997f240d25f8219f93b0c81e15f7b30b868b2fddfc2d05f27", 
- "sha256:4d966c47f9dd73c2d32a809d2be529112d509321c5310ebf54076812e6ecd884", - "sha256:524ff0ca3baea164d6d93a32c58ac79eca9f6cf713586fdc0adb66a8cdeab96a", - "sha256:53df009d1e1ba40f696f8995683e067e3967101d4bb4ea6f667931b7d4a01357", - "sha256:5994985da903d0b8a08e4935c46ed8daf5be1cf217489e673910951dc533d430", - "sha256:5cabb9710f09d5d2e9e2748c3e3e20d991a4c5f96ed8f1132518f54ab2967221", - "sha256:5fdb39f67c779b183b0c853cd6b45f7db84b84e0571b3ef1c89cdb1dfc367325", - "sha256:600d04a7b342363058b9190d4e929a8e2e715c5682a70cc37d5ded1e0dd370b4", - "sha256:631cb7415225954fdcc2a024119101946793e5923f6c4d73a5914d27eb3d3a05", - "sha256:63974d168b6233b4ed6a0046296803cb13c56637a7b8106564ab575926572a55", - "sha256:64322bfa13e44c6c30c518729ef08fda6026b96d5c0be724b3c4ae4da939f875", - "sha256:655f8f4c8d6a5963c9a0687793da37b9b681d9ad06f29438a3b2326d4e6b7970", - "sha256:6835451b57c1b467b95ffb03a38bb75b52fb4dc2762bb1d9dbed8de31ea7d0fc", - "sha256:6db2eb9654a85ada248afa5a6db5ff1cf0f7b16043a6b070adc4a5be68c716d6", - "sha256:7c4d1894fe112b0864c1fa75dffa045720a194b227bed12f4be7f6045b25209f", - "sha256:7eb037106f5c6b3b0b864ad226b0b7ab58157124161d48e4b30c4a43fef8bc4b", - "sha256:8282bab177a9a3081fd3d0a0175a07a1e2bfb7fcbbd949519ea0980f8a07144d", - "sha256:82f55187a5bebae7d81d35b1e9aaea5e169d44819789837cdd4720d768c55d15", - "sha256:8572cadbf4cfa95fb4187775b5ade2eaa93511f07947b38f4cd67cf10783b118", - "sha256:8cdbbd92154db2fec4ec973d45c565e767ddc20aa6dbaf50142676484cbff8ee", - "sha256:8f6e6aed5818c264412ac0598b581a002a9f050cb2637a84979859e70197aa9e", - "sha256:92f675fefa977625105708492850bcbc1182bfc3e997f8eecb866d1927c98ae6", - "sha256:962ed72424bf1f72334e2f1e61b68f16c0e596f024ca7ac5daf229f7c26e4208", - "sha256:9badf8d45171d92387410b04639d73811b785b5161ecadabf056ea14d62d4ede", - "sha256:9c120c9ce3b163b985a3b966bb701114beb1da4b0468b9b236fc754783d85aa3", - "sha256:9f6f3e2598604956480f6c8aa24a3384dbf6509fe995d97f6ca6103bb8c2534e", - 
"sha256:a1254357f7e4c82e77c348dabf2d55f1d14d19d91ff025004775e70a6ef40ada", - "sha256:a1392e0638af203cee360495fd2cfdd6054711f2db5175b6e9c3c461b76f5175", - "sha256:a1c311fd06ab3b10805abb72109f01a134019739bd3286b8ae1bc2fc4e50c07a", - "sha256:a5cb87bdc2e5f620693148b5f8f842d293cae46c5f15a1b1bf7ceeed324a740c", - "sha256:a7a7902bf75779bc12ccfc508bfb7a4c47063f748ea3de87135d433a4cca7a2f", - "sha256:aad7bd686363d1ce4ee930ad39f14e1673248373f4a9d74d2b9554f06199fb58", - "sha256:aafdb89fdeb5fe165043896817eccd6434aee124d5ee9b354f92cd574ba5e78f", - "sha256:ae8a8843b11dc0b03b57b52793e391f0122e740de3df1474814c700d2622950a", - "sha256:b00bc4619f60c853556b35f83731bd817f989cba3e97dc792bb8c97941b8053a", - "sha256:b1f22a9ab44de5f082216270552aa54259db20189e68fc12484873d926426921", - "sha256:b3c01c2fb081fced3bbb3da78510693dc7121bb893a1f0f5f4b48013201f362e", - "sha256:b3dcd587b69bbf54fc04ca157c2323b8911033e827fffaecf0cafa5a892a0904", - "sha256:b4a6db486ac8e99ae696e09efc8b2b9fea67b63c8f88ba7a1a16c24a057a0776", - "sha256:bec7dd208a4182e99c5b6c501ce0b1f49de2802448d4056091f8e630b28e9a52", - "sha256:c0877239307b7e69d025b73774e88e86ce82f6ba6adf98f41069d5b0b78bd1bf", - "sha256:caa48fc31fc7243e50188197b5f0c4228956f97b954f76da157aae7f67269ae8", - "sha256:cfe1090245c078720d250d19cb05d67e21a9cd7c257698ef139bc41cf6c27b4f", - "sha256:d43002441932f9a9ea5d6f9efaa2e21458221a3a4b417a14027a1d530201ef1b", - "sha256:d64728ee14e667ba27c66314b7d880b8eeb050e58ffc5fec3b7a109f8cddbd63", - "sha256:d6495008733c7521a89422d7a68efa0a0122c99a5861f06020ef5b1f51f9ba7c", - "sha256:d8f1ebca515a03e5654f88411420fea6380fc841d1bea08effb28184e3d4899f", - "sha256:d99277877daf2efe074eae6338453a4ed54a2d93fb4678ddfe1209a0c93a2468", - "sha256:da01bec0a26befab4898ed83b362993c844b9a607a86add78604186297eb047e", - "sha256:db9a28c063c7c00844ae42a80203eb6d2d6bbb97070cfa00194dff40e6f545ab", - "sha256:dda81e5ec82485155a19d9624cfcca9be88a405e2857354e5b089c2a982144b2", - "sha256:e357571bb0efd65fd55f18db0a2fb0ed89d0bb1d41d906b138f088933ae618bb", 
- "sha256:e544246b859f17373bed915182ab841b80849ed9cf23f1f07b73b7c58baee5fb", - "sha256:e562617a45b5a9da5be4abe72b971d4f00bf8555eb29bb91ec2ef2be348cd132", - "sha256:e570ffeb2170e116a5b17e83f19911020ac79d19c96f320cbfa1fa96b470185b", - "sha256:e6f31a17acede6a8cd1ae2d123ce04d8cca74056c9d456075f4f6f85de055607", - "sha256:e9121b4009339b0f751955baf4543a0bfd6bc3f8188f8056b1a25a2d45099934", - "sha256:ebedb45b9feb7258fac0a268a3f6bec0a2ea4d9558f3d6f813f02ff3a6dc6698", - "sha256:ecaac27da855b8d73f92123e5f03612b04c5632fd0a476e469dfc47cd37d6b2e", - "sha256:ecdbde46235f3d560b18be0cb706c8e8ad1b965e5c13bbba7450c86064e96561", - "sha256:ed550ed05540c03f0e69e6d74ad58d026de61b9eaebebbaaf8873e585cbb18de", - "sha256:eeb3d3d6b399ffe55f9a04e09e635554012f1980696d6b0aca3e6cf42a17a03b", - "sha256:ef337945bbd76cce390d1b2496ccf9f90b1c1242a3a7bc242ca4a9fc5993427a", - "sha256:f1365e032a477c1430cfe0cf2856679529a2331426f8081172c4a74186f1d595", - "sha256:f23b55eb5464468f9e0e9a9935ce3ed2a870608d5f534025cd5536bca25b1402", - "sha256:f2e9072d71c1f6cfc79a36d4484c82823c560e6f5599c43c1ca6b5cdbd54f881", - "sha256:f323306d0556351735b54acbf82904fe30a27b6a7147153cbe6e19aaaa2aa429", - "sha256:f36a3489d9e28fe4b67be9992a23029c3cec0babc3bd9afb39f49844a8c721c5", - "sha256:f64f82cc3443149292b32387086d02a6c7fb39b8781563e0ca7b8d7d9cf72bd7", - "sha256:f6defd966ca3b187ec6c366604e9296f585021d922e666b99c47e78738b5666c", - "sha256:f7c2b8eb9fc872e68b46eeaf835e86bccc3a58ba57d0eedc109cbb14177be531", - "sha256:fa7db7558607afeccb33c0e4bf1c9a9a835e26599e76af6fe2fcea45904083a6", - "sha256:fcb83175cc4936a5425dde3356f079ae03c0802bbdf8ff82c035f8a54b333521" - ], - "markers": "python_version >= '3.7'", - "version": "==2.10.1" + "sha256:06f0d5a1d9e1b7932477c172cc720b3b23c18762ed7a8efa8398298a59d177c7", + "sha256:07982b82d121ed3fc1c51faf6e8f57ff09b1325d2efccaa257dd8c0dd937acca", + "sha256:0f478ec204772a5c8218e30eb813ca43e34005dff2eafa03931b3d8caef87d51", + "sha256:102569d371fadc40d8f8598a59379c37ec60164315884467052830b28cc4e9da", + 
"sha256:10dca874e35bb60ce4f9f6665bfbfad050dd7573596608aeb9e098621ac331dc", + "sha256:150ba5c86f502c040b822777e2e519b5625b47813bd05f9273a8ed169c97d9ae", + "sha256:1661c668c1bb67b7cec96914329d9ab66755911d093bb9063c4c8914188af6d4", + "sha256:1a2fe7b00a49b51047334d84aafd7e39f80b7675cad0083678c58983662da89b", + "sha256:1ae8048cba95f382dba56766525abca438328455e35c283bb202964f41a780b0", + "sha256:20f724a023042588d0f4396bbbcf4cffd0ddd0ad3ed4f0d8e6d4ac4264bae81e", + "sha256:2133b0e412a47868a358713287ff9f9a328879da547dc88be67481cdac529118", + "sha256:21e3298486c4ea4e4d5cc6fb69e06fb02a4e22089304308817035ac006a7f506", + "sha256:21ebaa4bf6386a3b22eec518da7d679c8363fb7fb70cf6972161e5542f470798", + "sha256:23632132f1fd608034f1a56cc3e484be00854db845b3a4a508834be5a6435a6f", + "sha256:2d5bea8012df5bb6dda1e67d0563ac50b7f64a5d5858348b5c8cb5043811c19d", + "sha256:300616102fb71241ff477a2cbbc847321dbec49428434a2f17f37528721c4948", + "sha256:30a8259569fbeec49cfac7fda3ec8123486ef1b729225222f0d41d5f840b476f", + "sha256:399166f24c33a0c5759ecc4801f040dbc87d412c1a6d6292b2349b4c505effc9", + "sha256:3fac641bbfa43d5a1bed99d28aa1fded1984d31c670a95aac1bf1d36ac6ce137", + "sha256:42c29d54ed4501a30cd71015bf982fa95e4a60117b44e1a200290ce687d3e640", + "sha256:462d599299c5971f03c676e2b63aa80fec5ebc572d89ce766cd11ca8bcb56f3f", + "sha256:4eebbd049008eb800f519578e944b8dc8e0f7d59a5abb5924cc2d4ed3a1834ff", + "sha256:502c062a18d84452858f8aea1e520e12a4d5228fc3621ea5061409d666ea1706", + "sha256:5317c04349472e683803da262c781c42c5628a9be73f4750ac7d13040efb5d2d", + "sha256:5511f962dd1b9b553e9534c3b9c6a4b0c9ded3d8c2be96e61d56f933feef9e1f", + "sha256:561be4e3e952c2f9056fba5267b99be4ec2afadc27261505d4992c50b33c513c", + "sha256:601d3e42452cd4f2891c13fa8c70366d71851c1593ed42f57bf37f40f7dca3c8", + "sha256:644904600c15816a1f9a1bafa6aab0d21db2788abcdf4e2a77951280473f33e1", + "sha256:653a5dfd00f601a0ed6654a8b877b18d65ac32c9d9997456e0ab240807be6cf7", + "sha256:694a5e9f1f2c124a17ff2d0be613fd53ba0c26de588eb4bdab8bca855e550d95", 
+ "sha256:71b4a48a7427f14679f0015b13c712863d28bb1ab700bd11776a5368135c7d60", + "sha256:72bf9308a82b75039b8c8edd2be2924c352eda5da14a920551a8b65d5ee89253", + "sha256:735dceec50fa907a3c314b84ed609dec54b76a814aa14eb90da31d1d36873a5e", + "sha256:73802194f10c394c2bedce7a135ba1d8ba6cff23adf4217612bfc5cf060de34c", + "sha256:780daad9e35b18d10d7219d24bfb30148ca2afc309928e1d4d53de86822593dc", + "sha256:8655f55fe68c4685673265a650ef71beb2d31871c049c8b80262026f23605ee3", + "sha256:877045a7969ace04d59516d5d6a7dee13106822f99a5d8df5e6822941f7bedc8", + "sha256:87bce04f09f0552b66fca0c4e10da78d17cb0e71c205864bab4e9595122cb9d9", + "sha256:8d4dfc66abea3ec6d9f83e837a8f8a7d9d3a76d25c9911735c76d6745950e62c", + "sha256:8ec364e280db4235389b5e1e6ee924723c693cbc98e9d28dc1767041ff9bc388", + "sha256:8fa00fa24ffd8c31fac081bf7be7eb495be6d248db127f8776575a746fa55c95", + "sha256:920c4897e55e2881db6a6da151198e5001552c3777cd42b8a4c2f72eedc2ee91", + "sha256:920f4633bee43d7a2818e1a1a788906df5a17b7ab6fe411220ed92b42940f818", + "sha256:9795f56aa6b2296f05ac79d8a424e94056730c0b860a62b0fdcfe6340b658cc8", + "sha256:98f0edee7ee9cc7f9221af2e1b95bd02810e1c7a6d115cfd82698803d385b28f", + "sha256:99c095457eea8550c9fa9a7a992e842aeae1429dab6b6b378710f62bfb70b394", + "sha256:99d3a433ef5dc3021c9534a58a3686c88363c591974c16c54a01af7efd741f13", + "sha256:99f9a50b56713a598d33bc23a9912224fc5d7f9f292444e6664236ae471ddf17", + "sha256:9c46e556ee266ed3fb7b7a882b53df3c76b45e872fdab8d9cf49ae5e91147fd7", + "sha256:9f5d37ff01edcbace53a402e80793640c25798fb7208f105d87a25e6fcc9ea06", + "sha256:a0b4cfe408cd84c53bab7d83e4209458de676a6ec5e9c623ae914ce1cb79b96f", + "sha256:a497be217818c318d93f07e14502ef93d44e6a20c72b04c530611e45e54c2196", + "sha256:ac89ccc39cd1d556cc72d6752f252dc869dde41c7c936e86beac5eb555041b66", + "sha256:adf28099d061a25fbcc6531febb7a091e027605385de9fe14dd6a97319d614cf", + "sha256:afa01d25769af33a8dac0d905d5c7bb2d73c7c3d5161b2dd6f8b5b5eea6a3c4c", + 
"sha256:b1fc07896fc1851558f532dffc8987e526b682ec73140886c831d773cef44b76", + "sha256:b49c604ace7a7aa8af31196abbf8f2193be605db6739ed905ecaf62af31ccae0", + "sha256:b9f3e0bffad6e238f7acc20c393c1ed8fab4371e3b3bc311020dfa6020d99212", + "sha256:ba07646f35e4e49376c9831130039d1b478fbfa1215ae62ad62d2ee63cf9c18f", + "sha256:bd88f40f2294440d3f3c6308e50d96a0d3d0973d6f1a5732875d10f569acef49", + "sha256:c0be58529d43d38ae849a91932391eb93275a06b93b79a8ab828b012e916a206", + "sha256:c45f62e4107ebd05166717ac58f6feb44471ed450d07fecd90e5f69d9bf03c48", + "sha256:c56da23034fe66221f2208c813d8aa509eea34d97328ce2add56e219c3a9f41c", + "sha256:c94b5537bf6ce66e4d7830c6993152940a188600f6ae044435287753044a8fe2", + "sha256:cebf8d56fee3b08ad40d332a807ecccd4153d3f1ba8231e111d9759f02edfd05", + "sha256:d0bf6f93a55d3fa7a079d811b29100b019784e2ee6bc06b0bb839538272a5610", + "sha256:d195add190abccefc70ad0f9a0141ad7da53e16183048380e688b466702195dd", + "sha256:d25ef0c33f22649b7a088035fd65ac1ce6464fa2876578df1adad9472f918a76", + "sha256:d6cbdf12ef967a6aa401cf5cdf47850559e59eedad10e781471c960583f25aa1", + "sha256:d8c032ccee90b37b44e05948b449a2d6baed7e614df3d3f47fe432c952c21b60", + "sha256:daff04257b49ab7f4b3f73f98283d3dbb1a65bf3500d55c7beac3c66c310fe34", + "sha256:e83ebbf020be727d6e0991c1b192a5c2e7113eb66e3def0cd0c62f9f266247e4", + "sha256:ed3025a8a7e5a59817b7494686d449ebfbe301f3e757b852c8d0d1961d6be864", + "sha256:f1936ef138bed2165dd8573aa65e3095ef7c2b6247faccd0e15186aabdda7f66", + "sha256:f5247a3d74355f8b1d780d0f3b32a23dd9f6d3ff43ef2037c6dcd249f35ecf4c", + "sha256:fa496cd45cda0165d597e9d6f01e36c33c9508f75cf03c0a650018c5048f578e", + "sha256:fb4363e6c9fc87365c2bc777a1f585a22f2f56642501885ffc7942138499bf54", + "sha256:fb4370b15111905bf8b5ba2129b926af9470f014cb0493a67d23e9d7a48348e8", + "sha256:fbec2af0ebafa57eb82c18c304b37c86a8abddf7022955d1742b3d5471a6339e" + ], + "markers": "python_version >= '3.8'", + "version": "==2.16.1" }, "pylanguagetool": { "hashes": [ @@ -1574,95 +1426,6 @@ "markers": "python_version 
>= '3.8'", "version": "==0.10.0" }, - "pymongo": { - "hashes": [ - "sha256:014e7049dd019a6663747ca7dae328943e14f7261f7c1381045dfc26a04fa330", - "sha256:055f5c266e2767a88bb585d01137d9c7f778b0195d3dbf4a487ef0638be9b651", - "sha256:05c30fd35cc97f14f354916b45feea535d59060ef867446b5c3c7f9b609dd5dc", - "sha256:0634994b026336195778e5693583c060418d4ab453eff21530422690a97e1ee8", - "sha256:09c7de516b08c57647176b9fc21d929d628e35bcebc7422220c89ae40b62126a", - "sha256:107a234dc55affc5802acb3b6d83cbb8c87355b38a9457fcd8806bdeb8bce161", - "sha256:10a379fb60f1b2406ae57b8899bacfe20567918c8e9d2d545e1b93628fcf2050", - "sha256:128b1485753106c54af481789cdfea12b90a228afca0b11fb3828309a907e10e", - "sha256:1394c4737b325166a65ae7c145af1ebdb9fb153ebedd37cf91d676313e4a67b8", - "sha256:1c63e3a2e8fb815c4b1f738c284a4579897e37c3cfd95fdb199229a1ccfb638a", - "sha256:1e4ed21029d80c4f62605ab16398fe1ce093fff4b5f22d114055e7d9fbc4adb0", - "sha256:1ec71ac633b126c0775ed4604ca8f56c3540f5c21a1220639f299e7a544b55f9", - "sha256:21812453354b151200034750cd30b0140e82ec2a01fd4357390f67714a1bfbde", - "sha256:256c503a75bd71cf7fb9ebf889e7e222d49c6036a48aad5a619f98a0adf0e0d7", - "sha256:2703a9f8f5767986b4f51c259ff452cc837c5a83c8ed5f5361f6e49933743b2f", - "sha256:288c21ab9531b037f7efa4e467b33176bc73a0c27223c141b822ab4a0e66ff2a", - "sha256:2972dd1f1285866aba027eff2f4a2bbf8aa98563c2ced14cb34ee5602b36afdf", - "sha256:2973f113e079fb98515722cd728e1820282721ec9fd52830e4b73cabdbf1eb28", - "sha256:2ca0ba501898b2ec31e6c3acf90c31910944f01d454ad8e489213a156ccf1bda", - "sha256:2d2be5c9c3488fa8a70f83ed925940f488eac2837a996708d98a0e54a861f212", - "sha256:2f8c04277d879146eacda920476e93d520eff8bec6c022ac108cfa6280d84348", - "sha256:325701ae7b56daa5b0692305b7cb505ca50f80a1288abb32ff420a8a209b01ca", - "sha256:3729b8db02063da50eeb3db88a27670d85953afb9a7f14c213ac9e3dca93034b", - "sha256:3919708594b86d0f5cdc713eb6fccd3f9b9532af09ea7a5d843c933825ef56c4", - "sha256:39a1cd5d383b37285641d5a7a86be85274466ae336a61b51117155936529f9b3", - 
"sha256:3ec6c20385c5a58e16b1ea60c5e4993ea060540671d7d12664f385f2fb32fe79", - "sha256:47aa128be2e66abd9d1a9b0437c62499d812d291f17b55185cb4aa33a5f710a4", - "sha256:49f2af6cf82509b15093ce3569229e0d53c90ad8ae2eef940652d4cf1f81e045", - "sha256:4a0269811661ba93c472c8a60ea82640e838c2eb148d252720a09b5123f2c2fe", - "sha256:518c90bdd6e842c446d01a766b9136fec5ec6cc94f3b8c3f8b4a332786ee6b64", - "sha256:5717a308a703dda2886a5796a07489c698b442f5e409cf7dc2ac93de8d61d764", - "sha256:5802acc012bbb4bce4dff92973dff76482f30ef35dd4cb8ab5b0e06aa8f08c80", - "sha256:5e63146dbdb1eac207464f6e0cfcdb640c9c5ff0f57b754fa96fe252314a1dc6", - "sha256:6695d7136a435c1305b261a9ddb9b3ecec9863e05aab3935b96038145fd3a977", - "sha256:680fa0fc719e1a3dcb81130858368f51d83667d431924d0bcf249644bce8f303", - "sha256:6b18276f14b4b6d92e707ab6db19b938e112bd2f1dc3f9f1a628df58e4fd3f0d", - "sha256:6bafea6061d63059d8bc2ffc545e2f049221c8a4457d236c5cd6a66678673eab", - "sha256:6d6a1b1361f118e7fefa17ae3114e77f10ee1b228b20d50c47c9f351346180c8", - "sha256:747c84f4e690fbe6999c90ac97246c95d31460d890510e4a3fa61b7d2b87aa34", - "sha256:79f41576b3022c2fe9780ae3e44202b2438128a25284a8ddfa038f0785d87019", - "sha256:7b0e6361754ac596cd16bfc6ed49f69ffcd9b60b7bc4bcd3ea65c6a83475e4ff", - "sha256:7e3b0127b260d4abae7b62203c4c7ef0874c901b55155692353db19de4b18bc4", - "sha256:7fc2bb8a74dcfcdd32f89528e38dcbf70a3a6594963d60dc9595e3b35b66e414", - "sha256:806e094e9e85d8badc978af8c95b69c556077f11844655cb8cd2d1758769e521", - "sha256:81dd1308bd5630d2bb5980f00aa163b986b133f1e9ed66c66ce2a5bc3572e891", - "sha256:82e620842e12e8cb4050d2643a81c8149361cd82c0a920fa5a15dc4ca8a4000f", - "sha256:85f2cdc400ee87f5952ebf2a117488f2525a3fb2e23863a8efe3e4ee9e54e4d1", - "sha256:8ab6bcc8e424e07c1d4ba6df96f7fb963bcb48f590b9456de9ebd03b88084fe8", - "sha256:8adf014f2779992eba3b513e060d06f075f0ab2fb3ad956f413a102312f65cdf", - "sha256:9b0f98481ad5dc4cb430a60bbb8869f05505283b9ae1c62bdb65eb5e020ee8e3", - "sha256:9bea9138b0fc6e2218147e9c6ce1ff76ff8e29dc00bb1b64842bd1ca107aee9f", 
- "sha256:a09bfb51953930e7e838972ddf646c5d5f984992a66d79da6ba7f6a8d8a890cd", - "sha256:a0be99b599da95b7a90a918dd927b20c434bea5e1c9b3efc6a3c6cd67c23f813", - "sha256:a49aca4d961823b2846b739380c847e8964ff7ae0f0a683992b9d926054f0d6d", - "sha256:a4dc1319d0c162919ee7f4ee6face076becae2abbd351cc14f1fe70af5fb20d9", - "sha256:a8273e1abbcff1d7d29cbbb1ea7e57d38be72f1af3c597c854168508b91516c2", - "sha256:a8f7f9feecae53fa18d6a3ea7c75f9e9a1d4d20e5c3f9ce3fba83f07bcc4eee2", - "sha256:ad4f66fbb893b55f96f03020e67dcab49ffde0177c6565ccf9dec4fdf974eb61", - "sha256:af425f323fce1b07755edd783581e7283557296946212f5b1a934441718e7528", - "sha256:b14dd73f595199f4275bed4fb509277470d9b9059310537e3b3daba12b30c157", - "sha256:b4ad70d7cac4ca0c7b31444a0148bd3af01a2662fa12b1ad6f57cd4a04e21766", - "sha256:b80a4ee19b3442c57c38afa978adca546521a8822d663310b63ae2a7d7b13f3a", - "sha256:ba51129fcc510824b6ca6e2ce1c27e3e4d048b6e35d3ae6f7e517bed1b8b25ce", - "sha256:c011bd5ad03cc096f99ffcfdd18a1817354132c1331bed7a837a25226659845f", - "sha256:cc94f9fea17a5af8cf1a343597711a26b0117c0b812550d99934acb89d526ed2", - "sha256:ccd785fafa1c931deff6a7116e9a0d402d59fabe51644b0d0c268295ff847b25", - "sha256:d16a534da0e39785687b7295e2fcf9a339f4a20689024983d11afaa4657f8507", - "sha256:d3077a31633beef77d057c6523f5de7271ddef7bde5e019285b00c0cc9cac1e3", - "sha256:d603edea1ff7408638b2504905c032193b7dcee7af269802dbb35bc8c3310ed5", - "sha256:db082f728160369d9a6ed2e722438291558fc15ce06d0a7d696a8dad735c236b", - "sha256:ddef295aaf80cefb0c1606f1995899efcb17edc6b327eb6589e234e614b87756", - "sha256:e16ade71c93f6814d095d25cd6d28a90d63511ea396bd96e9ffcb886b278baaa", - "sha256:e24025625bad66895b1bc3ae1647f48f0a92dd014108fb1be404c77f0b69ca67", - "sha256:e3db7d833a7c38c317dc95b54e27f1d27012e031b45a7c24e360b53197d5f6e7", - "sha256:e5e193f89f4f8c1fe273f9a6e6df915092c9f2af6db2d1afb8bd53855025c11f", - "sha256:eb438a8bf6b695bf50d57e6a059ff09652a07968b2041178b3744ea785fcef9b", - 
"sha256:ebf02c32afa6b67e5861a27183dd98ed88419a94a2ab843cc145fb0bafcc5b28", - "sha256:ecd9e1fa97aa11bf67472220285775fa15e896da108f425e55d23d7540a712ce", - "sha256:ef67fedd863ffffd4adfd46d9d992b0f929c7f61a8307366d664d93517f2c78e", - "sha256:f28ae33dc5a0b9cee06e95fd420e42155d83271ab75964baf747ce959cac5f52", - "sha256:fb1c56d891f9e34303c451998ef62ba52659648bb0d75b03c5e4ac223a3342c2", - "sha256:fe03bf25fae4b95d8afe40004a321df644400fdcba4c8e5e1a19c1085b740888" - ], - "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==4.6.0" - }, "pyproj": { "hashes": [ "sha256:18faa54a3ca475bfe6255156f2f2874e9a1c8917b0004eee9f664b86ccc513d3", @@ -1696,6 +1459,14 @@ "markers": "python_version >= '3.9'", "version": "==3.6.1" }, + "pyreadline3": { + "hashes": [ + "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae", + "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb" + ], + "markers": "python_version >= '3.8' and sys_platform == 'win32'", + "version": "==3.4.1" + }, "pyspellchecker": { "hashes": [ "sha256:b5ef23437702b8d03626f814b9646779b572d378b325ad252d8a8e616b3d76db", @@ -1724,75 +1495,10 @@ }, "pytz": { "hashes": [ - "sha256:31d4583c4ed539cd037956140d695e42c033a19e984bfce9964a3f7d59bc2b40", - "sha256:f90ef520d95e7c46951105338d918664ebfd6f1d995bd7d153127ce90efafa6a" + "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812", + "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319" ], - "version": "==2023.4" - }, - "pyyaml": { - "hashes": [ - "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", - "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", - "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", - "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", - "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", - 
"sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", - "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595", - "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", - "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", - "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", - "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", - "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", - "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", - "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", - "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", - "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", - "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", - "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6", - "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", - "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", - "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", - "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", - "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", - "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", - "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", - "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", - "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", - "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", - "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", - "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef", - "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", 
- "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", - "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", - "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0", - "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515", - "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c", - "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c", - "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924", - "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34", - "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", - "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", - "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", - "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", - "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", - "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", - "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", - "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", - "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", - "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585", - "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d", - "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" - ], - "markers": "python_version >= '3.6'", - "version": "==6.0.1" - }, - "referencing": { - "hashes": [ - "sha256:39240f2ecc770258f28b642dd47fd74bc8b02484de54e1882b74b35ebd779bd5", - "sha256:c775fedf74bc0f9189c2a3be1c12fd03e8c23f4d371dce795df44e06c5b412f7" - ], - "markers": "python_version >= '3.8'", - "version": "==0.33.0" + "version": "==2024.1" }, "regex": { "hashes": [ @@ -1911,111 +1617,6 @@ "markers": "python_version >= '3.7'", "version": 
"==2.31.0" }, - "rpds-py": { - "hashes": [ - "sha256:01f58a7306b64e0a4fe042047dd2b7d411ee82e54240284bab63e325762c1147", - "sha256:0210b2668f24c078307260bf88bdac9d6f1093635df5123789bfee4d8d7fc8e7", - "sha256:02866e060219514940342a1f84303a1ef7a1dad0ac311792fbbe19b521b489d2", - "sha256:0387ce69ba06e43df54e43968090f3626e231e4bc9150e4c3246947567695f68", - "sha256:060f412230d5f19fc8c8b75f315931b408d8ebf56aec33ef4168d1b9e54200b1", - "sha256:071bc28c589b86bc6351a339114fb7a029f5cddbaca34103aa573eba7b482382", - "sha256:0bfb09bf41fe7c51413f563373e5f537eaa653d7adc4830399d4e9bdc199959d", - "sha256:10162fe3f5f47c37ebf6d8ff5a2368508fe22007e3077bf25b9c7d803454d921", - "sha256:149c5cd24f729e3567b56e1795f74577aa3126c14c11e457bec1b1c90d212e38", - "sha256:1701fc54460ae2e5efc1dd6350eafd7a760f516df8dbe51d4a1c79d69472fbd4", - "sha256:1957a2ab607f9added64478a6982742eb29f109d89d065fa44e01691a20fc20a", - "sha256:1a746a6d49665058a5896000e8d9d2f1a6acba8a03b389c1e4c06e11e0b7f40d", - "sha256:1bfcad3109c1e5ba3cbe2f421614e70439f72897515a96c462ea657261b96518", - "sha256:1d36b2b59e8cc6e576f8f7b671e32f2ff43153f0ad6d0201250a7c07f25d570e", - "sha256:1db228102ab9d1ff4c64148c96320d0be7044fa28bd865a9ce628ce98da5973d", - "sha256:1dc29db3900cb1bb40353772417800f29c3d078dbc8024fd64655a04ee3c4bdf", - "sha256:1e626b365293a2142a62b9a614e1f8e331b28f3ca57b9f05ebbf4cf2a0f0bdc5", - "sha256:1f3c3461ebb4c4f1bbc70b15d20b565759f97a5aaf13af811fcefc892e9197ba", - "sha256:20de7b7179e2031a04042e85dc463a93a82bc177eeba5ddd13ff746325558aa6", - "sha256:24e4900a6643f87058a27320f81336d527ccfe503984528edde4bb660c8c8d59", - "sha256:2528ff96d09f12e638695f3a2e0c609c7b84c6df7c5ae9bfeb9252b6fa686253", - "sha256:25f071737dae674ca8937a73d0f43f5a52e92c2d178330b4c0bb6ab05586ffa6", - "sha256:270987bc22e7e5a962b1094953ae901395e8c1e1e83ad016c5cfcfff75a15a3f", - "sha256:292f7344a3301802e7c25c53792fae7d1593cb0e50964e7bcdcc5cf533d634e3", - "sha256:2953937f83820376b5979318840f3ee47477d94c17b940fe31d9458d79ae7eea", - 
"sha256:2a792b2e1d3038daa83fa474d559acfd6dc1e3650ee93b2662ddc17dbff20ad1", - "sha256:2a7b2f2f56a16a6d62e55354dd329d929560442bd92e87397b7a9586a32e3e76", - "sha256:2f4eb548daf4836e3b2c662033bfbfc551db58d30fd8fe660314f86bf8510b93", - "sha256:3664d126d3388a887db44c2e293f87d500c4184ec43d5d14d2d2babdb4c64cad", - "sha256:3677fcca7fb728c86a78660c7fb1b07b69b281964673f486ae72860e13f512ad", - "sha256:380e0df2e9d5d5d339803cfc6d183a5442ad7ab3c63c2a0982e8c824566c5ccc", - "sha256:3ac732390d529d8469b831949c78085b034bff67f584559340008d0f6041a049", - "sha256:4128980a14ed805e1b91a7ed551250282a8ddf8201a4e9f8f5b7e6225f54170d", - "sha256:4341bd7579611cf50e7b20bb8c2e23512a3dc79de987a1f411cb458ab670eb90", - "sha256:436474f17733c7dca0fbf096d36ae65277e8645039df12a0fa52445ca494729d", - "sha256:4dc889a9d8a34758d0fcc9ac86adb97bab3fb7f0c4d29794357eb147536483fd", - "sha256:4e21b76075c01d65d0f0f34302b5a7457d95721d5e0667aea65e5bb3ab415c25", - "sha256:516fb8c77805159e97a689e2f1c80655c7658f5af601c34ffdb916605598cda2", - "sha256:5576ee2f3a309d2bb403ec292d5958ce03953b0e57a11d224c1f134feaf8c40f", - "sha256:5a024fa96d541fd7edaa0e9d904601c6445e95a729a2900c5aec6555fe921ed6", - "sha256:5d0e8a6434a3fbf77d11448c9c25b2f25244226cfbec1a5159947cac5b8c5fa4", - "sha256:5e7d63ec01fe7c76c2dbb7e972fece45acbb8836e72682bde138e7e039906e2c", - "sha256:60e820ee1004327609b28db8307acc27f5f2e9a0b185b2064c5f23e815f248f8", - "sha256:637b802f3f069a64436d432117a7e58fab414b4e27a7e81049817ae94de45d8d", - "sha256:65dcf105c1943cba45d19207ef51b8bc46d232a381e94dd38719d52d3980015b", - "sha256:698ea95a60c8b16b58be9d854c9f993c639f5c214cf9ba782eca53a8789d6b19", - "sha256:70fcc6c2906cfa5c6a552ba7ae2ce64b6c32f437d8f3f8eea49925b278a61453", - "sha256:720215373a280f78a1814becb1312d4e4d1077b1202a56d2b0815e95ccb99ce9", - "sha256:7450dbd659fed6dd41d1a7d47ed767e893ba402af8ae664c157c255ec6067fde", - "sha256:7b7d9ca34542099b4e185b3c2a2b2eda2e318a7dbde0b0d83357a6d4421b5296", - "sha256:7fbd70cb8b54fe745301921b0816c08b6d917593429dfc437fd024b5ba713c58", 
- "sha256:81038ff87a4e04c22e1d81f947c6ac46f122e0c80460b9006e6517c4d842a6ec", - "sha256:810685321f4a304b2b55577c915bece4c4a06dfe38f6e62d9cc1d6ca8ee86b99", - "sha256:82ada4a8ed9e82e443fcef87e22a3eed3654dd3adf6e3b3a0deb70f03e86142a", - "sha256:841320e1841bb53fada91c9725e766bb25009cfd4144e92298db296fb6c894fb", - "sha256:8587fd64c2a91c33cdc39d0cebdaf30e79491cc029a37fcd458ba863f8815383", - "sha256:8ffe53e1d8ef2520ebcf0c9fec15bb721da59e8ef283b6ff3079613b1e30513d", - "sha256:9051e3d2af8f55b42061603e29e744724cb5f65b128a491446cc029b3e2ea896", - "sha256:91e5a8200e65aaac342a791272c564dffcf1281abd635d304d6c4e6b495f29dc", - "sha256:93432e747fb07fa567ad9cc7aaadd6e29710e515aabf939dfbed8046041346c6", - "sha256:938eab7323a736533f015e6069a7d53ef2dcc841e4e533b782c2bfb9fb12d84b", - "sha256:9584f8f52010295a4a417221861df9bea4c72d9632562b6e59b3c7b87a1522b7", - "sha256:9737bdaa0ad33d34c0efc718741abaafce62fadae72c8b251df9b0c823c63b22", - "sha256:99da0a4686ada4ed0f778120a0ea8d066de1a0a92ab0d13ae68492a437db78bf", - "sha256:99f567dae93e10be2daaa896e07513dd4bf9c2ecf0576e0533ac36ba3b1d5394", - "sha256:9bdf1303df671179eaf2cb41e8515a07fc78d9d00f111eadbe3e14262f59c3d0", - "sha256:9f0e4dc0f17dcea4ab9d13ac5c666b6b5337042b4d8f27e01b70fae41dd65c57", - "sha256:a000133a90eea274a6f28adc3084643263b1e7c1a5a66eb0a0a7a36aa757ed74", - "sha256:a3264e3e858de4fc601741498215835ff324ff2482fd4e4af61b46512dd7fc83", - "sha256:a71169d505af63bb4d20d23a8fbd4c6ce272e7bce6cc31f617152aa784436f29", - "sha256:a967dd6afda7715d911c25a6ba1517975acd8d1092b2f326718725461a3d33f9", - "sha256:aa5bfb13f1e89151ade0eb812f7b0d7a4d643406caaad65ce1cbabe0a66d695f", - "sha256:ae35e8e6801c5ab071b992cb2da958eee76340e6926ec693b5ff7d6381441745", - "sha256:b686f25377f9c006acbac63f61614416a6317133ab7fafe5de5f7dc8a06d42eb", - "sha256:b760a56e080a826c2e5af09002c1a037382ed21d03134eb6294812dda268c811", - "sha256:b86b21b348f7e5485fae740d845c65a880f5d1eda1e063bc59bef92d1f7d0c55", - 
"sha256:b9412abdf0ba70faa6e2ee6c0cc62a8defb772e78860cef419865917d86c7342", - "sha256:bd345a13ce06e94c753dab52f8e71e5252aec1e4f8022d24d56decd31e1b9b23", - "sha256:be22ae34d68544df293152b7e50895ba70d2a833ad9566932d750d3625918b82", - "sha256:bf046179d011e6114daf12a534d874958b039342b347348a78b7cdf0dd9d6041", - "sha256:c3d2010656999b63e628a3c694f23020322b4178c450dc478558a2b6ef3cb9bb", - "sha256:c64602e8be701c6cfe42064b71c84ce62ce66ddc6422c15463fd8127db3d8066", - "sha256:d65e6b4f1443048eb7e833c2accb4fa7ee67cc7d54f31b4f0555b474758bee55", - "sha256:d8bbd8e56f3ba25a7d0cf980fc42b34028848a53a0e36c9918550e0280b9d0b6", - "sha256:da1ead63368c04a9bded7904757dfcae01eba0e0f9bc41d3d7f57ebf1c04015a", - "sha256:dbbb95e6fc91ea3102505d111b327004d1c4ce98d56a4a02e82cd451f9f57140", - "sha256:dbc56680ecf585a384fbd93cd42bc82668b77cb525343170a2d86dafaed2a84b", - "sha256:df3b6f45ba4515632c5064e35ca7f31d51d13d1479673185ba8f9fefbbed58b9", - "sha256:dfe07308b311a8293a0d5ef4e61411c5c20f682db6b5e73de6c7c8824272c256", - "sha256:e796051f2070f47230c745d0a77a91088fbee2cc0502e9b796b9c6471983718c", - "sha256:efa767c220d94aa4ac3a6dd3aeb986e9f229eaf5bce92d8b1b3018d06bed3772", - "sha256:f0b8bf5b8db49d8fd40f54772a1dcf262e8be0ad2ab0206b5a2ec109c176c0a4", - "sha256:f175e95a197f6a4059b50757a3dca33b32b61691bdbd22c29e8a8d21d3914cae", - "sha256:f2f3b28b40fddcb6c1f1f6c88c6f3769cd933fa493ceb79da45968a21dccc920", - "sha256:f6c43b6f97209e370124baf2bf40bb1e8edc25311a158867eb1c3a5d449ebc7a", - "sha256:f7f4cb1f173385e8a39c29510dd11a78bf44e360fb75610594973f5ea141028b", - "sha256:fad059a4bd14c45776600d223ec194e77db6c20255578bb5bcdd7c18fd169361", - "sha256:ff1dcb8e8bc2261a088821b2595ef031c91d499a0c1b031c152d43fe0a6ecec8", - "sha256:ffee088ea9b593cc6160518ba9bd319b5475e5f3e578e4552d63818773c6f56a" - ], - "markers": "python_version >= '3.8'", - "version": "==0.17.1" - }, "s3fs": { "hashes": [ "sha256:0d5a99039665f30b2dbee5495de3b299a022d51b3195a9440f5df47c2621b777", @@ -2033,21 +1634,6 @@ "markers": "python_version >= '3.7'", 
"version": "==0.8.2" }, - "sagemaker": { - "hashes": [ - "sha256:6798d51a32e583c6d29355d947423b53e0d10271ae36bce609c8dd5ddced3e7b" - ], - "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==2.198.0" - }, - "schema": { - "hashes": [ - "sha256:f06717112c61895cabc4707752b88716e8420a8819d71404501e114f91043197", - "sha256:f3ffdeeada09ec34bf40d7d79996d9f7175db93b7a5065de0faa7f41083c1e6c" - ], - "version": "==0.7.5" - }, "scikit-learn": { "hashes": [ "sha256:0402638c9a7c219ee52c94cbebc8fcb5eb9fe9c773717965c1f4185588ad3107", @@ -2176,14 +1762,6 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, - "smdebug-rulesconfig": { - "hashes": [ - "sha256:104da3e6931ecf879dfc687ca4bbb3bee5ea2bc27f4478e9dbb3ee3655f1ae61", - "sha256:7a19e6eb2e6bcfefbc07e4a86ef7a88f32495001a038bf28c7d8e77ab793fcd6" - ], - "markers": "python_version >= '2.7'", - "version": "==1.0.1" - }, "sniffio": { "hashes": [ "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101", @@ -2208,14 +1786,6 @@ "markers": "python_version >= '3.8'", "version": "==1.12" }, - "tblib": { - "hashes": [ - "sha256:059bd77306ea7b419d4f76016aef6d7027cc8a0785579b5aad198803435f882c", - "sha256:289fa7359e580950e7d9743eab36b0691f0310fce64dee7d9c31065b8f723e23" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==1.7.0" - }, "textblob": { "hashes": [ "sha256:15546d7f309e96a3f542bee42751c8e5ce4d519d3d274ee79df2318141f0b788", @@ -2520,17 +2090,17 @@ ], "markers": "python_version >= '3.7'", "version": "==1.9.4" - }, - "zipp": { - "hashes": [ - "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31", - "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0" - ], - "markers": "python_version >= '3.8'", - "version": "==3.17.0" } }, "develop": { + "alabaster": { + "hashes": [ + "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", 
+ "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92" + ], + "markers": "python_version >= '3.9'", + "version": "==0.7.16" + }, "anyio": { "hashes": [ "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780", @@ -2539,14 +2109,6 @@ "markers": "python_version >= '3.7'", "version": "==3.7.1" }, - "appnope": { - "hashes": [ - "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24", - "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e" - ], - "markers": "platform_system == 'Darwin'", - "version": "==0.1.3" - }, "argon2-cffi": { "hashes": [ "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08", @@ -2640,11 +2202,11 @@ }, "certifi": { "hashes": [ - "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1", - "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474" + "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", + "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" ], "markers": "python_version >= '3.6'", - "version": "==2023.11.17" + "version": "==2024.2.2" }, "cffi": { "hashes": [ @@ -2808,6 +2370,15 @@ "markers": "python_full_version >= '3.7.0'", "version": "==3.3.2" }, + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + ], + "index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==0.4.6" + }, "comm": { "hashes": [ "sha256:0bc91edae1344d39d3661dcbc36937181fdaddb304790458f8b044dbc064b89a", @@ -2980,6 +2551,14 @@ ], "version": "==0.3.8" }, + "docutils": { + "hashes": [ + "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", + "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b" + ], + "markers": "python_version >= '3.7'", + "version": 
"==0.20.1" + }, "exceptiongroup": { "hashes": [ "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14", @@ -3108,6 +2687,14 @@ "markers": "python_version >= '3.5'", "version": "==3.6" }, + "imagesize": { + "hashes": [ + "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", + "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.4.1" + }, "iniconfig": { "hashes": [ "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", @@ -3126,11 +2713,11 @@ }, "ipython": { "hashes": [ - "sha256:2f21bd3fc1d51550c89ee3944ae04bbc7bc79e129ea0937da6e6c68bfdbf117a", - "sha256:bc9716aad6f29f36c449e30821c9dd0c1c1a7b59ddcc26931685b87b4c569619" + "sha256:1050a3ab8473488d7eee163796b02e511d0735cf43a04ba2a8348bd0f2eaf8a5", + "sha256:48fbc236fbe0e138b88773fa0437751f14c3645fb483f1d4c5dee58b37e5ce73" ], "markers": "python_version >= '3.10'", - "version": "==8.20.0" + "version": "==8.21.0" }, "isoduration": { "hashes": [ @@ -3170,6 +2757,7 @@ "version": "==2.4" }, "jsonschema": { + "extras": ["format-nongpl"], "hashes": [ "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f", "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5" @@ -3367,71 +2955,79 @@ "markers": "python_version >= '3.7'", "version": "==1.4.5" }, + "markdown-it-py": { + "hashes": [ + "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", + "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb" + ], + "markers": "python_version >= '3.8'", + "version": "==3.0.0" + }, "markupsafe": { "hashes": [ - "sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69", - "sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0", - "sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d", - 
"sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec", - "sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5", - "sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411", - "sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3", - "sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74", - "sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0", - "sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949", - "sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d", - "sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279", - "sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f", - "sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6", - "sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc", - "sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e", - "sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954", - "sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656", - "sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc", - "sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518", - "sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56", - "sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc", - "sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa", - "sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565", - "sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4", - "sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb", - "sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250", - "sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4", - "sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959", 
- "sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc", - "sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474", - "sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863", - "sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8", - "sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f", - "sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2", - "sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e", - "sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e", - "sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb", - "sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f", - "sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a", - "sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26", - "sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d", - "sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2", - "sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131", - "sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789", - "sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6", - "sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a", - "sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858", - "sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e", - "sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb", - "sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e", - "sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84", - "sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7", - "sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea", - 
"sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b", - "sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6", - "sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475", - "sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74", - "sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a", - "sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00" + "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", + "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", + "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", + "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", + "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", + "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", + "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", + "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df", + "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", + "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", + "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", + "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", + "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", + "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371", + "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2", + "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", + "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52", + "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", + "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", + "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", 
+ "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", + "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", + "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", + "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", + "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", + "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", + "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", + "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", + "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", + "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9", + "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", + "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", + "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", + "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", + "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", + "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", + "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a", + "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", + "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", + "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", + "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", + "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", + "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", + "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", + "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", + 
"sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f", + "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50", + "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", + "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", + "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", + "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", + "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", + "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", + "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", + "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf", + "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", + "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", + "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", + "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", + "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68" ], "markers": "python_version >= '3.7'", - "version": "==2.1.4" + "version": "==2.1.5" }, "matplotlib": { "hashes": [ @@ -3484,6 +3080,22 @@ "markers": "python_version >= '3.6'", "version": "==0.7.0" }, + "mdit-py-plugins": { + "hashes": [ + "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9", + "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b" + ], + "markers": "python_version >= '3.8'", + "version": "==0.4.0" + }, + "mdurl": { + "hashes": [ + "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", + "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba" + ], + "markers": "python_version >= '3.7'", + "version": "==0.1.2" + }, "mistune": { "hashes": [ "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205", @@ -3492,6 +3104,14 @@ 
"markers": "python_version >= '3.7'", "version": "==3.0.2" }, + "myst-parser": { + "hashes": [ + "sha256:7c36344ae39c8e740dad7fdabf5aa6fc4897a813083c6cc9990044eb93656b14", + "sha256:ea929a67a6a0b1683cdbe19b8d2e724cd7643f8aa3e7bb18dd65beac3483bead" + ], + "markers": "python_version >= '3.8'", + "version": "==2.0.0" + }, "nbclient": { "hashes": [ "sha256:4b28c207877cf33ef3a9838cdc7a54c5ceff981194a82eac59d558f05487295e", @@ -3620,14 +3240,6 @@ "markers": "python_version >= '3.6'", "version": "==0.8.3" }, - "pexpect": { - "hashes": [ - "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", - "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f" - ], - "markers": "sys_platform != 'win32'", - "version": "==4.9.0" - }, "pillow": { "hashes": [ "sha256:0304004f8067386b477d20a518b50f3fa658a28d44e4116970abfcd94fac34a8", @@ -3704,11 +3316,11 @@ }, "platformdirs": { "hashes": [ - "sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380", - "sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420" + "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068", + "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768" ], "markers": "python_version >= '3.8'", - "version": "==4.1.0" + "version": "==4.2.0" }, "plotly": { "hashes": [ @@ -3774,14 +3386,6 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==5.9.8" }, - "ptyprocess": { - "hashes": [ - "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", - "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" - ], - "markers": "os_name != 'nt'", - "version": "==0.7.0" - }, "pure-eval": { "hashes": [ "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", @@ -3862,6 +3466,38 @@ "markers": "python_version >= '3.6'", "version": "==2.0.7" }, + "pywin32": { + "hashes": [ + 
"sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d", + "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65", + "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e", + "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b", + "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4", + "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040", + "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a", + "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36", + "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8", + "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e", + "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802", + "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a", + "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407", + "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0" + ], + "markers": "sys_platform == 'win32' and platform_python_implementation != 'PyPy'", + "version": "==306" + }, + "pywinpty": { + "hashes": [ + "sha256:1617b729999eb6713590e17665052b1a6ae0ad76ee31e60b444147c5b6a35dca", + "sha256:189380469ca143d06e19e19ff3fba0fcefe8b4a8cc942140a6b863aed7eebb2d", + "sha256:21319cd1d7c8844fb2c970fb3a55a3db5543f112ff9cfcd623746b9c47501575", + "sha256:7520575b6546db23e693cbd865db2764097bd6d4ef5dc18c92555904cd62c3d4", + "sha256:8197de460ae8ebb7f5d1701dfa1b5df45b157bb832e92acba316305e18ca00dd", + "sha256:853985a8f48f4731a716653170cd735da36ffbdc79dcb4c7b7140bce11d8c722" + ], + "markers": "os_name == 'nt'", + "version": "==2.0.12" + }, "pyyaml": { "hashes": [ "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", @@ -4188,6 +3824,13 @@ "markers": "python_version >= '3.7'", "version": "==1.3.0" }, + "snowballstemmer": { + "hashes": [ + 
"sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", + "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a" + ], + "version": "==2.2.0" + }, "soupsieve": { "hashes": [ "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690", @@ -4196,6 +3839,79 @@ "markers": "python_version >= '3.8'", "version": "==2.5" }, + "sphinx": { + "hashes": [ + "sha256:1e09160a40b956dc623c910118fa636da93bd3ca0b9876a7b3df90f07d691560", + "sha256:9a5160e1ea90688d5963ba09a2dcd8bdd526620edbb65c328728f1b2228d5ab5" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==7.2.6" + }, + "sphinx-rtd-theme": { + "hashes": [ + "sha256:bd5d7b80622406762073a04ef8fadc5f9151261563d47027de09910ce03afe6b", + "sha256:ec93d0856dc280cf3aee9a4c9807c60e027c7f7b461b77aeffed682e68f0e586" + ], + "markers": "python_version >= '3.6'", + "version": "==2.0.0" + }, + "sphinxcontrib-applehelp": { + "hashes": [ + "sha256:c40a4f96f3776c4393d933412053962fac2b84f4c99a7982ba42e09576a70619", + "sha256:cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4" + ], + "markers": "python_version >= '3.9'", + "version": "==1.0.8" + }, + "sphinxcontrib-devhelp": { + "hashes": [ + "sha256:6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f", + "sha256:9893fd3f90506bc4b97bdb977ceb8fbd823989f4316b28c3841ec128544372d3" + ], + "markers": "python_version >= '3.9'", + "version": "==1.0.6" + }, + "sphinxcontrib-htmlhelp": { + "hashes": [ + "sha256:0dc87637d5de53dd5eec3a6a01753b1ccf99494bd756aafecd74b4fa9e729015", + "sha256:393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04" + ], + "markers": "python_version >= '3.9'", + "version": "==2.0.5" + }, + "sphinxcontrib-jquery": { + "hashes": [ + "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a", + "sha256:f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae" + ], + "markers": "python_version >= '2.7'", + "version": "==4.1" + }, + 
"sphinxcontrib-jsmath": { + "hashes": [ + "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", + "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.1" + }, + "sphinxcontrib-qthelp": { + "hashes": [ + "sha256:053dedc38823a80a7209a80860b16b722e9e0209e32fea98c90e4e6624588ed6", + "sha256:e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182" + ], + "markers": "python_version >= '3.9'", + "version": "==1.0.7" + }, + "sphinxcontrib-serializinghtml": { + "hashes": [ + "sha256:326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7", + "sha256:93f3f5dc458b91b192fe10c397e324f262cf163d79f3282c158e8436a2c4511f" + ], + "markers": "python_version >= '3.9'", + "version": "==1.1.10" + }, "stack-data": { "hashes": [ "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", diff --git a/README.md b/README.md index c9ab0ae..39a44fa 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ SPDX-FileCopyrightText: 2023 Berkay Bozkurt # Sales-Lead-Qualifier Project (AMOS WS 2023/24) -## Sum Insight Logo +## Sum Insights Logo diff --git a/build_app.sh b/build_app.sh deleted file mode 100755 index 5556ce2..0000000 --- a/build_app.sh +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Felix Zailskas - -#!/bin/bash - -application_name="sumup_app" - -docker build -t $application_name . 
diff --git a/src/controller/Controller.py b/deprecated/controller/Controller.py similarity index 100% rename from src/controller/Controller.py rename to deprecated/controller/Controller.py diff --git a/src/controller/messenger.py b/deprecated/controller/messenger.py similarity index 100% rename from src/controller/messenger.py rename to deprecated/controller/messenger.py diff --git a/deprecated/steps/report.py b/deprecated/steps/report.py new file mode 100644 index 0000000..6eab25c --- /dev/null +++ b/deprecated/steps/report.py @@ -0,0 +1,230 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2023 Fabian-Paul Utech +# SPDX-FileCopyrightText: 2023 Ahmed Sheta + +import argparse +import os + +import pandas as pd +from reportlab.lib import colors +from reportlab.lib.pagesizes import A4 +from reportlab.lib.styles import getSampleStyleSheet +from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle + +report_list = [] + +standard_group_format = { + # 1 pdf per lead (1 row in .csv) + "Lead": [ + "Last Name", + "First Name", + "Company / Account", + "Phone", + "Email", + "Predicted Size", + ], + # "Reviews": [ + # "google_places_user_ratings_total", + # "google_places_rating", + # "google_places_price_level", + # "reviews_sentiment_score", + # ], + #'Region':[] starts with regional_atlas + # Regarding columns names if there are more than one '_' take the split after the second _ +} + +file_list = [] + + +def process_lead(lead): + # Input search string (either specific leads or a whole file) + # Output: pd.series of a lead from leads_enriched.csv + try: + df = pd.read_csv("src/data/dummy_leads_email.csv", delimiter=",") + except FileNotFoundError: + raise FileNotFoundError("File not found.") + if os.path.exists( + os.path.dirname(lead) + ): # If a path was specified (by default the dummy dataset) + df = pd.read_csv(lead, delimiter=",") + return df + elif isinstance(lead, list): # A specified group of leads + rows = df[df["Company / 
Account"] in lead] + return rows + + elif isinstance(lead, str): # One specified lead + row = df[df["Company / Account"] == lead] + return row + else: + raise ValueError( + "Invalid type for 'lead'. It should be a single string, a list of strings, or a file path." + ) + + +def process_format(fmt): + if isinstance(fmt, list): # Transform list to dictionary + new_fmt = {} + + for value in fmt: + try: + key = str(standard_group_format[value]) + except: + key = "Others" + if key in new_fmt: + new_fmt[key] = new_fmt[key].append(str(value)) + else: + new_fmt[key] = [str(value)] + + return new_fmt + elif isinstance(fmt, dict): + return fmt + elif fmt is None: + return standard_group_format + else: + raise ValueError( + "Invalid type for 'format'. It should be either a list or a dictionary." + ) + + +def create_pdf(lead, format): + """ + Input: lead: pd.series + format: dict + Description: Function to create reports. + A report consists of tables of grouped features. + Output: '...'.pdf + """ + doc = SimpleDocTemplate( + f"src/data/reports/{lead['Company / Account']}.pdf", pagesize=A4 + ) + file_list.append(f"src/data/reports/{lead['Company / Account']}.pdf") + + report_list.append(f"src/data/reports/{lead['Company / Account']}.pdf") + + # Creating a Paragraph with a large font size and centered alignment + headline_style = getSampleStyleSheet()["Title"] + headline_style.fontSize = 32 + headline_style.alignment = 0 + + headline_paragraph = Paragraph(lead["Company / Account"], headline_style) + + # List for the 'Flowable' objects + elements = [headline_paragraph] + elements.append(Spacer(1, 50)) + + # Styles for tables and paragraphs + styles = getSampleStyleSheet() + + groups = format.keys() + + for group in groups: + title_paragraph = Paragraph(group, styles["Title"]) + elements.append(title_paragraph) + + col_names = format[group] + + # Header row + split_col = [col_names[i : i + 4] for i in range(0, len(col_names), 5)] + + # Center the table on the page + table_style = 
TableStyle( + [ + ("ALIGN", (0, 0), (-1, -1), "CENTER"), # center the text + ( + "VALIGN", + (0, 0), + (-1, -1), + "MIDDLE", + ), # put the text in the middle of the cell + ("TEXTCOLOR", (0, 0), (-1, 0), colors.black), + ("GRID", (0, 0), (-1, -1), 1, colors.black), + ( + "SPLITBYROWS", + (0, 0), + (-1, -1), + True, + ), # Ensure rows are not split between pages + ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"), + ] + ) + + for group_columns in split_col: + header_row = group_columns + data_row = [] + for column in group_columns: + try: + if lead[column] == "nan": + data_row.append("") + else: + data_row.append(str(lead[column])) + except: + data_row.append("") + + table = [header_row, data_row] + + pdf_table = Table(table) + pdf_table.setStyle(table_style) + + # Add the table to the elements + elements.append(pdf_table) + + # Add an empty line between tables + elements.append(Spacer(1, 25)) + + """for k,v in tmp_data.items(): + if isinstance(v, dict): + + ul_items=[] + for key,val in v.items(): + bolded_text = f'{key}:{val}' + ul_items.append(Paragraph(bolded_text,styles['Normal'])) + + col_index = list(tmp_data.keys()).index(k) + table_data[1][col_index] = ul_items""" + + """# Set left alignment for all non-header cells + for col in range(len(table_data[0])): + table_style.add('FONTNAME', (col, 0), (col, 0), 'Helvetica-Bold') + table_style.add('ALIGN', (col, 1), (col, -1), 'LEFT')""" + + # Build the PDF document + doc.build(elements) + + +def main(): + # file_list=[] + parser = argparse.ArgumentParser(description="Process lead and format arguments.") + parser.add_argument( + "--lead", + default="src/data/dummy_leads_email.csv", + help="Lead argument: a single search-string, a list of strings, or a file path.", + ) + parser.add_argument( + "--format", nargs="+", help="Format argument: a list or a dictionary." 
+ ) + + args = parser.parse_args() + + # Process lead argument (result: either specific row(/s) or a table) + # Choose lead with + processed_lead = process_lead(args.lead) + print("Generate the reports for the following leads: ") + print(processed_lead) + + # Process format argument (result: format that is a dictionary) + processed_format = process_format(args.format) + + # Generate report for every lead + + for index, lead in processed_lead.iterrows(): + create_pdf(lead, processed_format) + + print("\nReports saved:") + for file in file_list: + print(f"{file}") + + print() + + +if __name__ == "__main__": + main() diff --git a/src/bdc/steps/scrape_address.py b/deprecated/steps/scrape_address.py similarity index 100% rename from src/bdc/steps/scrape_address.py rename to deprecated/steps/scrape_address.py diff --git a/src/bdc/steps/social_media_api.py b/deprecated/steps/social_media_api.py similarity index 100% rename from src/bdc/steps/social_media_api.py rename to deprecated/steps/social_media_api.py diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index bdf1f14..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,30 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Ruchita Nathani - -version: "3" -services: - mongodb: - image: mongo:latest - ports: - - "27017:27017" - environment: - MONGO_INITDB_ROOT_USERNAME: ${DB_USER} - MONGO_INITDB_ROOT_PASSWORD: ${DB_PASSWORD} - networks: - - network_private - - sumup_app: - build: . 
- depends_on: - - mongodb - env_file: - - .env - # volumes: - # - .:opt/sumup_app - command: python main.py - networks: - - network_private - -networks: - network_private: - driver: bridge diff --git a/notebooks/fabian_feature_analysis.ipynb b/notebooks/fabian_feature_analysis.ipynb new file mode 100644 index 0000000..5bfa4aa --- /dev/null +++ b/notebooks/fabian_feature_analysis.ipynb @@ -0,0 +1,1043 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9d3b226f", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df = pd.read_csv(\"s3://amos-training-data/100k_historic_enriched.csv\")\n", + "categories_order = ['XS', 'S', 'M', 'L','XL']\n", + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f508fadb", + "metadata": {}, + "outputs": [], + "source": [ + "print(len(df))\n", + "df.groupby(by=['MerchantSizeByDPV']).size()/len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a850c5e7", + "metadata": {}, + "outputs": [], + "source": [ + "# Check if there are repetitions (> 0 => leads that exist multiple times according to the identifier)\n", + "identifier = df[['Phone','Email','Company Name','number_formatted','google_places_place_id','google_places_formatted_address','google_places_name','google_places_detailed_website']]\n", + "for col in identifier:\n", + " print(f'{col}: {len(df[col].unique())} ({1-len(df[col].unique())/df[col].count()})') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7d07397", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\" Regionalatlas: Placeholder\n", + "2222222222: nichts vorhanden, genau 0\n", + "5555555555: Zahlenwert unbekannt oder geheim zu halten\n", + "6666666666: Tabellenfach gesperrt, da Aussage nicht sinnvoll\n", + "7777777777: keine Angabe, da Zahlenwert nicht sicher genug\n", + "8888888888: Angabe fällt später an\n", + "\"\"\"\n", + "\n", + "exclude_values = [2222222222.0, 
5555555555.0, 6666666666.0, 7777777777.0, 8888888888.0]\n", + "regional_df = df.filter(like='regional', axis=1).dropna()\n", + "\n", + "# Dictionary to know which columns and indices have problematic values\n", + "rem_dic = {}\n", + "columns = []\n", + "\n", + "filter_df = regional_df.copy()\n", + "\n", + "for exc in exclude_values:\n", + " # Find all columns that have those values we need to exclude\n", + " col = regional_df.loc[:,(np.sum(df == exc,axis=0)>0)].columns.tolist()\n", + "\n", + " columns+=col\n", + " \n", + " \n", + " # Now we can use those columns to find the corresponding rows\n", + " for c in col:\n", + " indices = regional_df.loc[(np.sum(df == exc,axis=1)>0),col].index.tolist() \n", + " \n", + " rem_dic[c] = {str(exc):indices}\n", + " \n", + " filter_df = filter_df[df[c]!=exc]\n", + " print(f'column:{c}, value:{exc}')\n", + " \n", + "print(rem_dic)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16fcbf05", + "metadata": {}, + "outputs": [], + "source": [ + "# Irregular values defined by regionalatlas needs to be translated to nan so we can handle it later on\n", + "import numpy as np\n", + "regional_atlas = [col for col in df if col.startswith('regional_atlas')]\n", + "\n", + "print(\"Changed the following features, because of irregular values of regionalatlas:\")\n", + "for col in regional_atlas:\n", + " n_irr = (df[col]>=2222222222).sum()\n", + " n = (df[col].notnull()).sum()\n", + " \n", + " if (n_irr>0):\n", + " print(col+': '+str(n_irr)+' out of '+ str(n))\n", + " df[col] = np.where(df[col] >= 2222222222, np.nan, df[col])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "730b574a", + "metadata": {}, + "outputs": [], + "source": [ + "isna = sum(df['google_places_place_id_matches_phone_search'].isna())\n", + "print(f'Empty: {isna}')\n", + "print(f'Not empty: {df.shape[0]-isna}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35186540", + "metadata": {}, + "outputs": [], 
+ "source": [ + "print(df.groupby('MerchantSizeByDPV').count()['number_area'].reindex(categories_order))\n", + "\n", + "tmp = df[df['number_country']=='Germany'].groupby('MerchantSizeByDPV').count()\n", + "(tmp / (sum(tmp['Last Name'].values)/129))['First Name'].reindex(categories_order)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27014d88", + "metadata": {}, + "outputs": [], + "source": [ + "min_max_df = df.agg({\n", + "'google_places_user_ratings_total':[\"min\",\"max\"],\n", + "'google_places_rating':[\"min\",\"max\"],\n", + "'google_places_price_level':[\"min\",\"max\"],\n", + "'reviews_sentiment_score':[\"min\",\"max\"],\n", + "'regional_atlas_age_0':[\"min\",\"max\"],\n", + "'regional_atlas_age_1':[\"min\",\"max\"],\n", + "'regional_atlas_age_2':[\"min\",\"max\"],\n", + "'regional_atlas_age_3':[\"min\",\"max\"],\n", + "'regional_atlas_age_4':[\"min\",\"max\"],\n", + "'regional_atlas_per_service_sector':[\"min\",\"max\"],\n", + "'regional_atlas_per_trade':[\"min\",\"max\"],\n", + "'regional_atlas_employment_rate':[\"min\",\"max\"],\n", + "'regional_atlas_unemployment_rate':[\"min\",\"max\"],\n", + "'regional_atlas_per_long_term_unemployment':[\"min\",\"max\"],\n", + "'regional_atlas_pop_density':[\"min\",\"max\"],\n", + "'regional_atlas_pop_development':[\"min\",\"max\"],\n", + "'regional_atlas_pop_avg_age':[\"min\",\"max\"],\n", + "'regional_atlas_investments_p_employee':[\"min\",\"max\"],\n", + "'regional_atlas_gross_salary_p_employee':[\"min\",\"max\"],\n", + "'regional_atlas_disp_income_p_inhabitant':[\"min\",\"max\"],\n", + "'regional_atlas_tot_income_p_taxpayer':[\"min\",\"max\"],\n", + "'regional_atlas_gdp_p_employee':[\"min\",\"max\"],\n", + "'regional_atlas_gdp_development':[\"min\",\"max\"],\n", + "'regional_atlas_gdp_p_inhabitant':[\"min\",\"max\"],\n", + "'regional_atlas_gdp_p_workhours':[\"min\",\"max\"],\n", + "'regional_atlas_pop_avg_age_zensus':[\"min\",\"max\"],\n", + 
"'regional_atlas_regional_score':[\"min\",\"max\"]\n", + "})\n", + "\n", + "# Apply the function for each column\n", + "for col in min_max_df.columns:\n", + " min_feature = min_max_df[col]['min']\n", + " max_feature = min_max_df[col]['max']\n", + " print(f'{col}: [{min_feature}, {max_feature}]') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51d78040", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import percentileofscore\n", + "\n", + "percentile_col = [\n", + "'regional_atlas_age_0',\n", + "'regional_atlas_age_1',\n", + " 'regional_atlas_age_2',\n", + "'regional_atlas_age_3',\n", + "'regional_atlas_age_4',\n", + "'google_places_user_ratings_total','google_places_rating','reviews_sentiment_score','regional_atlas_pop_density',\n", + "'regional_atlas_pop_development',\n", + "'regional_atlas_pop_avg_age',\n", + "'regional_atlas_per_service_sector',\n", + "'regional_atlas_per_trade',\n", + "'regional_atlas_employment_rate',\n", + "'regional_atlas_unemployment_rate',\n", + "'regional_atlas_per_long_term_unemployment',\n", + "'regional_atlas_investments_p_employee',\n", + "'regional_atlas_gross_salary_p_employee',\n", + "'regional_atlas_disp_income_p_inhabitant',\n", + "'regional_atlas_tot_income_p_taxpayer',\n", + "'regional_atlas_gdp_p_employee',\n", + "'regional_atlas_gdp_development',\n", + "'regional_atlas_gdp_p_inhabitant',\n", + "'regional_atlas_gdp_p_workhours',\n", + "'regional_atlas_pop_avg_age_zensus',\n", + "'regional_atlas_regional_score']\n", + "\n", + "for col in percentile_col:\n", + " no_nan = df[col][df[col].notnull()]\n", + " col_name = col+'_percentiles' \n", + " df[col_name] = no_nan.apply(lambda x: percentileofscore(no_nan, x))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307c0e39", + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Adding the percentiles as columns for analysis and report\n", 
+ "\n", + "for col in percentile_col:\n", + " feature = col+\"_percentiles\"\n", + " not_nan = df[feature].notnull()\n", + "\n", + " classes = df['MerchantSizeByDPV'].unique()\n", + "\n", + " for c in classes:\n", + " sns.kdeplot(df[not_nan][df[not_nan]['MerchantSizeByDPV']==c][feature], fill=False, label=c)\n", + " \n", + " # Add labels and title\n", + " plt.xlabel('Value')\n", + " plt.ylabel('Density')\n", + " plt.title('Distribution of '+col)\n", + " plt.legend()\n", + "\n", + " # Show the plot\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8073b0a8", + "metadata": {}, + "outputs": [], + "source": [ + "for c in classes:\n", + " tmp = df[not_nan][df[not_nan]['MerchantSizeByDPV']==c]\n", + " sns.kdeplot(x=tmp['google_places_user_ratings_total_percentiles'], y=tmp['google_places_rating_percentiles'], fill=False, label=c)\n", + "\n", + " plt.xlabel('ratings_total')\n", + " plt.ylabel('rating_avg')\n", + " plt.title('Distribution of '+c)\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edb5923a", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "arr_false = {}\n", + "\n", + "for column in df:\n", + " \n", + " if df[column].dtype == bool:\n", + " false_count = np.count_nonzero(df[column] == False)\n", + " arr_false[column] = false_count\n", + " \n", + "print(arr_false)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30d74633", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Dangerous to come to a conclusion based on Gender.\n", + "\"\"\"\n", + "import gender_guesser.detector as gender\n", + "gd = gender.Detector()\n", + "df['Gender'] = df['First Name'].apply(str.capitalize).map(lambda x: gd.get_gender(x))\n", + "\n", + "group_feature = 'Gender' # MerchantSizeByDPV or Gender\n", + "total_counts = df[group_feature].value_counts().reset_index(name='total_count')\n", + "total_counts = 
total_counts.rename(columns={'index':group_feature})\n", + "grouped_counts = df.groupby(['Gender', 'MerchantSizeByDPV']).size().reset_index(name='count')\n", + "\n", + "result = pd.merge(grouped_counts, total_counts, on=group_feature)\n", + "result['proportion'] = result['count'] / result['total_count']\n", + "\n", + "category_order = ['XS','S','M','L','XL']\n", + "\n", + "\n", + "# Create separate DataFrames for each gender\n", + "# For better depiction .drop(index='XS') and take away XS from category_order\n", + "# andy: androgynous\n", + "andy_data = result[result['Gender'] == 'andy'].set_index('MerchantSizeByDPV')['proportion']\n", + "unknown_data = result[result['Gender'] == 'unknown'].set_index('MerchantSizeByDPV')['proportion']\n", + "mostly_female_data = result[result['Gender'] == 'mostly_female'].set_index('MerchantSizeByDPV')['proportion']\n", + "mostly_male_data = result[result['Gender'] == 'mostly_male'].set_index('MerchantSizeByDPV')['proportion']\n", + "male_data = result[result['Gender'] == 'male'].set_index('MerchantSizeByDPV')['proportion']\n", + "female_data = result[result['Gender'] == 'female'].set_index('MerchantSizeByDPV')['proportion']\n", + "\n", + "# Plotting\n", + "plt.plot(category_order, andy_data, label='Andy')\n", + "plt.plot(category_order, unknown_data, label='Unknown')\n", + "plt.plot(category_order, mostly_female_data, label='Mostly Female')\n", + "plt.plot(category_order, mostly_male_data, label='Mostly Male')\n", + "plt.plot(category_order, male_data, label='Male')\n", + "plt.plot(category_order, female_data, label='Female')\n", + "\n", + "# Set labels and title\n", + "plt.xlabel('MerchantSizeByDPV')\n", + "plt.ylabel('Proportion')\n", + "plt.title('Proportion of MerchantSizeByDPV for Each Gender')\n", + "\n", + "# Display the plot\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd4a15f7", + "metadata": {}, + "outputs": [], + "source": [ + "mcc_group = 
df.groupby(by=['MCC Level','MerchantSizeByDPV']).size()\n", + "grouped = mcc_group.unstack()\n", + "mcc_sum = mcc_group.groupby(level=0).sum()\n", + "\n", + "mcc_df = pd.concat([grouped, sum_test], axis=1)\n", + "tmp = mcc_df[0]\n", + "mcc_df = mcc_df.divide(mcc_df[0], axis=0).sort_values(by='XS', ascending=True)\n", + "mcc_df['Sum'] = tmp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e787a6cd", + "metadata": {}, + "outputs": [], + "source": [ + "print('Dropped the rows due to less than 50 examples:')\n", + "print(mcc_df[mcc_df['Sum']<50].index.values)\n", + "mcc_df = mcc_df[mcc_df['Sum']>=50]\n", + "\n", + "# Show every 10 categories (previously ordered by ascending XS), to compare the categories\n", + "# The first categories are the most attractive ones\n", + "for i in range(mcc_df.shape[0]): \n", + " if i % 10 == 0:\n", + " mcc_df.drop([0,'Sum','XS'],axis=1)[i:(i+5)].transpose().plot.line()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fc010da", + "metadata": {}, + "outputs": [], + "source": [ + "import ast\n", + "\n", + "data = df[df['google_places_detailed_type'].notnull()]\n", + "test = pd.Series([x for item in data.google_places_detailed_type for x in ast.literal_eval(item)]).value_counts()\n", + "test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd76690a", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.feature_extraction.text import CountVectorizer\n", + "\n", + "docs = df['google_places_detailed_type'][df['google_places_detailed_type'].notna()]\n", + "docs = docs.apply(lambda row: ast.literal_eval(row))\n", + "\n", + "vectorizer = CountVectorizer(analyzer=lambda x: x) # , min_df = 50\n", + "categories = vectorizer.fit_transform(docs).toarray()\n", + "vectorizer.get_feature_names_out()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd08fc9b", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import 
chi2_contingency\n", + "\n", + "# Create a contingency table for each feature\n", + "contingency_tables = {}\n", + "\n", + "cat_col = df[['google_places_candidate_count_mail','google_places_candidate_count_phone','google_places_rating','google_places_price_level','google_places_confidence','MCC Level', 'Gender','number_area','first_name_in_account','last_name_in_account','google_places_business_status','number_country','number_valid','number_possible','google_places_place_id_matches_phone_search']].fillna('no_data')\n", + "cat_col['b_google_website'] = df['google_places_detailed_website'].notnull()\n", + "\n", + "#for feature_column in df.columns[df.columns != 'label']:\n", + "for feature_column in cat_col.columns:\n", + " contingency_table = pd.crosstab(df['MerchantSizeByDPV'], cat_col[feature_column])\n", + " contingency_tables[feature_column] = contingency_table\n", + "\n", + "# Perform chi-squared test for each feature\n", + "results = {}\n", + "for feature, table in contingency_tables.items():\n", + " chi2_stat, p_value, dof, expected = chi2_contingency(table)\n", + " results[feature] = {'Chi-squared stat': chi2_stat, 'P-value': p_value, 'Degrees of Freedom': dof}\n", + "\n", + "# Display the results\n", + "for feature, result in results.items():\n", + " print(f\"\\nChi-squared test for {feature}:\")\n", + " print(f\"Chi-squared statistic: {result['Chi-squared stat']:.2f}\")\n", + " print(f\"P-value: {result['P-value']:.4f}\")\n", + " print(f\"Degrees of freedom: {result['Degrees of Freedom']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c46eb0f4", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "def b_bayesian(df,bin_column,b_value=True):\n", + " \n", + " prior_A = df.groupby('MerchantSizeByDPV').count()['Email']/df.shape[0] \n", + " prior_B = df[df[bin_column]==b_value].shape[0] / df[bin_column].shape[0]\n", + " evidence_A = 
df[df[bin_column]==b_value].groupby('MerchantSizeByDPV').count()[bin_column] / df.groupby('MerchantSizeByDPV').count()[bin_column]\n", + " posterior_B = (prior_A*evidence_A) / prior_B\n", + " \n", + " return posterior_B.reindex(index=['XS', 'S', 'M', 'L','XL'])\n", + "\n", + "per_size = (df.groupby('MerchantSizeByDPV').count()['Email']/df.shape[0]).reindex(index=['XS', 'S', 'M', 'L','XL'])\n", + "\n", + "\n", + "series_not_possible =b_bayesian(df,'number_possible',False)-per_size\n", + "series_invalid = b_bayesian(df,'number_valid',False)-per_size\n", + "series_first_name = b_bayesian(df,'first_name_in_account',True)-per_size\n", + "series_last_name = b_bayesian(df,'last_name_in_account',True)-per_size\n", + "\n", + "series_possible =b_bayesian(df,'number_possible',True)-per_size\n", + "series_valid = b_bayesian(df,'number_valid',True)-per_size\n", + "series_no_first_name = b_bayesian(df,'first_name_in_account',False)-per_size\n", + "series_no_last_name = b_bayesian(df,'last_name_in_account',False)-per_size\n", + "\n", + "# Ensure the 'Category' column is ordered\n", + "categories_order = ['XS', 'S', 'M', 'L','XL']\n", + "\n", + "# Plot the lines\n", + "plt.figure(figsize=(10, 6))\n", + "\n", + "\n", + "plt.plot(categories_order, series_not_possible, label='Number not possible', marker='o')\n", + "plt.plot(categories_order, series_invalid, label='Number invalid', marker='d')\n", + "plt.plot(categories_order, series_first_name, label='First name in account')\n", + "plt.plot(categories_order, series_last_name, label='Last name in account')\n", + "plt.plot(categories_order, series_possible, label='Number possible')\n", + "plt.plot(categories_order, series_valid, label='Number valid')\n", + "plt.plot(categories_order, series_no_first_name, label='First name not in account')\n", + "plt.plot(categories_order, series_no_last_name, label='Last name not in account')\n", + "#plt.plot(categories_order, per_size, label='Percentage of merchant size', marker='s',c='black')\n", + 
"\n", + "\n", + "plt.title('Bayesian')\n", + "plt.xlabel('Categories')\n", + "plt.ylabel('Percentages')\n", + "plt.legend()\n", + "plt.grid(True)\n", + "\n", + "# Show the plot\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a18821e", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "\n", + "class_colors = sns.color_palette(\"colorblind\")[:5]\n", + "regional_df = df.filter(like='regional', axis=1)\n", + "regional_df['MerchantSizeByDPV'] = df['MerchantSizeByDPV']\n", + "\n", + "# Plot boxplots for each column with different MerchantSizeByDPV boxplots next to each other\n", + "for i, column in enumerate(regional_df.columns[:-1]): # Exclude the last column ('MerchantSizeByDPV') \n", + " \n", + " if column == 'regional_atlas_pop_development': \n", + " axes = sns.boxplot(x=\"MerchantSizeByDPV\", hue =\"MerchantSizeByDPV\", y=column, data=regional_df[regional_df['regional_atlas_pop_development']<2000],palette=class_colors, order=['XS', 'S','M','L','XL']) \n", + "\n", + " elif column == 'regional_atlas_gdp_development':\n", + " axes = sns.boxplot(x=\"MerchantSizeByDPV\", hue =\"MerchantSizeByDPV\", y=column, data=regional_df[regional_df['regional_atlas_gdp_development']<60],palette=class_colors, order=['XS', 'S','M','L','XL']) \n", + " \n", + " else:\n", + " axes = sns.boxplot(x=\"MerchantSizeByDPV\", hue =\"MerchantSizeByDPV\", y=column, data=regional_df,palette=class_colors, order=['XS', 'S','M','L','XL'])\n", + " \n", + " axes.set_title(f'Boxplot of {column}')\n", + " axes.set_xlabel('MerchantSizeByDPV')\n", + " axes.set_ylabel(column) \n", + " \n", + " median_value = regional_df[regional_df['MerchantSizeByDPV'] == 'XL'][column].median()\n", + " axes.axhline(y=median_value, color='red', linestyle='--', label=f'Median (XL)')\n", + " axes.legend(bbox_to_anchor=(1.05, 0.5), loc='upper right')\n", + " \n", + " plt.show()" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18949200", + "metadata": {}, + "outputs": [], + "source": [ + "# Same like the boxplots but now with violinplots\n", + "for column in regional_df.filter(like='regional', axis=1).columns: \n", + " if column == 'regional_atlas_pop_development': \n", + " axes = sns.violinplot(x=\"MerchantSizeByDPV\", hue =\"MerchantSizeByDPV\", y=column, data=regional_df[regional_df['regional_atlas_pop_development']<2000],palette=class_colors, order=['XS', 'S','M','L','XL']) \n", + "\n", + " elif column == 'regional_atlas_gdp_development':\n", + " axes = sns.violinplot(x=\"MerchantSizeByDPV\", hue =\"MerchantSizeByDPV\", y=column, data=regional_df[regional_df['regional_atlas_gdp_development']<60],palette=class_colors, order=['XS', 'S','M','L','XL']) \n", + " \n", + " else:\n", + " axes = sns.violinplot(x=\"MerchantSizeByDPV\", hue =\"MerchantSizeByDPV\", y=column, data=regional_df,palette=class_colors, order=['XS', 'S','M','L','XL'])\n", + " \n", + " axes.set_title(f'Boxplot of {column}')\n", + " axes.set_xlabel('MerchantSizeByDPV')\n", + " axes.set_ylabel(column) \n", + " \n", + " median_value = regional_df[regional_df['MerchantSizeByDPV'] == 'XL'][column].median()\n", + " axes.axhline(y=median_value, color='red', linestyle='--', label=f'Median (XL)')\n", + " axes.legend(bbox_to_anchor=(1.05, 0.5), loc='upper right')\n", + " \n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c7b2074", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import preprocessing\n", + "\n", + "# Normalize the features before comparing / dividing them\n", + "x = regional_df.drop('MerchantSizeByDPV', axis = 1).values #returns a numpy array\n", + "min_max_scaler = preprocessing.MinMaxScaler()\n", + "x_scaled = min_max_scaler.fit_transform(x)\n", + "norm_regio = pd.DataFrame(x_scaled, columns=regional_df.drop('MerchantSizeByDPV', axis = 1).columns)\n", + "\n", + "# Compute the stats of 
the normalized regional data, to find a heuristic to evaluate the features' discriminative magnitudes\n", + "df_stats_XL = norm_regio[regional_df['MerchantSizeByDPV']=='XL'].describe()\n", + "df_stats_XS = norm_regio[regional_df['MerchantSizeByDPV']=='XS'].describe()\n", + "\n", + "((df_stats_XL.loc['50%'] - df_stats_XS.loc['50%'])/(df_stats_XL.loc['75%'] - df_stats_XL.loc['25%'])).sort_values(ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "684af78c", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Compute a correlation matrix for all float values of our dataframe\n", + "float_cols = df.columns[df.dtypes==float]\n", + "corr_matrix = df[float_cols].corr()\n", + "\n", + "# The diagonal values (correlation of each feature with itself) should be considered 0, to filter them out\n", + "np.fill_diagonal(corr_matrix.values, 0)\n", + "\n", + "# Create a new DataFrame that transforms all values to 0 that are below a value of defined by variable \"correlation_threshold\" \n", + "correlation_threshold = 0.89\n", + "filtered_correlation_df = corr_matrix.applymap(lambda x: x if abs(x) >= correlation_threshold else 0)\n", + "\n", + "# Identify the rows and columns that not only consists of 0 values (after filtering)\n", + "non_zero_rows = filtered_correlation_df.index[~(filtered_correlation_df == 0).all(axis=1)]\n", + "non_zero_columns = filtered_correlation_df.columns[~(filtered_correlation_df == 0).all(axis=0)]\n", + "new_correlation_df = filtered_correlation_df.loc[non_zero_rows, non_zero_columns]\n", + "\n", + "# Print the new correlation matrix and the corresponding plot\n", + "print(f\"New Correlation Matrix (values greater than {correlation_threshold}):\")\n", + "\n", + "plt.figure(figsize=(12, 10))\n", + "heatmap = sns.heatmap(new_correlation_df, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)\n", + "plt.title('Correlation Matrix Heatmap')\n", + 
"plt.savefig('correlation_matrix.svg', format='svg')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ba9ee1b", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "\n", + "reg_df = df.filter(like='regional', axis=1).dropna()\n", + "\n", + "# Standardize the features\n", + "scaler = StandardScaler()\n", + "scaled_data = scaler.fit_transform(reg_df.drop('MerchantSizeByDPV', axis=1))\n", + "\n", + "# Apply PCA\n", + "pca = PCA()\n", + "principal_components = pca.fit_transform(scaled_data)\n", + "\n", + "# Retrieve explained variance ratios\n", + "explained_variance_ratio = pca.explained_variance_ratio_\n", + "\n", + "components = pd.DataFrame(pca.components_, columns=filter_df.columns)\n", + "\n", + "# Print explained variance ratios\n", + "for i, ratio in enumerate(explained_variance_ratio, 1):\n", + " print(f\"Principal Component {i}: Explained Variance Ratio = {ratio:.4f}\")\n", + "\n", + "# Plot the cumulative explained variance\n", + "cumulative_variance = explained_variance_ratio.cumsum()\n", + "\n", + "plt.plot(range(1, len(cumulative_variance) + 1), cumulative_variance, marker='o')\n", + "plt.title('Cumulative Explained Variance')\n", + "plt.xlabel('Number of Principal Components')\n", + "plt.ylabel('Cumulative Variance Explained')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d731eff3", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "\n", + "# Count only those words, existing in a minum amount of 100 email adresses\n", + "count_vectorizer = CountVectorizer(min_df=50)\n", + "\n", + "# Fit and transform the text data\n", + "count_matrix = count_vectorizer.fit_transform(df['Email'])\n", + "\n", + "# Convert the matrix to a DataFrame for better readability\n", + "count_df = 
pd.DataFrame(count_matrix.toarray(), columns=count_vectorizer.get_feature_names_out())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4314186c", + "metadata": {}, + "outputs": [], + "source": [ + "common_words = pd.DataFrame(count_df.sum()).transpose()\n", + "\n", + "for word in common_words:\n", + " print(word)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdf6ba75", + "metadata": {}, + "outputs": [], + "source": [ + "# Names\n", + "names = []\n", + "\n", + "# Weird terms\n", + "weird_terms = []\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa498485", + "metadata": {}, + "outputs": [], + "source": [ + "grouped_common_words = []\n", + "per_size = (df.groupby('MerchantSizeByDPV').count()['Email']/df.shape[0]).reindex(index=['XS', 'S', 'M', 'L','XL'])\n", + "\n", + "for word in common_words.drop(weird_terms,axis=1): #common_words[names], common_words[weird_terms]\n", + " \n", + " indices= count_df[count_df[word]>0].index \n", + " per_word = (df.loc[indices].groupby('MerchantSizeByDPV').count()['Email']/len(df.loc[indices])).reindex(index=['XS', 'S', 'M', 'L','XL']) \n", + " \n", + " grouped_common_words.append((per_word-per_size).rename(word)) \n", + " \n", + "common_df = pd.concat(grouped_common_words, axis=1)\n", + "common_df = common_df.transpose()\n", + "\n", + "common_df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3aeb199a", + "metadata": {}, + "outputs": [], + "source": [ + "# The min/mean/max probability decrease (-) or increase (+) by a value of x with the existence of a certain common word\n", + "\n", + "print(f'{np.min(common_df[\"XS\"])}, {np.mean(common_df[\"XS\"])},{np.max(common_df[\"XS\"])}')\n", + "print(f'{np.min(common_df[\"S\"])}, {np.mean(common_df[\"S\"])},{np.max(common_df[\"S\"])}')\n", + "print(f'{np.min(common_df[\"M\"])}, {np.mean(common_df[\"M\"])},{np.max(common_df[\"M\"])}')\n", + 
"print(f'{np.min(common_df[\"L\"])}, {np.mean(common_df[\"L\"])},{np.max(common_df[\"L\"])}')\n", + "print(f'{np.min(common_df[\"XL\"])}, {np.mean(common_df[\"XL\"])},{np.max(common_df[\"XL\"])}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38d47b36", + "metadata": {}, + "outputs": [], + "source": [ + "import multiprocessing\n", + "import time\n", + "import pandas as pd\n", + "import numpy as np\n", + "from deutschland.bundesanzeiger import Bundesanzeiger\n", + "import pickle\n", + "import time\n", + "\n", + "def access_ba(company,b_bundesanzeiger,):\n", + "\n", + " b_bundesanzeiger.append(True)\n", + " try:\n", + " ba = Bundesanzeiger()\n", + " data = ba.get_reports(company)\n", + " except:\n", + " b_bundesanzeiger[-1] = False\n", + " return\n", + "\n", + " if __name__ == '__main__':\n", + "\n", + " \"\"\"\n", + " with open('list_file.pkl', 'rb') as file:\n", + " loaded_list = pickle.load(file)\n", + " print(loaded_list)\n", + " \"\"\"\n", + "\n", + " pd.set_option('display.max_columns', None)\n", + "\n", + " historic = pd.read_csv('historic.csv',sep = ',')#_enriched\n", + "\n", + " df = historic.groupby('MerchantSizeByDPV').apply(lambda x: x.sample(100))\n", + "\n", + "\n", + " with multiprocessing.Manager() as manager:\n", + "\n", + " b_bundesanzeiger = manager.list()\n", + " content_array = []\n", + " durations = []\n", + "\n", + " for i, company in enumerate(df[\"Company Name\"]):\n", + "\n", + " print(i)\n", + "\n", + " start = time.time()\n", + "\n", + " # Start access_ba as a process\n", + " p = multiprocessing.Process(target=access_ba, name=\"access_ba\", args=(company,b_bundesanzeiger))\n", + "\n", + " p.start()\n", + "\n", + " # Wait 8 seconds for access_ba\t\n", + " p.join(8)\n", + "\n", + " # If thread is active\n", + " if p.is_alive():\n", + " print (\"Terminate access_ba\")\n", + "\n", + " # Terminate access_ba\n", + " p.terminate()\n", + " b_bundesanzeiger[-1] = 'killed'\n", + "\n", + " # Cleanup\n", + " p.join()\n", + " 
i+=1\n", + "\n", + " print(b_bundesanzeiger[-1])\n", + " end = time.time()\n", + " print(end-start)\n", + " print()\n", + " durations.append(end-start)\n", + "\n", + " \"\"\"if i==100:\n", + " with open('list_file.pkl', 'wb') as file:\n", + " pickle.dump(list(b_bundesanzeiger), file)\n", + " print(np.mean(np.array(list(b_bundesanzeiger))))\n", + " break\n", + " \"\"\"\n", + "\n", + " with open('list_file.pkl', 'wb') as file:\n", + " pickle.dump(list(b_bundesanzeiger), file)\n", + "\n", + " with open('time.pkl', 'wb') as file:\n", + " pickle.dump(durations, file)\n", + "\n", + " df.to_pickle(\"./dataframe_sample.pkl\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8a6f460", + "metadata": {}, + "outputs": [], + "source": [ + "with open('dataframe_sample.pkl', 'rb') as f:\n", + " df = pickle.load(f)\n", + "\n", + "df = df.reset_index(drop=True)\n", + "\n", + "with open('list_file.pkl', 'rb') as f:\n", + " mynewlist = pickle.load(f)\n", + "\n", + "with open('time.pkl', 'rb') as f:\n", + " time = pickle.load(f)\n", + "\n", + "df_stats = pd.DataFrame({'b_bundesanzeiger': mynewlist, 'time': time})\n", + "\n", + "df['b_bundesanzeiger'] = df_stats['b_bundesanzeiger']\n", + "df['time'] = df_stats['time']\n", + "\n", + "\n", + "counts =df.groupby('MerchantSizeByDPV')['b_bundesanzeiger'].value_counts()\n", + "\n", + "desired_value_counts = counts.unstack().fillna(0)\n", + "\n", + "# Compute total counts per category\n", + "total_counts_per_category = counts.groupby('MerchantSizeByDPV').sum()\n", + "\n", + "# Compute probability for each category\n", + "probabilities = desired_value_counts.apply(lambda x: x / total_counts_per_category)\n", + "\n", + "print(probabilities)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30b18dea", + "metadata": {}, + "outputs": [], + "source": [ + "df['b_google_places'] = df[\"google_places_place_id\"].notnull()\n", + "counts 
=df.groupby('MerchantSizeByDPV')['b_google_places'].value_counts()\n", + "\n", + "desired_value_counts = counts.unstack().fillna(0)\n", + "\n", + "# Compute total counts per category\n", + "total_counts_per_category = counts.groupby('MerchantSizeByDPV').sum()\n", + "\n", + "# Compute probability for each category\n", + "probabilities = desired_value_counts.apply(lambda x: x / total_counts_per_category)\n", + "\n", + "print(probabilities)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f167c71", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "# Separate features (X) and target variable (y)\n", + "table = df[regional_columns+['MerchantSizeByDPV']].dropna()\n", + "y = table['MerchantSizeByDPV']\n", + "X=table[regional_columns]\n", + "\n", + "X_resampled, y_resampled = SMOTE(random_state=42).fit_resample(X, y)\n", + "\n", + "# Split the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)\n", + "\n", + "# Create a logistic regression model\n", + "model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=4000, class_weight='balanced')\n", + "\n", + "# Fit the model on the training data\n", + "model.fit(X_train, y_train)\n", + "\n", + "# Make predictions on the testing data\n", + "y_pred = model.predict(X_test)\n", + "\n", + "# Evaluate the model\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "conf_matrix = confusion_matrix(y_test, y_pred)\n", + "class_report = classification_report(y_test, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e6afdec", + "metadata": {}, + 
"outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import classification_report, accuracy_score, confusion_matrix\n", + "\n", + "# Assuming X and y are your feature matrix and target variable\n", + "X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)\n", + "\n", + "# Create and fit the Random Forest model\n", + "rf_model = RandomForestClassifier(n_estimators=100, random_state=42)\n", + "rf_model.fit(X_train, y_train)\n", + "\n", + "# Make predictions on the test set\n", + "y_pred = rf_model.predict(X_test)\n", + "\n", + "# Evaluate the model\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "conf_matrix = confusion_matrix(y_test, y_pred)\n", + "class_report = classification_report(y_test, y_pred)\n", + "\n", + "# Display evaluation metrics\n", + "print(f'Accuracy: {accuracy:.2f}')\n", + "print(f'Confusion Matrix:\\n{conf_matrix}')\n", + "print(f'Classification Report:\\n{class_report}')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abea2245", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.ensemble import IsolationForest\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import classification_report, accuracy_score, confusion_matrix\n", + "\n", + "# Assuming X and y are your feature matrix and target variable\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Define the set of rare classes\n", + "rare_classes = ['XL'] # Replace with the actual class labels you consider rare\n", + " # MAYBE NOT ONLY XL, but also L and M\n", + "# Create a binary target variable indicating whether each instance is rare or not\n", + "y_train_rare = y_train.isin(rare_classes).astype(int)\n", + "y_test_rare = 
y_test.isin(rare_classes).astype(int)\n", + "\n", + "# Create and fit the Isolation Forest model\n", + "if_model = IsolationForest(contamination='auto')\n", + "if_model.fit(X_train)\n", + "\n", + "# Predict anomalies on the test set\n", + "y_pred_rare = if_model.predict(X_test)\n", + "\n", + "# Convert the predicted labels to binary (1 for anomalies, -1 for normal instances)\n", + "y_pred_rare_binary = (y_pred_rare == -1).astype(int)\n", + "\n", + "# Evaluate the model\n", + "accuracy = accuracy_score(y_test_rare, y_pred_rare_binary)\n", + "conf_matrix = confusion_matrix(y_test_rare, y_pred_rare_binary)\n", + "class_report = classification_report(y_test_rare, y_pred_rare_binary)\n", + "\n", + "# Display evaluation metrics\n", + "print(f'Accuracy: {accuracy:.2f}')\n", + "print(f'Confusion Matrix:\\n{conf_matrix}')\n", + "print(f'Classification Report:\\n{class_report}')\n", + "\n", + "plt.figure(figsize=(6, 4))\n", + "sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,\n", + " xticklabels=[0,1], yticklabels=[0,1])\n", + "plt.xlabel('Predicted Label')\n", + "plt.ylabel('True Label')\n", + "plt.title('Confusion Matrix')\n", + "plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/fabian_feature_analysis.ipynb.license b/notebooks/fabian_feature_analysis.ipynb.license new file mode 100644 index 0000000..72915b0 --- /dev/null +++ b/notebooks/fabian_feature_analysis.ipynb.license @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2023 Fabian-Paul Utech diff --git a/run_app.sh b/run_app.sh deleted file mode 100755 
index 33f544c..0000000 --- a/run_app.sh +++ /dev/null @@ -1,25 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Felix Zailskas - -#!/bin/bash - -application_name="sumup_app" - -# Start building the docker command -command="docker run -i" - -# Read each line in the file -while IFS= read -r line; do - # Skip lines that start with '#' or are empty - if [[ $line =~ ^# ]] || [[ -z $line ]]; then - continue - fi - - # Append non-comment lines to the command - command+=" -e $line" -done < ".env" - -command+=" $application_name" - -# Run the command -eval $command diff --git a/src/create_geojson.py b/scripts/create_geojson.py similarity index 93% rename from src/create_geojson.py rename to scripts/create_geojson.py index daab877..9ef0260 100644 --- a/src/create_geojson.py +++ b/scripts/create_geojson.py @@ -7,7 +7,11 @@ import pandas as pd import regex as re -geojson_directory = "./src/data" +abspath = os.path.abspath(__file__) +dname = os.path.dirname(abspath) +os.chdir(dname) + +geojson_directory = "../src/data" # List all GeoJSON files in the directory geojson_files = [ diff --git a/src/evp/ml_model.py b/scripts/model_testing/ml_model.py similarity index 100% rename from src/evp/ml_model.py rename to scripts/model_testing/ml_model.py diff --git a/src/evp/nn_model.py b/scripts/model_testing/nn_model.py similarity index 100% rename from src/evp/nn_model.py rename to scripts/model_testing/nn_model.py diff --git a/src/evp/requirements.txt b/scripts/model_testing/requirements.txt similarity index 100% rename from src/evp/requirements.txt rename to scripts/model_testing/requirements.txt diff --git a/src/evp/sagemaker_training.py b/scripts/model_testing/sagemaker_training.py similarity index 100% rename from src/evp/sagemaker_training.py rename to scripts/model_testing/sagemaker_training.py diff --git a/src/report.py b/scripts/report.py similarity index 100% rename from src/report.py rename to scripts/report.py diff --git a/src/bdc/__init__.py 
b/src/bdc/__init__.py index 5d5bca2..b443ba0 100644 --- a/src/bdc/__init__.py +++ b/src/bdc/__init__.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2023 Ruchita Nathani -from .data_collector import DataCollector +from .pipeline import * +from .steps import * diff --git a/src/bdc/data_collector.py b/src/bdc/data_collector.py deleted file mode 100644 index 3802183..0000000 --- a/src/bdc/data_collector.py +++ /dev/null @@ -1,95 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Ruchita Nathani - -import csv -import json -import os -import random - -import requests - -from database.models import AnnualIncome, ProductOfInterest -from logger import get_logger - -log = get_logger() - - -class DataCollector: - # Limit API calls for testing - API_LIMIT = 10 - - def __init__(self): - self.data = [] - - def get_data_from_csv(self, file_path: str = "../data/sumup_leads_email.csv"): - """Retrieve information from the CSV file and utilize it in the Google API""" - self.data = [] - file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), file_path) - try: - with open(file_path, "r", encoding="utf8") as file: - csv_reader = csv.reader(file) - next(csv_reader) - - for row in csv_reader: - data_dict = { - "last_name": row[0], - "first_name": row[1], - "company_account": row[2], - "phone_number": row[3], - "email_address": row[4], - } - - self.data.append(data_dict) - log.info(f"Successfully read data from {file_path}") - except FileNotFoundError as e: - log.error(f"Error: Input file {file_path} for BDC not found.") - - return self.data - - def get_data_from_api(self, file_path: str = "../data/collected_data.json"): - """will utilize the data from the CSV file in the API key we are using, retrieve the necessary information from the API, and extract specific information that we need for the predictor. 
This relevant data will be stored in a JSON file.""" - api_url = "https://dummyjson.com/users" - try: - response = requests.get(api_url) - except Exception as e: - log.error("Error when fetching dummies") - return None - - if response.status_code == 200: - data = response.json() - file_path = os.path.join( - os.path.abspath(os.path.dirname(__file__)), - file_path, - ) - with open(file_path, "w") as json_file: - user_data = [] - for users in data["users"]: - data_dict = { - "lead_id": users["id"], - "first_name": users["firstName"], - "last_name": users["lastName"], - "phone_number": users["phone"], - "email_address": users["email"], - "company_address": users["company"]["address"]["address"], - "company_department": users["company"]["department"], - "company_name": users["company"]["name"], - "annual_income": random.randint(0, AnnualIncome.Class10.value), - "life_time_value": random.randint( - 0, AnnualIncome.Class10.value - ), - "customer_probability": random.random(), - "product_of_interest": random.choice(list(ProductOfInterest)), - } - - user_data.append(data_dict) - - json.dump(user_data, json_file, indent=4) - log.info( - f"Successfully fetched data from {api_url} and stored at {file_path}" - ) - return random.choice(user_data) - else: - log.warning( - f"Failed to fetch data from {api_url}. Status code: {response.status_code}" - ) - return None diff --git a/src/bdc/pipeline.py b/src/bdc/pipeline.py index 7764899..22f2294 100644 --- a/src/bdc/pipeline.py +++ b/src/bdc/pipeline.py @@ -52,7 +52,7 @@ def run(self): # cleanup step.finish() - except StepError as e: + except (StepError, Exception) as e: error_occurred = True log.error(f"Step {step.name} failed! 
{e}") finally: diff --git a/src/bdc/steps/__init__.py b/src/bdc/steps/__init__.py index 3150b5e..5736e45 100644 --- a/src/bdc/steps/__init__.py +++ b/src/bdc/steps/__init__.py @@ -1,14 +1,13 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2023 Lucca Baumgärtner -from .analyze_emails import AnalyzeEmails -from .analyze_reviews import GPTReviewSentimentAnalyzer, SmartReviewInsightsEnhancer -from .google_places import GooglePlaces -from .google_places_detailed import GooglePlacesDetailed -from .gpt_summarizer import GPTSummarizer -from .hash_generator import HashGenerator -from .preprocess_phonenumbers import PreprocessPhonenumbers -from .regionalatlas import RegionalAtlas -from .scrape_address import ScrapeAddress -from .search_offeneregister import SearchOffeneRegister -from .social_media_api import FacebookGraphAPI +from .analyze_emails import * +from .analyze_reviews import * +from .google_places import * +from .google_places_detailed import * +from .gpt_summarizer import * +from .hash_generator import * +from .preprocess_phonenumbers import * +from .regionalatlas import * +from .search_offeneregister import * +from .step import * diff --git a/src/bdc/steps/google_places.py b/src/bdc/steps/google_places.py index 96dea27..94dde24 100644 --- a/src/bdc/steps/google_places.py +++ b/src/bdc/steps/google_places.py @@ -228,7 +228,7 @@ def get_first_place_candidate(self, query, input_type) -> (dict, int): return None, 0 if not response["status"] == HTTPStatus.OK.name: - log.warning( + log.debug( f"Failed to fetch data. 
Status code: {response['status']}", ) return None, 0 diff --git a/src/bdc/steps/helpers/__init__.py b/src/bdc/steps/helpers/__init__.py index 8b04f93..31a6103 100644 --- a/src/bdc/steps/helpers/__init__.py +++ b/src/bdc/steps/helpers/__init__.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2023 Berkay Bozkurt -from .generate_hash_leads import LeadHashGenerator -from .offeneregister_api import OffeneRegisterAPI -from .text_analyzer import TextAnalyzer +from .generate_hash_leads import * +from .offeneregister_api import * +from .text_analyzer import * _lead_hash_generator = None diff --git a/src/bdc/steps/helpers/generate_hash_leads.py b/src/bdc/steps/helpers/generate_hash_leads.py index ea66c0f..4ab3ef0 100644 --- a/src/bdc/steps/helpers/generate_hash_leads.py +++ b/src/bdc/steps/helpers/generate_hash_leads.py @@ -7,7 +7,6 @@ import pandas as pd -from bdc.steps.step import Step from database import get_database from logger import get_logger @@ -56,12 +55,12 @@ def hash_check( if lead_hash in lookup_table: # If the hash exists in the lookup table, return the corresponding data - log.info(f"Hash {lead_hash} already exists in the lookup table.") + log.debug(f"Hash {lead_hash} already exists in the lookup table.") try: previous_data = lead_data[fields_tofill] return previous_data except KeyError as e: - log.info( + log.debug( f"Hash is present but data fields {fields_tofill} were not found." 
) lookup_table[lead_hash] = lookup_table[lead_hash][:-1] + [ diff --git a/src/bdc/steps/regionalatlas.py b/src/bdc/steps/regionalatlas.py index 38b9613..e24f634 100644 --- a/src/bdc/steps/regionalatlas.py +++ b/src/bdc/steps/regionalatlas.py @@ -6,7 +6,6 @@ import geopandas as gpd import osmnx import pandas as pd -from geopandas.tools import sjoin from pandas import DataFrame from tqdm import tqdm @@ -144,13 +143,13 @@ def run(self) -> DataFrame: tqdm.pandas(desc="Computing Regional Score") - self.df[self.added_cols[:-1]] = self.df.progress_apply( + self.df[self.added_cols[-1:]] = self.df.progress_apply( lambda lead: pd.Series( get_lead_hash_generator().hash_check( lead, self.calculate_regional_score, self.name + "_Regional-Score", - self.added_cols[:-1], + self.added_cols[-1:], lead, ) ), diff --git a/src/models/.gitkeep b/src/data/models/.gitkeep similarity index 100% rename from src/models/.gitkeep rename to src/data/models/.gitkeep diff --git a/src/data/preprocessed_data_files/.gitkeep b/src/data/preprocessed_data_files/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/database/__init__.py b/src/database/__init__.py index 27d62e8..1c26196 100644 --- a/src/database/__init__.py +++ b/src/database/__init__.py @@ -4,8 +4,6 @@ from config import DATABASE_TYPE from logger import get_logger -from .database_dummy import DatabaseDummy -from .db_connection import mongo_connection from .leads import LocalRepository, Repository, S3Repository _database = None diff --git a/src/database/database_dummy.py b/src/database/database_dummy.py deleted file mode 100644 index 1bd61a9..0000000 --- a/src/database/database_dummy.py +++ /dev/null @@ -1,31 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Felix Zailskas - -from typing import List - -import pandas as pd - -from database.models import Lead -from database.parsers import LeadParser -from logger import get_logger - -log = get_logger() - - -class DatabaseDummy: - def __init__(self, 
input_file: str = "data/leads_enriched.csv") -> None: - self.file = input_file - self.leads = LeadParser.parse_leads_from_csv(self.file) - - def get_lead_by_id(self, id_: int) -> Lead: - return self.leads[id_] - - def get_all_leads(self) -> List[Lead]: - return self.leads - - def get_cardinality(self) -> int: - return len(self.leads) - - def update_lead(self, lead: Lead): - log.debug(f"Updating database entry for lead#{lead.lead_id}") - log.debug(f"Update values: {lead}") diff --git a/src/database/db_connection.py b/src/database/db_connection.py deleted file mode 100644 index f0d5d99..0000000 --- a/src/database/db_connection.py +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Ruchita Nathani - -import pymongo - -from config import DB_CONNECTION - -_client = None - - -def mongo_connection(collection_name="default"): - global _client - if _client is None: - _client = pymongo.MongoClient(DB_CONNECTION) - db = _client["leads_enriched"] - collection = db[collection_name] - return collection diff --git a/src/database/leads/__init__.py b/src/database/leads/__init__.py index 7631206..72e0837 100644 --- a/src/database/leads/__init__.py +++ b/src/database/leads/__init__.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2023 Sophie Heasman -from .local_repository import LocalRepository -from .repository import Repository -from .s3_repository import S3Repository, decode_s3_url +from .local_repository import * +from .repository import * +from .s3_repository import * diff --git a/src/database/leads/local_repository.py b/src/database/leads/local_repository.py index ebeb90b..3bade62 100644 --- a/src/database/leads/local_repository.py +++ b/src/database/leads/local_repository.py @@ -4,7 +4,6 @@ import csv import json import os -from datetime import datetime from pathlib import Path import joblib @@ -25,9 +24,15 @@ class LocalRepository(Repository): DF_OUTPUT = os.path.abspath( os.path.join(BASE_PATH, 
"../../data/leads_enriched.csv") ) + DF_HISTORICAL_OUTPUT = os.path.abspath( + os.path.join(BASE_PATH, "../../data/100k_historic_enriched.csv") + ) DF_PREPROCESSED_INPUT = os.path.abspath( os.path.join(BASE_PATH, "../../data/preprocessed_data_files/") ) + DF_PREDICTION_OUTPUT = os.path.abspath( + os.path.join(BASE_PATH, "../../data/leads_predicted_size.csv") + ) REVIEWS = os.path.abspath(os.path.join(BASE_PATH, "../../data/reviews/")) SNAPSHOTS = os.path.abspath(os.path.join(BASE_PATH, "../../data/snapshots/")) GPT_RESULTS = os.path.abspath(os.path.join(BASE_PATH, "../../data/gpt-results/")) @@ -52,6 +57,13 @@ def save_dataframe(self): self.df.to_csv(self.DF_OUTPUT, index=False) log.info(f"Saved enriched data locally to {self.DF_OUTPUT}") + def save_prediction(self, df): + """ + Save dataframe in df parameter in chosen output location + """ + df.to_csv(self.DF_PREDICTION_OUTPUT, index=False) + log.info(f"Saved prediction result locally to {self.DF_PREDICTION_OUTPUT}") + def insert_data(self, data): """ TODO: Insert new data into specified dataframe @@ -69,7 +81,7 @@ def save_review(self, review, place_id, force_refresh=False): json_file_path = os.path.join(self.REVIEWS, file_name) if os.path.exists(json_file_path): - log.info(f"Reviews for {place_id} already exist") + log.debug(f"Reviews for {place_id} already exist") return with open(json_file_path, "w", encoding="utf-8") as json_file: @@ -197,6 +209,11 @@ def fetch_gpt_result(self, file_id, operation_name): try: with open(json_file_path, "r", encoding="utf-8") as json_file: data = json.load(json_file) + if operation_name not in data: + log.info( + f"Data for operation {operation_name} was not found in {json_file_path}" + ) + return "" return data[operation_name] except: log.warning(f"Error loading GPT results from path {json_file_path}.") @@ -249,8 +266,17 @@ def save_classification_report(self, report, model_name: str): except Exception as e: log.error(f"Could not save report at {report_file_path}! 
Error: {str(e)}") - def load_preprocessed_data(self, file_name: str = "preprocessed_data.csv"): + def get_preprocessed_data_path(self, historical: bool = True): + file_name = ( + "historical_preprocessed_data.csv" + if historical + else "preprocessed_data.csv" + ) + file_path = os.path.join(self.DF_PREPROCESSED_INPUT, file_name) + return file_path + + def load_preprocessed_data(self, historical: bool = True): try: - return pd.read_csv(os.path.join(self.DF_PREPROCESSED_INPUT, file_name)) + return pd.read_csv(self.get_preprocessed_data_path(historical)) except FileNotFoundError: log.error("Error: Could not find input file for preprocessed data.") diff --git a/src/database/leads/repository.py b/src/database/leads/repository.py index a44cc5b..90c9a78 100644 --- a/src/database/leads/repository.py +++ b/src/database/leads/repository.py @@ -18,12 +18,21 @@ def DF_INPUT(self): pass @property + @abstractmethod def DF_OUTPUT(self): """ Define database path to store dataframe """ pass + @property + @abstractmethod + def DF_HISTORICAL_OUTPUT(self): + """ + Define database path to store historical enriched dataframe (used for preprocessing input) + """ + pass + @property @abstractmethod def REVIEWS(self): @@ -65,7 +74,9 @@ def set_dataframe(self, df): def get_input_path(self): return self.DF_INPUT - def get_output_path(self): + def get_enriched_data_path(self, historical=False): + if historical: + return self.DF_HISTORICAL_OUTPUT return self.DF_OUTPUT @abstractmethod @@ -82,6 +93,13 @@ def save_dataframe(self): """ pass + @abstractmethod + def save_prediction(self, df): + """ + Save dataframe in df parameter in chosen output location + """ + pass + @abstractmethod def insert_data(self, data): """ @@ -221,7 +239,14 @@ def save_classification_report(self, report, model_name: str): pass @abstractmethod - def load_preprocessed_data(self, file_name: str): + def get_preprocessed_data_path(self, historical: bool = True): + """ + Returns the path for a preprocessed data file (either 
historical or current) + """ + pass + + @abstractmethod + def load_preprocessed_data(self, historical: bool = True): """ Load the preprocessed data from the given file """ diff --git a/src/database/leads/s3_repository.py b/src/database/leads/s3_repository.py index 4264ef4..912f5b3 100644 --- a/src/database/leads/s3_repository.py +++ b/src/database/leads/s3_repository.py @@ -43,6 +43,10 @@ class S3Repository(Repository): MODELS_BUCKET = "amos--models" DF_INPUT = f"s3://{EVENTS_BUCKET}/leads/enriched.csv" DF_OUTPUT = f"s3://{EVENTS_BUCKET}/leads/enriched.csv" + DF_HISTORICAL_OUTPUT = ( + f"s3://{EVENTS_BUCKET}/historical_data/100k_historic_enriched.csv" + ) + DF_PREDICTION_OUTPUT = f"s3://{EVENTS_BUCKET}/leads/leads_predicted_size.csv" DF_PREPROCESSED_INPUT = f"s3://{FEATURES_BUCKET}/preprocessed_data_files/" REVIEWS = f"s3://{EVENTS_BUCKET}/reviews/" SNAPSHOTS = f"s3://{EVENTS_BUCKET}/snapshots/" @@ -95,7 +99,7 @@ def _fetch_object_s3(self, bucket, obj_key): obj = s3.get_object(Bucket=bucket, Key=obj_key) except botocore.exceptions.ClientError as e: log.warning( - f"{e.response['Error']['Code']}: {e.response['Error']['Message']}" + f"{e.response['Error']['Code']}: {e.response['Error']['Message']} (s3://{bucket}/{obj_key})" if "Error" in e.response else f"Error while getting object s3://{bucket}/{obj_key}" ) @@ -131,6 +135,16 @@ def save_dataframe(self): self._save_to_s3(csv_buffer.getvalue(), bucket, obj_key) log.info(f"Successfully saved enriched leads to s3://{bucket}/{obj_key}") + def save_prediction(self, df): + """ + Save dataframe in df parameter in chosen output location + """ + bucket, obj_key = decode_s3_url(self.DF_PREDICTION_OUTPUT) + csv_buffer = StringIO() + df.to_csv(csv_buffer, index=False) + self._save_to_s3(csv_buffer.getvalue(), bucket, obj_key) + log.info(f"Successfully saved prediction result to s3://{bucket}/{obj_key}") + def _save_to_s3(self, data, bucket, key): s3.put_object( Bucket=bucket, @@ -209,8 +223,8 @@ def fetch_review(self, place_id): 
json_content = json.loads(file_content) return json_content except Exception as e: - log.error( - f"Error loading review from S3 with id {place_id}. Error: {str(e)}" + log.info( + f"No reviews in S3 for place with at s3://{bucket}/{key}. Error: {str(e)}" ) return [] @@ -374,13 +388,17 @@ def save_classification_report(self, report, model_name: str): except Exception as e: log.error(f"Could not save report for '{model_name}' to S3: {str(e)}") - def load_preprocessed_data(self, file_name: str = "preprocessed_data.csv"): + def get_preprocessed_data_path(self, historical: bool = True): + file_name = ( + "historical_preprocessed_data.csv" + if historical + else "preprocessed_data.csv" + ) file_path = self.DF_PREPROCESSED_INPUT + file_name - if not file_path.startswith("s3://"): - log.error( - "S3 location has to be defined like this: s3:///" - ) - return + return file_path + + def load_preprocessed_data(self, historical: bool = True): + file_path = self.get_preprocessed_data_path(historical) source = None remote_dataset = None diff --git a/src/database/models.py b/src/database/models.py deleted file mode 100644 index 2632e5d..0000000 --- a/src/database/models.py +++ /dev/null @@ -1,129 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Felix Zailskas - -from enum import Enum, IntEnum -from typing import Optional - -import numpy as np -from pydantic import BaseModel, EmailStr, Field -from sklearn.preprocessing import OneHotEncoder - - -class AnnualIncome(IntEnum): - Undefined = 0 # 0€ - Class1 = 1 # (0€, 35000€] - Class2 = 35001 # (35000€, 60000€] - Class3 = 60001 # (60000€, 100000€] - Class4 = 100001 # (100000€, 200000€] - Class5 = 200001 # (200000€, 400000€] - Class6 = 400001 # (400000€, 600000€] - Class7 = 600001 # (600000€, 1000000€] - Class8 = 1000001 # (1000000€, 2000000€] - Class9 = 2000001 # (2000000€, 5000000€] - Class10 = 5000001 # (5000000€, inf€] - - @classmethod - def _missing_(cls, value): - annual_income = cls.Undefined - for 
income_value in cls: - if value < income_value: - break - annual_income = income_value - return annual_income - - -class UserRatingsTotal(IntEnum): - Undefined = 0 - Class1 = 50 - Class2 = 100 - Class3 = 500 - Class4 = 1000 - Class5 = 10000 - - @classmethod - def _missing_(cls, value): - rating_total = cls.Undefined - for income_value in cls: - if value < income_value: - break - rating_total = income_value - return rating_total - - -class ProductOfInterest(str, Enum): - Undefined = "Undefined" - Nothing = "Nothing" - Terminals = "Terminals" - CashRegisterSystem = "Cash Register System" - BusinessAccount = "Business Account" - All = "All" - Other = "Other" - - @classmethod - def _missing_(cls, value): - return cls.Undefined - - -class BusinessStatus(str, Enum): - Undefined = "Undefined" - Operational = "OPERATIONAL" - ClosedTemporarily = "CLOSED_TEMPORARILY" - ClosedPermanently = "CLOSED_PERMANENTLY" - - @classmethod - def _missing_(cls, value): - return cls.Undefined - - -def encode_category(value, categories): - ohe = OneHotEncoder(sparse_output=False) - ohe.fit(np.array(categories).reshape(-1, 1)) - encoded = ohe.transform(np.array([value]).reshape(-1, 1)) - return encoded - - -class Lead(BaseModel): - lead_id: int # could be expended to a UUID later - first_name: str - last_name: str - email_address: str - phone_number: str - annual_income: Optional[AnnualIncome] - product_of_interest: Optional[ProductOfInterest] - lead_value: Optional[float] - domain: Optional[str] - number_valid: Optional[bool] - number_possible: Optional[bool] - google_places_business_status: Optional[BusinessStatus] - google_places_user_ratings_total: Optional[UserRatingsTotal] - - def to_one_hot_vector(self): - vector = np.array([]) - vector = np.append( - vector, - encode_category( - self.annual_income.value, [item.value for item in AnnualIncome] - ), - ) - vector = np.append( - vector, - encode_category( - self.product_of_interest.value, - [item.value for item in ProductOfInterest], - ), 
- ) - vector = np.append( - vector, np.array([int(self.domain is not None)]).astype(float) - ) - vector = np.append( - vector, - np.array([int(self.number_valid and self.number_possible)]).astype(float), - ) - vector = np.append( - vector, - encode_category( - self.google_places_business_status.value, - [item.value for item in BusinessStatus], - ), - ) - return vector diff --git a/src/database/parsers.py b/src/database/parsers.py deleted file mode 100644 index 9ce7e64..0000000 --- a/src/database/parsers.py +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Felix Zailskas - -from typing import Dict, List - -import numpy as np -import pandas as pd - -from database.models import ( - AnnualIncome, - BusinessStatus, - Lead, - ProductOfInterest, - UserRatingsTotal, -) -from logger import get_logger - -log = get_logger() - - -class LeadParser: - @staticmethod - def parse_leads_from_csv(path: str) -> List[Lead]: - try: - data_df = pd.read_csv(path) - except FileNotFoundError: - log.error(f"Could not find {path} while parsing leads") - leads = data_df.apply( - lambda row: Lead( - lead_id=row.name, - first_name=row["First Name"], - last_name=row["Last Name"], - email_address=str(row["Email"]), - phone_number=str(row["Phone"]), - annual_income=AnnualIncome.Undefined, - product_of_interest=ProductOfInterest.Undefined, - lead_value=float(row["lead_value"]) if "lead_value" in row else None, - domain=row["domain"] if not pd.isna(row["domain"]) else None, - number_valid=row["number_valid"], - number_possible=row["number_possible"], - google_places_business_status=BusinessStatus( - row["google_places_business_status"] - ), - google_places_user_ratings_total=UserRatingsTotal( - row["google_places_user_ratings_total"] - ), - ), - axis=1, - ).to_list() - return leads - - @staticmethod - def parse_lead_from_dict(data: Dict) -> Lead: - print(data) - return Lead( - lead_id=data["lead_id"], - first_name=data["First Name"], - last_name=data["Last 
Name"], - email_address=str(data["Email"]), - phone_number=str(data["Phone"]), - annual_income=AnnualIncome.Undefined, - product_of_interest=ProductOfInterest.Undefined, - lead_value=float(data["lead_value"]) if "lead_value" in data else None, - domain=data["domain"] if not pd.isna(data["domain"]) else None, - number_valid=data["number_valid"], - number_possible=data["number_possible"], - google_places_business_status=BusinessStatus( - data["google_places_business_status"] - ), - google_places_user_ratings_total=UserRatingsTotal( - data["google_places_user_ratings_total"] - ), - ) diff --git a/src/demo/__init__.py b/src/demo/__init__.py index a03c078..2ce7a72 100644 --- a/src/demo/__init__.py +++ b/src/demo/__init__.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2023 Berkay Bozkurt -from .console_utils import get_int_input, get_multiple_choice, get_yes_no_input -from .demos import bdc_demo, db_demo, evp_demo, pipeline_demo, preprocessing_demo +from .console_utils import * +from .demos import * +from .pipeline_utils import * diff --git a/src/demo/console_utils.py b/src/demo/console_utils.py index b4911dc..ab9de27 100644 --- a/src/demo/console_utils.py +++ b/src/demo/console_utils.py @@ -87,7 +87,7 @@ def get_multiple_choice(prompt: str, choices: list) -> str: prompt += "".join( f"({index}) : {choice} \n" for index, choice in enumerate(choices) ) - ind = get_int_input(prompt, range(len(choices) + 1)) + ind = get_int_input(prompt, range(len(choices))) return choices[ind] except ValueError: print("Invalid input. 
Please enter a valid integer.") diff --git a/src/demo/demos.py b/src/demo/demos.py index 0ad094b..4d90acc 100644 --- a/src/demo/demos.py +++ b/src/demo/demos.py @@ -7,10 +7,15 @@ # SPDX-FileCopyrightText: 2023 Ahmed Sheta +import re +import warnings + +import pandas as pd +import xgboost as xgb from sklearn.metrics import classification_report -from bdc import DataCollector from bdc.pipeline import Pipeline +from config import DATABASE_TYPE from database import get_database from demo.console_utils import ( get_int_input, @@ -29,6 +34,10 @@ from logger import get_logger from preprocessing import Preprocessing +warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning) +warnings.simplefilter(action="ignore", category=FutureWarning) + + log = get_logger() # Constants and configurations @@ -38,21 +47,6 @@ OUTPUT_FILE_BDC = "../data/collected_data.json" -# bdc_demo -def bdc_demo(): - dc = DataCollector() - try: - choice = get_int_input("(1) Read CSV\n(2) Dummy API\n", range(1, 3)) - if choice == 1: - dc.get_data_from_csv(file_path=INPUT_FILE_BDC) - elif choice == 2: - dc.get_data_from_api(file_path=OUTPUT_FILE_BDC) - else: - print("Invalid choice") - except ValueError: - print("Invalid choice") - - # evp demo def evp_demo(): data = get_database().load_preprocessed_data() @@ -145,18 +139,6 @@ def predict_single_lead(evp: EstimatedValuePredictor): print("Invalid Choice") -# db_demo -def db_demo(): - amt_leads = get_database().get_cardinality() - lead_id = get_int_input( - f"Choose a lead_id in range [1, {amt_leads}]\n", range(1, amt_leads + 1) - ) - if 1 <= lead_id <= amt_leads: - print(get_database().get_lead_by_id(lead_id)) - else: - print("Invalid Choice") - - def add_step_if_requested(steps, step_class, step_desc, step_warning_message: str = ""): if get_yes_no_input(f"Run {step_desc} {step_warning_message}(y/N)?\n"): force = get_yes_no_input("Force execution if data is present? 
(y/N)\n") @@ -225,7 +207,7 @@ def pipeline_demo(): steps_info = "\n".join([str(step) for step in steps]) log.info( - f"Running Pipeline with steps:\n{steps_info}\ninput_location={get_database().get_input_path()}\noutput_location={get_database().get_output_path()}" + f"Running Pipeline with steps:\n{steps_info}\ninput_location={get_database().get_input_path()}\noutput_location={get_database().get_enriched_data_path()}" ) pipeline = Pipeline( @@ -237,18 +219,138 @@ def pipeline_demo(): def preprocessing_demo(): - if get_yes_no_input("Filter out the API-irrelevant data? (y/n)"): + if get_yes_no_input("Filter out the API-irrelevant data? (y/n)\n"): filter_bool = True else: filter_bool = False if get_yes_no_input( - "Run on historical data ? (y/n)\nNote: DATABASE_TYPE should be S3!" + "Run on historical data ? (y/n)\n'n' means it will run on lead data!\n" ): historical_bool = True else: historical_bool = False + preprocessor = Preprocessing( - filter_null_data=filter_bool, historical_data=historical_bool + filter_null_data=filter_bool, historical_bool=historical_bool ) + + preprocessor.preprocessed_df = pd.read_csv(preprocessor.data_path) + df = preprocessor.implement_preprocessing_pipeline() preprocessor.save_preprocessed_data() + + +def predict_MerchantSize_on_lead_data_demo(): + import os + import sys + + import pandas as pd + + log.info( + "Note: In case of running locally, enriched data must be located at src/data/leads_enriched.csv\nIn case of running on S3, enriched data must be located at s3://amos--data--events/leads/enriched.csv" + ) + + ######################### preprocessing the leads ################################## + S3_bool = DATABASE_TYPE == "S3" + current_dir = os.path.dirname(__file__) if "__file__" in locals() else os.getcwd() + parent_dir = os.path.join(current_dir, "..") + sys.path.append(parent_dir) + from database import get_database + from preprocessing import Preprocessing + + db = get_database() + + log.info(f"Preprocessing the leads...") + 
preprocessor = Preprocessing(filter_null_data=False, historical_bool=False) + preprocessor.preprocessed_df = pd.read_csv(preprocessor.data_path) + df = preprocessor.implement_preprocessing_pipeline() + preprocessor.save_preprocessed_data() + + ############################## adapting the preprocessing files ########################### + log.info(f"Adapting the leads' preprocessed data for the ML model...") + # load the data from the CSV files + historical_preprocessed_data = db.load_preprocessed_data(historical=True) + unlabeled_preprocessed_data = db.load_preprocessed_data(historical=False) + + historical_columns_order = historical_preprocessed_data.columns + + missing_columns = set(historical_columns_order) - set( + unlabeled_preprocessed_data.columns + ) + unlabeled_preprocessed_data[list(missing_columns)] = 0 + + for column in unlabeled_preprocessed_data.columns: + if column not in historical_columns_order: + unlabeled_preprocessed_data = unlabeled_preprocessed_data.drop( + column, axis=1 + ) + + # reorder columns + unlabeled_preprocessed_data = unlabeled_preprocessed_data[historical_columns_order] + unlabeled_preprocessed_data.to_csv( + preprocessor.preprocessed_data_output_path, + index=False, + ) + log.info( + f"Saving the adapted preprocessed data at {preprocessor.preprocessed_data_output_path}" + ) + + # check if columns in both dataframe are in same order and same number + assert list(unlabeled_preprocessed_data.columns) == list( + historical_preprocessed_data.columns + ), "Column names are different" + + ####################### Applying ML model on lead data #################################### + + bucket_name = "amos--models" + + if S3_bool: + model_name = get_string_input( + "Provide model file name in amos--models/models S3 Bucket\nInput example: lightgbm_epochs(1)_f1(0.6375)_numclasses(5)_model.pkl\n" + ) + else: + model_name = get_string_input( + "Provide model file name in data/models local directory\nInput example: 
lightgbm_epochs(1)_f1(0.6375)_numclasses(5)_model.pkl\n" + ) + model_name = model_name.strip() + xgb_bool = False + if model_name.lower().startswith("xgb"): + xgb_bool = True + + def check_classification_task(string): + match = re.search(r"numclasses\((\d+)\)", string) + if match: + last_number = int(match.group(1)) + if last_number == 3: + return True + else: + False + + classification_task_3 = check_classification_task(model_name) + + try: + model = db.load_ml_model(model_name) + log.info(f"Loaded the model {model_name}!") + except: + log.error("No model found with the given name!") + return + + df = pd.read_csv(preprocessor.preprocessed_data_output_path) + input = df.drop("MerchantSizeByDPV", axis=1) + if xgb_bool: + input = xgb.DMatrix(input) + + predictions = model.predict(input) + if classification_task_3: + size_mapping = {0: "XS", 1: "{S, M, L}", 2: "XL"} + else: + size_mapping = {0: "XS", 1: "S", 2: "M", 3: "L", 4: "XL"} + remapped_predictions = [size_mapping[prediction] for prediction in predictions] + + enriched_data = pd.read_csv(preprocessor.data_path) + + # first 5 columns: Last Name,First Name,Company / Account,Phone,Email, + raw_data = enriched_data.iloc[:, :5] + raw_data["PredictedMerchantSize"] = remapped_predictions + + db.save_prediction(raw_data) diff --git a/src/demo/pipeline_configs/config_sprint09_release.json b/src/demo/pipeline_configs/config_sprint09_release.json index a6ff45b..f726661 100644 --- a/src/demo/pipeline_configs/config_sprint09_release.json +++ b/src/demo/pipeline_configs/config_sprint09_release.json @@ -6,14 +6,6 @@ "name": "AnalyzeEmails", "force_refresh": true }, - { - "name": "ScrapeAddress", - "force_refresh": true - }, - { - "name": "FacebookGraphAPI", - "force_refresh": true - }, { "name": "PreprocessPhonenumbers", "force_refresh": true diff --git a/src/demo/pipeline_configs/config_template b/src/demo/pipeline_configs/config_template index fb505d2..9fc5eb1 100644 --- a/src/demo/pipeline_configs/config_template +++ 
b/src/demo/pipeline_configs/config_template @@ -7,14 +7,6 @@ "name": "AnalyzeEmails", "force_refresh": true }, - { - "name": "ScrapeAddress", - "force_refresh": true - }, - { - "name": "FacebookGraphAPI", - "force_refresh": true - }, { "name": "PreprocessPhonenumbers", "force_refresh": true diff --git a/src/demo/pipeline_configs/demo_pipeline.json b/src/demo/pipeline_configs/demo_pipeline.json new file mode 100644 index 0000000..85df118 --- /dev/null +++ b/src/demo/pipeline_configs/demo_pipeline.json @@ -0,0 +1,39 @@ +{ + "description": "This config is optimized for demoing our software.", + "config": { + "steps": [ + { + "name": "HashGenerator", + "force_refresh": true + }, + { + "name": "AnalyzeEmails", + "force_refresh": true + }, + { + "name": "PreprocessPhonenumbers", + "force_refresh": true + }, + { + "name": "GooglePlaces", + "force_refresh": true + }, + { + "name": "GooglePlacesDetailed", + "force_refresh": true + }, + { + "name": "GPTReviewSentimentAnalyzer", + "force_refresh": true + }, + { + "name": "SmartReviewInsightsEnhancer", + "force_refresh": true + }, + { + "name": "RegionalAtlas", + "force_refresh": true + } + ] + } +} diff --git a/src/demo/pipeline_configs/demo_pipeline.json.license b/src/demo/pipeline_configs/demo_pipeline.json.license new file mode 100644 index 0000000..875941a --- /dev/null +++ b/src/demo/pipeline_configs/demo_pipeline.json.license @@ -0,0 +1,2 @@ +SPDX-License-Identifier: MIT +SPDX-FileCopyrightText: 2024 Simon Zimmermann diff --git a/src/demo/pipeline_configs/force_refresh_all_steps.json b/src/demo/pipeline_configs/force_refresh_all_steps.json new file mode 100644 index 0000000..8356533 --- /dev/null +++ b/src/demo/pipeline_configs/force_refresh_all_steps.json @@ -0,0 +1,43 @@ +{ + "description": "This config runs all steps with force_refresh set to true.", + "config": { + "steps": [ + { + "name": "HashGenerator", + "force_refresh": true + }, + { + "name": "AnalyzeEmails", + "force_refresh": true + }, + { + "name": 
"PreprocessPhonenumbers", + "force_refresh": true + }, + { + "name": "GooglePlaces", + "force_refresh": true + }, + { + "name": "GooglePlacesDetailed", + "force_refresh": true + }, + { + "name": "GPTReviewSentimentAnalyzer", + "force_refresh": true + }, + { + "name": "GPTSummarizer", + "force_refresh": true + }, + { + "name": "SmartReviewInsightsEnhancer", + "force_refresh": true + }, + { + "name": "RegionalAtlas", + "force_refresh": true + } + ] + } +} diff --git a/src/demo/pipeline_configs/force_refresh_all_steps.json.license b/src/demo/pipeline_configs/force_refresh_all_steps.json.license new file mode 100644 index 0000000..f079a3f --- /dev/null +++ b/src/demo/pipeline_configs/force_refresh_all_steps.json.license @@ -0,0 +1,2 @@ +SPDX-License-Identifier: MIT +SPDX-FileCopyrightText: 2023 Berkay Bozkurt diff --git a/src/demo/pipeline_configs/regionalatlas_only.json b/src/demo/pipeline_configs/regionalatlas_only.json new file mode 100644 index 0000000..16c15eb --- /dev/null +++ b/src/demo/pipeline_configs/regionalatlas_only.json @@ -0,0 +1,27 @@ +{ + "description": "This config runs all steps with force_refresh set to true.", + "config": { + "steps": [ + { + "name": "HashGenerator", + "force_refresh": true + }, + { + "name": "AnalyzeEmails", + "force_refresh": true + }, + { + "name": "PreprocessPhonenumbers", + "force_refresh": true + }, + { + "name": "GooglePlaces", + "force_refresh": true + }, + { + "name": "RegionalAtlas", + "force_refresh": true + } + ] + } +} diff --git a/src/demo/pipeline_configs/regionalatlas_only.json.license b/src/demo/pipeline_configs/regionalatlas_only.json.license new file mode 100644 index 0000000..4ff3a64 --- /dev/null +++ b/src/demo/pipeline_configs/regionalatlas_only.json.license @@ -0,0 +1,2 @@ +SPDX-License-Identifier: MIT +SPDX-FileCopyrightText: 2023 Lucca Baumgärtner diff --git a/src/demo/pipeline_configs/run_all_steps.json b/src/demo/pipeline_configs/run_all_steps.json index 139a8d8..f694adb 100644 --- 
a/src/demo/pipeline_configs/run_all_steps.json +++ b/src/demo/pipeline_configs/run_all_steps.json @@ -1,50 +1,42 @@ { - "description": "This config runs all steps with force_refresh set to true.", + "description": "This config runs all steps with force_refresh set to false.", "config": { "steps": [ { "name": "HashGenerator", - "force_refresh": true + "force_refresh": false }, { "name": "AnalyzeEmails", - "force_refresh": true - }, - { - "name": "ScrapeAddress", - "force_refresh": true - }, - { - "name": "FacebookGraphAPI", - "force_refresh": true + "force_refresh": false }, { "name": "PreprocessPhonenumbers", - "force_refresh": true + "force_refresh": false }, { "name": "GooglePlaces", - "force_refresh": true + "force_refresh": false }, { "name": "GooglePlacesDetailed", - "force_refresh": true + "force_refresh": false }, { "name": "GPTReviewSentimentAnalyzer", - "force_refresh": true + "force_refresh": false }, { "name": "GPTSummarizer", - "force_refresh": true + "force_refresh": false }, { "name": "SmartReviewInsightsEnhancer", - "force_refresh": true + "force_refresh": false }, { "name": "RegionalAtlas", - "force_refresh": true + "force_refresh": false } ] } diff --git a/src/demo/pipeline_utils.py b/src/demo/pipeline_utils.py index c7120c7..d95435a 100644 --- a/src/demo/pipeline_utils.py +++ b/src/demo/pipeline_utils.py @@ -10,7 +10,6 @@ from bdc.steps import ( AnalyzeEmails, - FacebookGraphAPI, GooglePlaces, GooglePlacesDetailed, GPTReviewSentimentAnalyzer, @@ -18,7 +17,6 @@ HashGenerator, PreprocessPhonenumbers, RegionalAtlas, - ScrapeAddress, SearchOffeneRegister, SmartReviewInsightsEnhancer, ) @@ -28,23 +26,19 @@ STEP_STR_TO_CLASS = { "HashGenerator": HashGenerator, "AnalyzeEmails": AnalyzeEmails, - "FacebookGraphAPI": FacebookGraphAPI, "GooglePlaces": GooglePlaces, "GooglePlacesDetailed": GooglePlacesDetailed, "GPTReviewSentimentAnalyzer": GPTReviewSentimentAnalyzer, "GPTSummarizer": GPTSummarizer, "PreprocessPhonenumbers": PreprocessPhonenumbers, 
"RegionalAtlas": RegionalAtlas, - "ScrapeAddress": ScrapeAddress, "SearchOffeneRegister": SearchOffeneRegister, "SmartReviewInsightsEnhancer": SmartReviewInsightsEnhancer, } # Please do not write following lists! Use the functions below instead. _additional_pipeline_steps = [ - (ScrapeAddress, "Scrape Address", "(will take a long time)"), (SearchOffeneRegister, "Search OffeneRegister", "(will take a long time)"), - (FacebookGraphAPI, "Facebook Graph API", "(will use token)"), (PreprocessPhonenumbers, "Phone Number Validation", ""), ( GooglePlaces, diff --git a/src/docs/.gitkeep b/src/docs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/docs/Makefile b/src/docs/Makefile new file mode 100644 index 0000000..da3682c --- /dev/null +++ b/src/docs/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2023 Berkay Bozkurt + +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/src/docs/conf.py b/src/docs/conf.py new file mode 100644 index 0000000..0288591 --- /dev/null +++ b/src/docs/conf.py @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2023 Berkay Bozkurt + +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import os +import sys + +sys.path.insert(0, os.path.abspath("..")) + +project = "Sales Lead Qualifier" +copyright = "2024, SumInsights" +author = "SumInsights" +release = "01.00.00" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "myst_parser", +] + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] diff --git a/src/docs/index.rst b/src/docs/index.rst new file mode 100644 index 0000000..ad0ace2 --- /dev/null +++ b/src/docs/index.rst @@ -0,0 +1,24 @@ + .. SPDX-License-Identifier: MIT + SPDX-FileCopyrightText: 2023 Berkay Bozkurt + +.. Sales Lead Qualifier documentation master file, created by + sphinx-quickstart on Sat Feb 3 02:33:45 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Sales Lead Qualifier's documentation! +================================================ + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + + readme_link + modules + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/src/docs/make.bat b/src/docs/make.bat new file mode 100644 index 0000000..000a0f0 --- /dev/null +++ b/src/docs/make.bat @@ -0,0 +1,37 @@ +:: # SPDX-License-Identifier: MIT +:: # SPDX-FileCopyrightText: 2023 Berkay Bozkurt +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/src/docs/readme_link.md b/src/docs/readme_link.md new file mode 100644 index 0000000..e5f876e --- /dev/null +++ b/src/docs/readme_link.md @@ -0,0 +1,8 @@ + + +```{include} ../../README.md + +``` diff --git a/src/evp/__init__.py b/src/evp/__init__.py index dd4f024..5259ee9 100644 --- a/src/evp/__init__.py +++ b/src/evp/__init__.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2023 Felix Zailskas -from .evp import EstimatedValuePredictor +from .evp import * +from .predictors import * diff --git a/src/evp/data_processing.py b/src/evp/data_processing.py deleted file mode 100644 index ecfc7e3..0000000 --- a/src/evp/data_processing.py +++ /dev/null @@ -1,48 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Felix Zailskas - 
-import numpy as np -import pandas as pd -from sklearn.model_selection import train_test_split - -from logger import get_logger - -log = get_logger() - - -def split_dataset( - in_path: str, - out_path: str, - train_size: float, - val_size: float, - test_size: float, - add_labels: bool = False, -): - valid_sizes = train_size + val_size + test_size == 1 - if not valid_sizes: - log.error( - "Invalid size combination. Training, validation and test size must add to 1" - ) - return None - try: - full_df = pd.read_csv(in_path, index_col=None) - if add_labels: - full_df["lead_value"] = np.random.uniform( - low=1000, high=1000000, size=len(full_df) - ) - except FileNotFoundError: - log.error(f"Could not find {in_path} splitting data") - return - relative_val_size = val_size / (1 - test_size) - train_val_df, test_df = train_test_split( - full_df, - test_size=test_size, - ) - train_df, val_df = train_test_split(train_val_df, test_size=relative_val_size) - train_df = train_df.reset_index(drop=True) - val_df = val_df.reset_index(drop=True) - test_df = test_df.reset_index(drop=True) - train_df.to_csv(f"{out_path}_train.csv") - val_df.to_csv(f"{out_path}_val.csv") - test_df.to_csv(f"{out_path}_test.csv") - return train_df, val_df, test_df diff --git a/src/logger/__init__.py b/src/logger/__init__.py index 7eb19e1..5cc57bd 100644 --- a/src/logger/__init__.py +++ b/src/logger/__init__.py @@ -3,7 +3,7 @@ import os -from .logger import CustomLogger +from .logger import * _logger = None diff --git a/src/logger/logger.py b/src/logger/logger.py index ccd9eb1..fe08296 100644 --- a/src/logger/logger.py +++ b/src/logger/logger.py @@ -54,7 +54,7 @@ def __init__(self, name, log_dir=None): # Create stream handler for logging to stdout (log all five levels) self.stdout_handler = logging.StreamHandler(sys.stdout) - self.stdout_handler.setLevel(logging.DEBUG) + self.stdout_handler.setLevel(logging.INFO) self.stdout_handler.setFormatter(StdOutFormatter()) self.enable_console_output() diff --git 
a/src/main.py b/src/main.py index 49cf73e..33300ba 100644 --- a/src/main.py +++ b/src/main.py @@ -4,11 +4,10 @@ import os from demo import ( - bdc_demo, - db_demo, evp_demo, get_multiple_choice, pipeline_demo, + predict_MerchantSize_on_lead_data_demo, preprocessing_demo, ) from logger import get_logger @@ -20,11 +19,10 @@ log = get_logger() DEMOS = { - "BDC": bdc_demo, - "EVP": evp_demo, - "DB": db_demo, - "Pipeline": pipeline_demo, + "Base Data Collector": pipeline_demo, "Data preprocessing": preprocessing_demo, + "ML model training": evp_demo, + "Merchant Size Predictor": predict_MerchantSize_on_lead_data_demo, } PROMPT = "Choose demo:\n" @@ -33,10 +31,8 @@ if __name__ == "__main__": options = list(DEMOS.keys()) + [EXIT] while True: - try: - choice = get_multiple_choice(PROMPT, options) - if choice == EXIT: - break + choice = get_multiple_choice(PROMPT, options) + if choice == EXIT: + break + if choice != None: DEMOS[choice]() - except ValueError: - print("Invalid choice") diff --git a/src/preprocessing/__init__.py b/src/preprocessing/__init__.py index 9acc755..cb29c38 100644 --- a/src/preprocessing/__init__.py +++ b/src/preprocessing/__init__.py @@ -1,4 +1,4 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2023 Ahmed Sheta -from .preprocessing import Preprocessing +from .preprocessing import * diff --git a/src/preprocessing/preprocessing.py b/src/preprocessing/preprocessing.py index 9f11a10..aa91935 100644 --- a/src/preprocessing/preprocessing.py +++ b/src/preprocessing/preprocessing.py @@ -29,30 +29,13 @@ class Preprocessing: - def __init__(self, filter_null_data=True, historical_data=False): + def __init__(self, filter_null_data=True, historical_bool=True): data_repo = get_database() - data_path = data_repo.get_output_path() - if historical_data: - input_path_components = data_path.split("\\" if "\\" in data_path else "/") - input_path_components.pop() - input_path_components.pop() - 
input_path_components.append("historical_data/100k_historic_enriched.csv") - input_path = "/".join(input_path_components) - data = pd.read_csv(input_path) - log.debug(f"Data path = {input_path}") - else: - log.debug(f"Data path = {data_path}") - data = pd.read_csv(data_path) - self.preprocessed_df = data.copy() - - if historical_data: - self.prerocessed_data_output_path = "s3://amos--data--features/preprocessed_data_files/preprocessed_data.csv" - else: - # created the new output path based on which repo used - path_components = data_path.split("\\" if "\\" in data_path else "/") - path_components.pop() - path_components.append("preprocessed_data.csv") - self.prerocessed_data_output_path = "/".join(path_components) + self.data_path = data_repo.get_enriched_data_path(historical=historical_bool) + self.preprocessed_df = None + self.preprocessed_data_output_path = data_repo.get_preprocessed_data_path( + historical_bool + ) self.filter_bool = filter_null_data # columns that would be added later after one-hot encoding each class @@ -110,7 +93,10 @@ def filter_out_null_data(self): ] def fill_missing_values(self, column, strategy="constant"): - if column in self.preprocessed_df.columns: + if ( + column in self.preprocessed_df.columns + and not self.preprocessed_df[column].empty + ): imputer = SimpleImputer(strategy=strategy) self.preprocessed_df[column] = imputer.fit_transform( self.preprocessed_df[[column]] @@ -261,9 +247,9 @@ def save_preprocessed_data(self): except ValueError as e: log.error(f"Failed to save the selected columns for preprocessing! 
{e}") try: - selected_df.to_csv(self.prerocessed_data_output_path, index=False) + selected_df.to_csv(self.preprocessed_data_output_path, index=False) log.info( - f"Preprocessed dataframe of shape {self.preprocessed_df.shape} is saved at {self.prerocessed_data_output_path}" + f"Preprocessed dataframe of shape {self.preprocessed_df.shape} is saved at {self.preprocessed_data_output_path}" ) except ValueError as e: log.error(f"Failed to save preprocessed data file! {e}") diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..c93c73d --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2024 Felix Zailskas + +import pandas as pd + + +def mock_hash_check( + self, + lead_data: pd.Series, + data_fill_function: callable, + step_name: str, + fields_tofill: list[str], + *args, + **kwargs, +): + return data_fill_function(*args, **kwargs) diff --git a/tests/conftest.py b/tests/conftest.py index 33dbbcd..5a15f67 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,33 +1,2 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2023 Felix Zailskas - -from typing import Dict - -import pytest -from mock_components import get_database_mock - - -@pytest.fixture -def mock_database(): - import database - - database._database = get_database_mock() - yield database.get_database() - database._database = None - - -@pytest.fixture -def create_lead_dict(request) -> Dict: - lead_value_adjustments = request.param - lead_data = { - "lead_id": 0, - "annual_income": 0, - "product_of_interest": "Nothing", - "first_name": "Manu", - "last_name": "Musterperson", - "phone_number": "49123123123", - "email_address": "test@test.de", - } - for key, value in lead_value_adjustments.items(): - lead_data[key] = value - yield lead_data diff --git a/tests/mock_components.py b/tests/mock_components.py deleted file mode 100644 index 393dc1e..0000000 --- a/tests/mock_components.py +++ /dev/null @@ -1,8 +0,0 @@ 
-# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Felix Zailskas - -from database import DatabaseDummy - - -def get_database_mock(): - return DatabaseDummy("tests/test_data/database_dummies.csv") diff --git a/tests/steps/test_analyze_emails.py b/tests/steps/test_analyze_emails.py new file mode 100644 index 0000000..2718bdc --- /dev/null +++ b/tests/steps/test_analyze_emails.py @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2024 Felix Zailskas + +import unittest +from unittest.mock import MagicMock, patch + +import pandas as pd + +from bdc.steps.analyze_emails import ( + AnalyzeEmails, + analyze_email_account, + extract_custom_domain, +) +from bdc.steps.helpers.generate_hash_leads import LeadHashGenerator +from tests import mock_hash_check + + +class TestExtractCustomDomain(unittest.TestCase): + def test_valid_email(self): + email = "user@example.com" + result = extract_custom_domain(email) + expected = pd.Series(["example.com", True]) + self.assertTrue(result.equals(expected)) + + def test_invalid_email(self): + email = "invalid_email" + result = extract_custom_domain(email) + expected = pd.Series([None, False]) + self.assertTrue(result.equals(expected)) + + def test_email_with_subdomain(self): + email = "user@sub.example.com" + result = extract_custom_domain(email) + expected = pd.Series(["sub.example.com", True]) + self.assertTrue(result.equals(expected)) + + def test_empty_email(self): + email = "" + result = extract_custom_domain(email) + expected = pd.Series([None, False]) + self.assertTrue(result.equals(expected)) + + +class TestAnalyzeEmailAccount(unittest.TestCase): + def _init_lead(self, Email: str, email_valid: bool): + lead = { + "First Name": "John", + "Last Name": "Doe", + "Email": Email, + "email_valid": email_valid, + } + return lead + + def test_valid_email_account(self): + lead = self._init_lead(Email="john.doe@example.com", email_valid=True) + result = analyze_email_account(lead) + expected = 
pd.Series([True, True]) + self.assertTrue(result.equals(expected)) + + def test_invalid_email_account(self): + lead = self._init_lead(Email="invalid_email", email_valid=False) + result = analyze_email_account(lead) + expected = pd.Series([False, False]) + self.assertTrue(result.equals(expected)) + + def test_missing_first_name(self): + lead = self._init_lead(Email="john@example.com", email_valid=True) + result = analyze_email_account(lead) + expected = pd.Series([True, False]) + self.assertTrue(result.equals(expected)) + + def test_missing_last_name(self): + lead = self._init_lead(Email="doe123@example.com", email_valid=True) + result = analyze_email_account(lead) + expected = pd.Series([False, True]) + self.assertTrue(result.equals(expected)) + + def test_missing_names(self): + lead = self._init_lead(Email="user@example.com", email_valid=True) + lead = {"Email": "user@example.com", "email_valid": True} + result = analyze_email_account(lead) + expected = pd.Series([False, False]) + self.assertTrue(result.equals(expected)) + + +class TestStepExecution(unittest.TestCase): + step: AnalyzeEmails + + def setUp(self): + lead_data = { + "First Name": ["John"] * 3, + "Last Name": ["Doe"] * 3, + "Email": [ + "john.doe@john.com", + "invalid_email", + "john@yahoo.com", + ], + } + self.step = AnalyzeEmails(force_refresh=True) + self.step.df = pd.DataFrame(lead_data) + + @patch.object(LeadHashGenerator, "hash_check", mock_hash_check) + def test_run_method(self): + result = self.step.run() + assert type(result) is pd.DataFrame + columns = result.columns.to_list() + assert all( + col in columns + for col in [ + "First Name", + "Last Name", + "Email", + "domain", + "email_valid", + "first_name_in_account", + "last_name_in_account", + ] + ) + assert result["domain"].to_list() == ["john.com", None, None] + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/steps/test_hash_generator.py b/tests/steps/test_hash_generator.py new file mode 100644 index 0000000..f9b7b56 
--- /dev/null +++ b/tests/steps/test_hash_generator.py @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2024 Felix Zailskas + +import hashlib +import unittest + +import pandas as pd + +from bdc.steps.hash_generator import HashGenerator + + +class TestStepExecution(unittest.TestCase): + def setUp(self): + self.lead_data = { + "First Name": ["John"], + "Last Name": ["Doe"], + "Company / Account": ["ABC Corp"], + "Phone": ["+4912345678"], + "Email": ["john.doe@john.com"], + } + self.step = HashGenerator(force_refresh=True) + self.step.df = pd.DataFrame(self.lead_data) + + def test_hash_lead(self): + # Calculate the expected hash manually based on the data + expected_hash = hashlib.sha256( + ("John" + "Doe" + "ABC Corp" + "+4912345678" + "john.doe@john.com").encode() + ).hexdigest() + + # Call the hash_lead method with the sample data + result = self.step.run() + + # Assert that the actual hash matches the expected hash + assert type(result) is pd.DataFrame + columns = result.columns.to_list() + assert all( + col in columns + for col in [ + "First Name", + "Last Name", + "Email", + "Company / Account", + "Phone", + "lead_hash", + ] + ) + self.assertEqual(result.iloc[0]["lead_hash"], expected_hash) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/steps/test_preprocess_phonenumbers.py b/tests/steps/test_preprocess_phonenumbers.py new file mode 100644 index 0000000..f4f529b --- /dev/null +++ b/tests/steps/test_preprocess_phonenumbers.py @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2024 Felix Zailskas + +import unittest +from unittest.mock import patch + +import pandas as pd + +from bdc.steps.helpers.generate_hash_leads import LeadHashGenerator +from bdc.steps.preprocess_phonenumbers import PreprocessPhonenumbers +from tests import mock_hash_check + + +class TestStepExecution(unittest.TestCase): + def setUp(self): + self.lead_data = { + "First Name": ["John"] * 7, + "Last Name": ["Doe"] * 7, + 
"Phone": [ + "4930183992170", + "invalid_phone", + "442087599036", + "3197010281402", + "436601359011", + "33757056600", + "495111233421", + ], + } + self.step = PreprocessPhonenumbers(force_refresh=True) + self.step.df = pd.DataFrame(self.lead_data) + self.formatted_gt = [ + "+49 30 183992170", + "", + "+44 20 8759 9036", + "+31 970 102 81402", + "+43 660 1359011", + "+33 7 57 05 66 00", + "+49 511 1233421", + ] + self.country_gt = [ + "Germany", + "", + "United Kingdom", + "Netherlands", + "Austria", + "France", + "Germany", + ] + self.area_gt = [ + "Berlin", + "", + "London", + "", + "", + "", + "Hannover", + ] + self.valid_gt = [ + True, + False, + True, + True, + True, + True, + True, + ] + self.possible_gt = [ + True, + False, + True, + True, + True, + True, + True, + ] + + @patch.object(LeadHashGenerator, "hash_check", mock_hash_check) + def test_hash_lead(self): + result = self.step.run() + + assert type(result) is pd.DataFrame + columns = result.columns.to_list() + assert all( + col in columns + for col in [ + "First Name", + "Last Name", + "Phone", + "number_formatted", + "number_country", + "number_area", + "number_valid", + "number_possible", + ] + ) + # test formatted number + for test, gt in zip(result["number_formatted"].to_list(), self.formatted_gt): + self.assertEqual(test, gt) + # test country + for test, gt in zip(result["number_country"].to_list(), self.country_gt): + self.assertEqual(test, gt) + # test area + for test, gt in zip(result["number_area"].to_list(), self.area_gt): + self.assertEqual(test, gt) + # test valid + for test, gt in zip(result["number_valid"].to_list(), self.valid_gt): + self.assertEqual(test, gt) + # test possible + for test, gt in zip(result["number_possible"].to_list(), self.possible_gt): + self.assertEqual(test, gt) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_console_utils.py b/tests/test_console_utils.py index 7b02f9d..c0092d9 100644 --- a/tests/test_console_utils.py +++ 
b/tests/test_console_utils.py @@ -124,7 +124,7 @@ def test_valid_input(self, mock_get_int_input): result = get_multiple_choice("Select an option: ", choices) self.assertEqual(result, "Option A") mock_get_int_input.assert_called_with( - "Select an option: " + choice_string, range(len(choices) + 1) + "Select an option: " + choice_string, range(len(choices)) ) diff --git a/tests/test_leadparser.py b/tests/test_leadparser.py deleted file mode 100644 index 620eda3..0000000 --- a/tests/test_leadparser.py +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: MIT -# SPDX-FileCopyrightText: 2023 Felix Zailskas - -from mock_components import get_database_mock - -from database.models import Lead - - -def test_parser(): - leads = get_database_mock().get_all_leads() - for lead in leads: - assert type(lead) == Lead diff --git a/tests/test_pipeline_utils.py b/tests/test_pipeline_utils.py new file mode 100644 index 0000000..966415d --- /dev/null +++ b/tests/test_pipeline_utils.py @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2024 Felix Zailskas + +import unittest +from unittest.mock import MagicMock, mock_open, patch + +from bdc.steps import * +from demo.pipeline_utils import ( + get_all_available_pipeline_json_configs, + get_pipeline_additional_steps, + get_pipeline_config_from_json, + get_pipeline_initial_steps, + get_pipeline_steps, +) + + +class TestPipelineUtils(unittest.TestCase): + def test_get_pipeline_steps(self): + steps = get_pipeline_steps() + self.assertEqual( + [ + (HashGenerator, "Hash Generator", ""), + (AnalyzeEmails, "Analyze Emails", ""), + ( + SearchOffeneRegister, + "Search OffeneRegister", + "(will take a long time)", + ), + (PreprocessPhonenumbers, "Phone Number Validation", ""), + ( + GooglePlaces, + "Google API", + "(will use token and generate cost!)", + ), + ( + GooglePlacesDetailed, + "Google API Detailed", + "(will use token and generate cost!)", + ), + ( + GPTReviewSentimentAnalyzer, + "openAI GPT Sentiment 
Analyzer", + "(will use token and generate cost!)", + ), + ( + GPTSummarizer, + "openAI GPT Summarizer", + "(will use token and generate cost!)", + ), + ( + SmartReviewInsightsEnhancer, + "Smart Review Insights", + "(will take looong time!)", + ), + (RegionalAtlas, "Regionalatlas", ""), + ], + steps, + ) + + def test_get_pipeline_initial_steps(self): + initial_steps = get_pipeline_initial_steps() + self.assertEqual( + [ + (HashGenerator, "Hash Generator", ""), + (AnalyzeEmails, "Analyze Emails", ""), + ], + initial_steps, + ) + + def test_get_pipeline_additional_steps(self): + additional_steps = get_pipeline_additional_steps() + self.assertEqual( + [ + ( + SearchOffeneRegister, + "Search OffeneRegister", + "(will take a long time)", + ), + (PreprocessPhonenumbers, "Phone Number Validation", ""), + ( + GooglePlaces, + "Google API", + "(will use token and generate cost!)", + ), + ( + GooglePlacesDetailed, + "Google API Detailed", + "(will use token and generate cost!)", + ), + ( + GPTReviewSentimentAnalyzer, + "openAI GPT Sentiment Analyzer", + "(will use token and generate cost!)", + ), + ( + GPTSummarizer, + "openAI GPT Summarizer", + "(will use token and generate cost!)", + ), + ( + SmartReviewInsightsEnhancer, + "Smart Review Insights", + "(will take looong time!)", + ), + (RegionalAtlas, "Regionalatlas", ""), + ], + additional_steps, + ) + + def test_get_all_available_pipeline_json_configs(self): + # Create a temporary directory and add some JSON files for testing + with patch( + "os.listdir", MagicMock(return_value=["config1.json", "config2.json"]) + ): + configs = get_all_available_pipeline_json_configs(config_path="fake_path") + self.assertEqual(configs, ["config1.json", "config2.json"]) + + def test_get_pipeline_config_from_json(self): + # Create a temporary JSON file for testing + mock_json_content = """ + { + "config": { + "steps": [ + {"name": "HashGenerator", "force_refresh": true}, + {"name": "AnalyzeEmails", "force_refresh": false}, + {"name": 
"GooglePlacesDetailed", "force_refresh": false}, + {"name": "SearchOffeneRegister", "force_refresh": true} + ] + } + } + """ + steps_gt = [ + HashGenerator(force_refresh=True), + AnalyzeEmails(force_refresh=False), + GooglePlacesDetailed(force_refresh=False), + SearchOffeneRegister(force_refresh=True), + ] + with patch("builtins.open", mock_open(read_data=mock_json_content)): + steps = get_pipeline_config_from_json( + "fake_config.json", config_path="fake_path" + ) + for step, gt in zip(steps, steps_gt): + self.assertEqual(type(step), type(gt)) + self.assertEqual(step.name, gt.name) + self.assertEqual(step.force_refresh, gt.force_refresh) + + +if __name__ == "__main__": + unittest.main()