Skip to content

Commit

Permalink
Fix: refactor to use pydantic and validation ✨
Browse files Browse the repository at this point in the history
✨ Refactor: move to pydantic for validation✨
  • Loading branch information
lwasser committed Aug 20, 2023
2 parents 30743a9 + 2a19227 commit 2a7a070
Show file tree
Hide file tree
Showing 12 changed files with 826 additions and 779 deletions.
2 changes: 2 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[flake8]
ignore = E203, W503
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ dmypy.json

token.txt
src/test-model.py

src/pyosmeta/_version_generated.py
.pdm-build/*
28 changes: 24 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@ classifiers = [
"Programming Language :: Python :: 3.11",
]

dependencies = ["ruamel-yaml>=0.17.21", "requests", "python-dotenv", "pydantic"]

dependencies = [
"ruamel-yaml>=0.17.21",
"requests",
"python-dotenv",
"pydantic>=2.0",
]

# This is the metadata that pip reads to understand what versions your package supports
requires-python = ">=3.10"
readme = "README.md"
Expand All @@ -52,14 +59,26 @@ license = { text = "MIT" }
# for a user to run directly from the package.
[project.scripts] # Optional
update-contributors = "pyosmeta.cli.update_contributors:main"
update-reviews = "pyosmeta.cli.update_reviews:main"
update-reviewers = "pyosmeta.cli.update_review_contribs:main"
update-reviews = "pyosmeta.cli.process_reviews:main"
update-review-teams = "pyosmeta.cli.update_review_teams:main"


# Right now i'm not using pdm to add dependencies.
# Will explore that later
# Below using dynamic versioning / which is setuptools scm like
[tool.pdm]
[tool.black]
line-length = 79
target-version = ['py310']

[tool.isort]
profile = "black"
multi_line_output = 3
# Must match requires-python (>=3.10) and black's target-version;
# 27 would make isort assume Python 2.7 syntax.
py_version = 310

# Precommit ignores this config so i added a .flake8 file
# but why did it ignore it?
[tool.flake8]
extend-ignore = ["E203", "W503"]


[tool.pdm.build]
Expand All @@ -70,6 +89,7 @@ package-dir = "src"

# Versioning is a backend feature - instructions are in pdm-backend docs
# https://pdm-backend.fming.dev/metadata/

[tool.pdm.version]
# Note that you need to create the tag after all commits are created - otherwise
# pdm adds dev info after the tag number which won't publish to pypi
Expand Down
14 changes: 11 additions & 3 deletions src/pyosmeta/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# SPDX-FileCopyrightText: 2023-present Leah Wasser <[email protected]>
#
# SPDX-License-Identifier: MIT
from .contributors import PersonModel, ProcessContributors
from .parse_issues import ProcessIssues, ReviewModel

# Trick suggested by flake8 maintainer to ensure the imports above don't
# get flagged as being "unused"
__all__ = (
"ProcessIssues",
"ReviewModel",
"PersonModel",
"ProcessContributors",
)

try:
from ._version_generated import __version__
Expand Down
61 changes: 61 additions & 0 deletions src/pyosmeta/cli/process_reviews.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
Script that parses metadata from an issue and adds it to a yml file for the
website. It also grabs some of the package metadata such as stars,
last commit, etc.
Output: packages.yml file containing a list of
1. all packages with accepted reviews
2. information related to the review including reviewers, editors
3. basic package stats including stars, etc.
To run at the CLI: parse_issue_metadata
"""

# TODO: if we export files we might want packages.yml and then under_review.yml
# thus we'd want to add a second input parameters which was file_name
# TODO: feature - Would be cool to create an "under review now" list as well -
# ideally this could be passed as a CLI argument with the label we want to
# search for

import pickle

from pydantic import ValidationError

from pyosmeta import ProcessIssues, ReviewModel

# TODO: change the template to ask for date accepted format year-month-day


def main():
    """Parse approved-review issues, validate them, and pickle the results.

    Pulls all issues carrying the pyOS-approved label, enriches each one
    with GitHub repo metrics, validates every review dict against
    ``ReviewModel``, and writes the validated mapping to
    ``all_reviews.pickle`` for downstream scripts.
    """
    issue_processor = ProcessIssues(
        org="pyopensci",
        repo_name="software-submission",
        label_name="6/pyOS-approved 🚀🚀🚀",
    )

    # Fetch every approved-package issue and parse its header metadata.
    raw_issues = issue_processor.return_response()
    reviews = issue_processor.parse_issue_header(raw_issues, 45)

    # Refresh GitHub metrics (stars, last commit, ...) for each package.
    endpoints = issue_processor.get_repo_endpoints(reviews)
    enriched_reviews = issue_processor.get_gh_metrics(endpoints, reviews)

    # Validate each review against the pydantic model; report and skip
    # any entry that fails validation rather than aborting the run.
    validated = {}
    for pkg_name, review_meta in enriched_reviews.items():
        print("Parsing & validating", pkg_name)
        try:
            validated[pkg_name] = ReviewModel(**review_meta)
        except ValidationError as ve:
            print(pkg_name, ":", ve)

    # Pickle supports later in-place updates by the other CLI scripts.
    with open("all_reviews.pickle", "wb") as f:
        pickle.dump(validated, f)


if __name__ == "__main__":
    main()
113 changes: 68 additions & 45 deletions src/pyosmeta/cli/update_contributors.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import argparse
import pickle

from pyosmeta.contributors import ProcessContributors
from pyosmeta.file_io import clean_export_yml, load_website_yml
import pydantic
from pydantic import ValidationError

# TODO: will this still run in gh actions??
# TODO: add update=True like i did for update_reviews
# TODO: still need to add a flag to not update specific fields
# TODO: if i use composition and there are helpers in a class
# that are used in a method that i call via composition are the helpers
# still available?
from pyosmeta.contributors import PersonModel, ProcessContributors
from pyosmeta.file_io import create_paths, open_yml_file

print(pydantic.__version__)

# TODO - https://stackoverflow.com
# /questions/55762673/how-to-parse-list-of-models-with-pydantic
# I can use TypeAdapter to convert the json data to model objects!


def main():
Expand All @@ -20,65 +22,86 @@ def main():
parser.add_argument(
"--update",
type=str,
help="Will force update contrib info from GitHub for every contributor",
help="Force update contrib info from GitHub for every contributor",
)
args = parser.parse_args()
update_value = args.update

if args:
if update_value:
update_all = True

# TODO - maybe add these as an attr in the contribs class?
base_url = "https://raw.githubusercontent.com/pyOpenSci/"
end_url = "/main/.all-contributorsrc"
repos = [
"python-package-guide",
"software-peer-review",
"pyopensci.github.io",
"software-review",
"update-web-metadata",
]
json_files = [base_url + repo + end_url for repo in repos]
json_files = create_paths(repos)

# Get existing contribs from pyopensci.github.io repo (website data)
web_yaml_path = base_url + "pyopensci.github.io/main/_data/contributors.yml"
base_url = "https://raw.githubusercontent.com/pyOpenSci/"
web_yaml_path = (
base_url + "pyopensci.github.io/main/_data/contributors.yml"
)

process_contribs = ProcessContributors(json_files)
web_contribs = open_yml_file(web_yaml_path)

# Populate all existing contribs into model objects
all_contribs = {}
for a_contrib in web_contribs:
print(a_contrib["github_username"])
try:
all_contribs[a_contrib["github_username"].lower()] = PersonModel(
**a_contrib
)
except ValidationError as ve:
print(a_contrib["github_username"])
print(ve)

# Returns a list of dict objects with gh usernames (lowercase) as keys
# TODO: File io module (could just be a function)
web_contribs = load_website_yml(url=web_yaml_path, key="github_username")
bot_all_contribs_dict = process_contribs.combine_json_data()
print("Done processing all-contribs")

# Parse through each user in the web yaml, if they don't exist, add them
# finally - update contrib types
for key, users in bot_all_contribs_dict.items():
# Create a list of all contributors across repositories
process_contribs = ProcessContributors(json_files)
bot_all_contribs = process_contribs.combine_json_data()

print("Updating contrib types and searching for new users now")
for key, users in bot_all_contribs.items():
for gh_user in users:
# Add any new contributors
if gh_user not in web_contribs.keys():
print("I found a new contributor! Adding:", gh_user)
web_contribs.update(
# TODO: this is also used in the other 2 scripts
# but add user info is in the contribs class - i do
# think it belongs there
process_contribs.check_add_user(gh_user, web_contribs)
)

# Update contrib type list
existing_contribs = web_contribs[gh_user]["contributor_type"]
# TODO: This helper is used in all three scripts but defined
# in the contribs class
web_contribs[gh_user][
"contributor_type"
] = process_contribs.update_contrib_list(existing_contribs, key)
# Find and populate data for any new contributors
if gh_user not in all_contribs.keys():
print("Missing", gh_user, "Adding them now")
new_contrib = process_contribs.get_user_info(gh_user)
all_contribs[gh_user] = PersonModel(**new_contrib)

# Update contribution type list for all users
all_contribs[gh_user].add_unique_value("contributor_type", key)

if update_all:
gh_data = process_contribs.get_gh_data(web_contribs)
web_contribs = process_contribs.update_contrib_data(web_contribs, gh_data)
for user in all_contribs.keys():
print("Updating all user info from github", user)
new_gh_data = process_contribs.get_user_info(user)

# TODO: turn this into a small update method
existing = all_contribs[user].model_dump()

for key, item in new_gh_data.items():
if key == "mastodon":
# Mastodon isn't available in the GH api yet
continue
# Don't replace the value if there is a noupdate flag
# TODO: This approach doesn't work, ruamel-yaml doesn't
# preserve inline comments
if key == "name" and existing[key]:
continue
else:
existing[key] = item

all_contribs[user] = PersonModel(**existing)

# Export data
# Pickle supports updates after parsing reviews
# Export to pickle which supports updates after parsing reviews
with open("all_contribs.pickle", "wb") as f:
pickle.dump(web_contribs, f)
pickle.dump(all_contribs, f)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 2a7a070

Please sign in to comment.