Skip to content

Commit

Permalink
Fix: refactor to use pydantic and validation ✨
Browse files Browse the repository at this point in the history
✨ Refactor: move to pydantic for validation✨
  • Loading branch information
lwasser committed Aug 20, 2023
2 parents 30743a9 + 2a19227 commit 2a7a070
Show file tree
Hide file tree
Showing 12 changed files with 826 additions and 779 deletions.
2 changes: 2 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[flake8]
ignore = E203, W503
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ dmypy.json

token.txt
src/test-model.py

src/pyosmeta/_version_generated.py
.pdm-build/*
28 changes: 24 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@ classifiers = [
"Programming Language :: Python :: 3.11",
]

dependencies = ["ruamel-yaml>=0.17.21", "requests", "python-dotenv", "pydantic"]

dependencies = [
"ruamel-yaml>=0.17.21",
"requests",
"python-dotenv",
"pydantic>=2.0",
]

# This is the metadata that pip reads to understand what versions your package supports
requires-python = ">=3.10"
readme = "README.md"
Expand All @@ -52,14 +59,26 @@ license = { text = "MIT" }
# for a user to run directly from the package.
[project.scripts] # Optional
update-contributors = "pyosmeta.cli.update_contributors:main"
update-reviews = "pyosmeta.cli.update_reviews:main"
update-reviewers = "pyosmeta.cli.update_review_contribs:main"
update-reviews = "pyosmeta.cli.process_reviews:main"
update-review-teams = "pyosmeta.cli.update_review_teams:main"


# Right now i'm not using pdm to add dependencies.
# Will explore that later
# Below using dynamic versioning / which is setuptools scm like
[tool.pdm]
[tool.black]
line-length = 79
target-version = ['py310']

[tool.isort]
profile = "black"
multi_line_output = 3
# Must match requires-python (>=3.10) and black's target-version;
# 27 would make isort assume Python 2.7 syntax.
py_version = 310

# Precommit ignores this config so i added a .flake8 file
# but why did it ignore it?
[tool.flake8]
extend-ignore = ["E203", "W503"]


[tool.pdm.build]
Expand All @@ -70,6 +89,7 @@ package-dir = "src"

# Versioning is a backend feature - instructions are in pdm-backend docs
# https://pdm-backend.fming.dev/metadata/

[tool.pdm.version]
# Note that you need to create the tag after all commits are created - otherwise
# pdm adds dev info after the tag number which won't publish to pypi
Expand Down
14 changes: 11 additions & 3 deletions src/pyosmeta/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# SPDX-FileCopyrightText: 2023-present Leah Wasser <[email protected]>
#
# SPDX-License-Identifier: MIT
from .contributors import PersonModel, ProcessContributors
from .parse_issues import ProcessIssues, ReviewModel

# Trick suggested by flake8 maintainer to ensure the imports above don't
# get flagged as being "unused"
__all__ = (
"ProcessIssues",
"ReviewModel",
"PersonModel",
"ProcessContributors",
)

try:
from ._version_generated import __version__
Expand Down
61 changes: 61 additions & 0 deletions src/pyosmeta/cli/process_reviews.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
Script that parses metadata from an issue and adds it to a yml file for the
website. It also grabs some of the package metadata such as stars,
last commit, etc.
Output: packages.yml file containing a list of
1. all packages with accepted reviews
2. information related to the review including reviewers, editors
3. basic package stats including stars, etc.
To run at the CLI: parse_issue_metadata
"""

# TODO: if we export files we might want packages.yml and then under_review.yml
# thus we'd want to add a second input parameters which was file_name
# TODO: feature - Would be cool to create an "under review now" list as well -
# ideally this could be passed as a CLI argument with the label we want to
# search for

import pickle

from pydantic import ValidationError

from pyosmeta import ProcessIssues, ReviewModel

# TODO: change the template to ask for date accepted format year-month-day


def main():
    """Parse approved-review issues, validate them, and pickle the results.

    Pulls all issues carrying the pyOS-approved label, enriches each one
    with GitHub repo metrics, validates every review dict against
    ``ReviewModel``, and writes the validated mapping to
    ``all_reviews.pickle`` for downstream scripts.
    """
    issue_processor = ProcessIssues(
        org="pyopensci",
        repo_name="software-submission",
        label_name="6/pyOS-approved 🚀🚀🚀",
    )

    # Fetch every approved-package issue and parse its header metadata.
    raw_issues = issue_processor.return_response()
    reviews = issue_processor.parse_issue_header(raw_issues, 45)

    # Refresh GitHub metrics (stars, last commit, ...) for each package.
    endpoints = issue_processor.get_repo_endpoints(reviews)
    enriched_reviews = issue_processor.get_gh_metrics(endpoints, reviews)

    # Validate each review against the pydantic model; report and skip
    # any entry that fails validation rather than aborting the run.
    validated = {}
    for pkg_name, review_meta in enriched_reviews.items():
        print("Parsing & validating", pkg_name)
        try:
            validated[pkg_name] = ReviewModel(**review_meta)
        except ValidationError as ve:
            print(pkg_name, ":", ve)

    # Pickle supports later in-place updates by the other CLI scripts.
    with open("all_reviews.pickle", "wb") as f:
        pickle.dump(validated, f)


if __name__ == "__main__":
    main()
113 changes: 68 additions & 45 deletions src/pyosmeta/cli/update_contributors.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import argparse
import pickle

from pyosmeta.contributors import ProcessContributors
from pyosmeta.file_io import clean_export_yml, load_website_yml
import pydantic
from pydantic import ValidationError

# TODO: will this still run in gh actions??
# TODO: add update=True like i did for update_reviews
# TODO: still need to add a flag to not update specific fields
# TODO: if i use composition and there are helpers in a class
# that are used in a method that i call via composition are the helpers
# still available?
from pyosmeta.contributors import PersonModel, ProcessContributors
from pyosmeta.file_io import create_paths, open_yml_file

print(pydantic.__version__)

# TODO - https://stackoverflow.com
# /questions/55762673/how-to-parse-list-of-models-with-pydantic
# I can use TypeAdapter to convert the json data to model objects!


def main():
Expand All @@ -20,65 +22,86 @@ def main():
parser.add_argument(
"--update",
type=str,
help="Will force update contrib info from GitHub for every contributor",
help="Force update contrib info from GitHub for every contributor",
)
args = parser.parse_args()
update_value = args.update

if args:
if update_value:
update_all = True

# TODO - maybe add these as an attr in the contribs class?
base_url = "https://raw.githubusercontent.com/pyOpenSci/"
end_url = "/main/.all-contributorsrc"
repos = [
"python-package-guide",
"software-peer-review",
"pyopensci.github.io",
"software-review",
"update-web-metadata",
]
json_files = [base_url + repo + end_url for repo in repos]
json_files = create_paths(repos)

# Get existing contribs from pyopensci.github.io repo (website data)
web_yaml_path = base_url + "pyopensci.github.io/main/_data/contributors.yml"
base_url = "https://raw.githubusercontent.com/pyOpenSci/"
web_yaml_path = (
base_url + "pyopensci.github.io/main/_data/contributors.yml"
)

process_contribs = ProcessContributors(json_files)
web_contribs = open_yml_file(web_yaml_path)

# Populate all existing contribs into model objects
all_contribs = {}
for a_contrib in web_contribs:
print(a_contrib["github_username"])
try:
all_contribs[a_contrib["github_username"].lower()] = PersonModel(
**a_contrib
)
except ValidationError as ve:
print(a_contrib["github_username"])
print(ve)

# Returns a list of dict objects with gh usernames (lowercase) as keys
# TODO: File io module (could just be a function)
web_contribs = load_website_yml(url=web_yaml_path, key="github_username")
bot_all_contribs_dict = process_contribs.combine_json_data()
print("Done processing all-contribs")

# Parse through each user in the web yaml, if they don't exist, add them
# finally - update contrib types
for key, users in bot_all_contribs_dict.items():
# Create a list of all contributors across repositories
process_contribs = ProcessContributors(json_files)
bot_all_contribs = process_contribs.combine_json_data()

print("Updating contrib types and searching for new users now")
for key, users in bot_all_contribs.items():
for gh_user in users:
# Add any new contributors
if gh_user not in web_contribs.keys():
print("I found a new contributor! Adding:", gh_user)
web_contribs.update(
# TODO: this is also used in the other 2 scripts
# but add user info is in the contribs class - i do
# think it belongs there
process_contribs.check_add_user(gh_user, web_contribs)
)

# Update contrib type list
existing_contribs = web_contribs[gh_user]["contributor_type"]
# TODO: This helper is used in all three scripts but defined
# in the contribs class
web_contribs[gh_user][
"contributor_type"
] = process_contribs.update_contrib_list(existing_contribs, key)
# Find and populate data for any new contributors
if gh_user not in all_contribs.keys():
print("Missing", gh_user, "Adding them now")
new_contrib = process_contribs.get_user_info(gh_user)
all_contribs[gh_user] = PersonModel(**new_contrib)

# Update contribution type list for all users
all_contribs[gh_user].add_unique_value("contributor_type", key)

if update_all:
gh_data = process_contribs.get_gh_data(web_contribs)
web_contribs = process_contribs.update_contrib_data(web_contribs, gh_data)
for user in all_contribs.keys():
print("Updating all user info from github", user)
new_gh_data = process_contribs.get_user_info(user)

# TODO: turn this into a small update method
existing = all_contribs[user].model_dump()

for key, item in new_gh_data.items():
if key == "mastodon":
# Mastodon isn't available in the GH api yet
continue
# Don't replace the value if there is a noupdate flag
# TODO: This approach doesn't work, ruamel-yaml doesn't
# preserve inline comments
if key == "name" and existing[key]:
continue
else:
existing[key] = item

all_contribs[user] = PersonModel(**existing)

# Export data
# Pickle supports updates after parsing reviews
# Export to pickle which supports updates after parsing reviews
with open("all_contribs.pickle", "wb") as f:
pickle.dump(web_contribs, f)
pickle.dump(all_contribs, f)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 2a7a070

Please sign in to comment.