diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..8434b4b
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+ignore = E203, W503
diff --git a/.gitignore b/.gitignore
index 888ee0f..9dad33e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,5 +41,5 @@ dmypy.json
 token.txt
 src/test-model.py
-
+src/pyosmeta/_version_generated.py
 .pdm-build/*
diff --git a/pyproject.toml b/pyproject.toml
index 63934bf..6e8e431 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,14 @@ classifiers = [
   "Programming Language :: Python :: 3.11",
 ]
-dependencies = ["ruamel-yaml>=0.17.21", "requests", "python-dotenv", "pydantic"]
+
+dependencies = [
+    "ruamel-yaml>=0.17.21",
+    "requests",
+    "python-dotenv",
+    "pydantic>=2.0",
+]
+
 # This is the metadata that pip reads to understand what versions your package supports
 requires-python = ">=3.10"
 readme = "README.md"
@@ -52,14 +59,26 @@ license = { text = "MIT" }
 # for a user to run directly from the package.
 [project.scripts] # Optional
 update-contributors = "pyosmeta.cli.update_contributors:main"
-update-reviews = "pyosmeta.cli.update_reviews:main"
-update-reviewers = "pyosmeta.cli.update_review_contribs:main"
+update-reviews = "pyosmeta.cli.process_reviews:main"
+update-review-teams = "pyosmeta.cli.update_review_teams:main"

 # Right now I'm not using pdm to add dependencies.
 # Will explore that later
 # Dynamic versioning below works like setuptools-scm
-[tool.pdm]
+[tool.black]
+line-length = 79
+target-version = ['py310']
+
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+py_version = 310
+
+# flake8 does not read configuration from pyproject.toml, so the settings
+# live in the .flake8 file added at the repo root
+[tool.flake8]
+extend-ignore = ["E203", "W503"]

 [tool.pdm.build]
@@ -70,6 +89,7 @@ package-dir = "src"
 # Versioning is a backend feature - instructions are in pdm-backend docs
 # https://pdm-backend.fming.dev/metadata/
+
 [tool.pdm.version]
 # Note that you need to create the tag after all commits are created - otherwise
 # pdm adds dev info after the tag number which won't publish to pypi
diff --git a/src/pyosmeta/__init__.py b/src/pyosmeta/__init__.py
index d745d01..e4d2c24 100644
--- a/src/pyosmeta/__init__.py
+++ b/src/pyosmeta/__init__.py
@@ -1,6 +1,14 @@
-# SPDX-FileCopyrightText: 2023-present Leah Wasser
-#
-# SPDX-License-Identifier: MIT
+from .contributors import PersonModel, ProcessContributors
+from .parse_issues import ProcessIssues, ReviewModel
+
+# Trick suggested by a flake8 maintainer to ensure the imports above don't
+# get flagged as "unused"
+__all__ = (
+    "ProcessIssues",
+    "ReviewModel",
+    "PersonModel",
+    "ProcessContributors",
+)

 try:
     from ._version_generated import __version__
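With the new package-level exports above, downstream code can import the models and processors straight from the package root — a quick check:

```python
from pyosmeta import PersonModel, ProcessContributors, ProcessIssues, ReviewModel

print(ReviewModel(package_name="demo").package_name)  # "demo"
```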
diff --git a/src/pyosmeta/cli/process_reviews.py b/src/pyosmeta/cli/process_reviews.py
new file mode 100644
index 0000000..6756c63
--- /dev/null
+++ b/src/pyosmeta/cli/process_reviews.py
@@ -0,0 +1,61 @@
+"""
+Script that parses metadata from an issue and adds it to a yml file for the
+website. It also grabs some of the package metadata such as stars,
+last commit, etc.
+
+Output: packages.yml file containing a list of
+ 1. all packages with accepted reviews
+ 2. information related to the review including reviewers, editors
+ 3. basic package stats including stars, etc.
+
+To run at the CLI: update-reviews
+"""
+
+# TODO: if we export files we might want packages.yml and then under_review.yml
+# thus we'd want to add a second input parameter, file_name
+# TODO: feature - Would be cool to create an "under review now" list as well -
+# ideally this could be passed as a CLI argument with the label we want to
+# search for
+
+import pickle
+
+from pydantic import ValidationError
+
+from pyosmeta import ProcessIssues, ReviewModel
+
+# TODO: change the template to ask for date accepted format year-month-day
+
+
+def main():
+    process_review = ProcessIssues(
+        org="pyopensci",
+        repo_name="software-submission",
+        label_name="6/pyOS-approved 🚀🚀🚀",
+    )
+
+    # Get all issues for approved packages - load as dict
+    issues = process_review.return_response()
+    accepted_reviews = process_review.parse_issue_header(issues, 45)
+
+    # Update gh metrics via api for all packages
+    repo_endpoints = process_review.get_repo_endpoints(accepted_reviews)
+    all_reviews = process_review.get_gh_metrics(
+        repo_endpoints, accepted_reviews
+    )
+
+    # Populate model objects with review data + metrics
+    final_reviews = {}
+    for key, review in all_reviews.items():
+        # First add gh meta to each dict
+        print("Parsing & validating", key)
+        try:
+            final_reviews[key] = ReviewModel(**review)
+        except ValidationError as ve:
+            print(key, ":", ve)
+
+    with open("all_reviews.pickle", "wb") as f:
+        pickle.dump(final_reviews, f)
+
+
+if __name__ == "__main__":
+    main()
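The pickle written above is the hand-off to the other CLI scripts; a minimal sketch of reading it back, assuming you run from the directory where `main()` wrote the file:

```python
import pickle

with open("all_reviews.pickle", "rb") as f:
    reviews = pickle.load(f)  # dict: package name -> ReviewModel

for name, review in reviews.items():
    print(name, review.date_accepted)
```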
diff --git a/src/pyosmeta/cli/update_contributors.py b/src/pyosmeta/cli/update_contributors.py
index 31c29ac..b0e1bb2 100644
--- a/src/pyosmeta/cli/update_contributors.py
+++ b/src/pyosmeta/cli/update_contributors.py
@@ -1,15 +1,17 @@
 import argparse
 import pickle

-from pyosmeta.contributors import ProcessContributors
-from pyosmeta.file_io import clean_export_yml, load_website_yml
+import pydantic
+from pydantic import ValidationError

-# TODO: will this still run in gh actions??
-# TODO: add update=True like i did for update_reviews
-# TODO: still need to add a flag to not update specific fields
-# TODO: if i use composition and there are helpers in a class
-# that are used in a method that i call via composition are the helpers
-# still available?
+from pyosmeta.contributors import PersonModel, ProcessContributors
+from pyosmeta.file_io import create_paths, open_yml_file
+
+print(pydantic.__version__)
+
+# TODO - https://stackoverflow.com
+# /questions/55762673/how-to-parse-list-of-models-with-pydantic
+# I can use TypeAdapter to convert the json data to model objects!


 def main():
@@ -20,16 +22,14 @@ def main():
     parser.add_argument(
         "--update",
         type=str,
-        help="Will force update contrib info from GitHub for every contributor",
+        help="Force update contrib info from GitHub for every contributor",
     )
     args = parser.parse_args()
+    update_value = args.update

-    if args:
+    if update_value:
         update_all = True

-    # TODO - maybe add these as an attr in the contribs class?
-    base_url = "https://raw.githubusercontent.com/pyOpenSci/"
-    end_url = "/main/.all-contributorsrc"
     repos = [
         "python-package-guide",
         "software-peer-review",
@@ -37,48 +37,71 @@ def main():
         "software-review",
         "update-web-metadata",
     ]
-    json_files = [base_url + repo + end_url for repo in repos]
+    json_files = create_paths(repos)

     # Get existing contribs from pyopensci.github.io repo (website data)
-    web_yaml_path = base_url + "pyopensci.github.io/main/_data/contributors.yml"
+    base_url = "https://raw.githubusercontent.com/pyOpenSci/"
+    web_yaml_path = (
+        base_url + "pyopensci.github.io/main/_data/contributors.yml"
+    )

-    process_contribs = ProcessContributors(json_files)
+    web_contribs = open_yml_file(web_yaml_path)
+
+    # Populate all existing contribs into model objects
+    all_contribs = {}
+    for a_contrib in web_contribs:
+        print(a_contrib["github_username"])
+        try:
+            all_contribs[a_contrib["github_username"].lower()] = PersonModel(
+                **a_contrib
+            )
+        except ValidationError as ve:
+            print(a_contrib["github_username"])
+            print(ve)

-    # Returns a list of dict objects with gh usernames (lowercase) as keys
-    # TODO: File io module (could just be a function)
-    web_contribs = load_website_yml(url=web_yaml_path, key="github_username")
-    bot_all_contribs_dict = process_contribs.combine_json_data()
+    print("Done processing all-contribs")

-    # Parse through each user in the web yaml, if they don't exist, add them
-    # finally - update contrib types
-    for key, users in bot_all_contribs_dict.items():
+    # Create a list of all contributors across repositories
+    process_contribs = ProcessContributors(json_files)
+    bot_all_contribs = process_contribs.combine_json_data()
+
+    print("Updating contrib types and searching for new users now")
+    for key, users in bot_all_contribs.items():
         for gh_user in users:
-            # Add any new contributors
-            if gh_user not in web_contribs.keys():
-                print("I found a new contributor! Adding:", gh_user)
-                web_contribs.update(
-                    # TODO: this is also used in the other 2 scripts
-                    # but add user info is in the contribs class - i do
-                    # think it belongs there
-                    process_contribs.check_add_user(gh_user, web_contribs)
-                )
-
-            # Update contrib type list
-            existing_contribs = web_contribs[gh_user]["contributor_type"]
-            # TODO: This helper is used in all three scripts but defined
-            # in the contribs class
-            web_contribs[gh_user][
-                "contributor_type"
-            ] = process_contribs.update_contrib_list(existing_contribs, key)
+            # Find and populate data for any new contributors
+            if gh_user not in all_contribs.keys():
+                print("Missing", gh_user, "Adding them now")
+                new_contrib = process_contribs.get_user_info(gh_user)
+                all_contribs[gh_user] = PersonModel(**new_contrib)
+
+            # Update contribution type list for all users
+            all_contribs[gh_user].add_unique_value("contributor_type", key)

     if update_all:
-        gh_data = process_contribs.get_gh_data(web_contribs)
-        web_contribs = process_contribs.update_contrib_data(web_contribs, gh_data)
+        for user in all_contribs.keys():
+            print("Updating all user info from github", user)
+            new_gh_data = process_contribs.get_user_info(user)
+
+            # TODO: turn this into a small update method
+            existing = all_contribs[user].model_dump()
+
+            for key, item in new_gh_data.items():
+                if key == "mastodon":
+                    # Mastodon isn't available in the GH api yet
+                    continue
+                # Don't replace the value if there is a noupdate flag
+                # TODO: This approach doesn't work, ruamel-yaml doesn't
+                # preserve inline comments
+                if key == "name" and existing[key]:
+                    continue
+                else:
+                    existing[key] = item
+
+            all_contribs[user] = PersonModel(**existing)

-    # Export data
-    # Pickle supports updates after parsing reviews
+    # Export to pickle which supports updates after parsing reviews
     with open("all_contribs.pickle", "wb") as f:
-        pickle.dump(web_contribs, f)
+        pickle.dump(all_contribs, f)


 if __name__ == "__main__":
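The TODO above ("turn this into a small update method") could take a shape like the following — a hypothetical helper, not part of this diff, that mirrors the merge rules of the inline loop:

```python
from pyosmeta import PersonModel


def update_from_gh(person: PersonModel, gh_data: dict) -> PersonModel:
    """Merge fresh GitHub data into an existing PersonModel.

    Keeps a curated name if one exists and skips fields the GitHub
    API can't provide, matching the loop in main() above.
    """
    existing = person.model_dump()
    for key, item in gh_data.items():
        if key == "mastodon":
            continue  # not available from the GitHub API yet
        if key == "name" and existing[key]:
            continue  # keep the curated name
        existing[key] = item
    return PersonModel(**existing)
```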
- -""" - -import os - -from pyosmeta.contributors import ProcessContributors -from pyosmeta.file_io import clean_export_yml, load_pickle - - -def get_clean_user(username: str): - return username.lower().strip() - - -def main(): - # TODO: move refresh contribs and contribs dict attr to - # processContribs and remove this module altogether - updateContribs = ProcessContributors([]) - - # Two pickle files are outputs of the two other scripts - # use that data to limit web calls - contribs = load_pickle("all_contribs.pickle") - - # Output of update_reviews.py - packages = load_pickle("all_reviews.pickle") - - contrib_types = updateContribs.contrib_types - - for pkg_name, issue_meta in packages.items(): - print("Processing review team for:", pkg_name) - for issue_role in contrib_types.keys(): - if issue_role == "all_current_maintainers": - if issue_role in issue_meta: - # Loop through each maintainer in the list - for i, a_maintainer in enumerate(issue_meta.get(issue_role)): - gh_user = get_clean_user(a_maintainer["github_username"]) - - if gh_user not in contribs.keys(): - contribs.update( - updateContribs.check_add_user(gh_user, contribs) - ) - - # Update contrib packages for peer review - ( - contrib_key, - pkg_list, - ) = updateContribs.refresh_contribs( - contribs[gh_user], - pkg_name, # new contribs - issue_role, - ) - # Update users contrib list - contribs[gh_user][contrib_key] = pkg_list - - _, contrib_list = updateContribs.refresh_contribs( - contribs[gh_user], - None, - issue_role, - ) - contribs[gh_user]["contributor_type"] = contrib_list - - # If name is missing in issue summary, populate from contribs - if a_maintainer["name"] == "": - packages[pkg_name]["all_current_maintainers"][i][ - "name" - ] = contribs[gh_user]["name"] - - else: - print( - "All maintainers is missing in the review for ", - pkg_name, - ) - - else: - # Else we are processing editors, reviewers... - gh_user = get_clean_user( - packages[pkg_name][issue_role]["github_username"] - ) - - if gh_user not in contribs.keys(): - # If they aren't already in contribs, add them - contribs.update(updateContribs.check_add_user(gh_user, contribs)) - # Update user package contributions - ( - contrib_key, - pkg_list, - ) = updateContribs.refresh_contribs( - contribs[gh_user], - pkg_name, # new contribs - issue_role, - ) - - # Update users contrib list - contribs[gh_user][contrib_key] = pkg_list - - _, contrib_list = updateContribs.refresh_contribs( - contribs[gh_user], - None, - issue_role, - ) - contribs[gh_user]["contributor_type"] = contrib_list - - # If users's name is missing in issue, populate from contribs dict - if issue_meta[issue_role]["name"] == "": - packages[pkg_name][issue_role]["name"] = contribs[gh_user]["name"] - - # Export to yaml - clean_export_yml(contribs, os.path.join("_data", "contributors.yml")) - clean_export_yml(packages, os.path.join("_data", "packages.yml")) - - -if __name__ == "__main__": - main() diff --git a/src/pyosmeta/cli/update_review_teams.py b/src/pyosmeta/cli/update_review_teams.py new file mode 100644 index 0000000..5ff10b0 --- /dev/null +++ b/src/pyosmeta/cli/update_review_teams.py @@ -0,0 +1,128 @@ +""" +This script parses through our reviews and contributors and: + +1. Updates reviewer, editor and maintainer data in the contributor.yml file to +ensure all packages they supported are listed there. +1b: And that they have a listing as peer-review under contributor type +2. Updates the packages metadata with the participants names if it's missing +3. 
diff --git a/src/pyosmeta/cli/update_review_teams.py b/src/pyosmeta/cli/update_review_teams.py
new file mode 100644
index 0000000..5ff10b0
--- /dev/null
+++ b/src/pyosmeta/cli/update_review_teams.py
@@ -0,0 +1,128 @@
+"""
+This script parses through our reviews and contributors and:
+
+1. Updates reviewer, editor and maintainer data in the contributor.yml file to
+ensure all packages they supported are listed there.
+1b: And that they have a listing as peer-review under contributor type
+2. Updates the packages metadata with the participants names if it's missing
+3. FUTURE: finally it looks to see if we are missing review participants from
+the review issues in the contributor file and updates that file.
+
+This script assumes that update_contributors and update_reviews have been run.
+Rather than hit any APIs it just updates information from the issues.
+To run: update-review-teams
+
+# TODO - FEATURE we have some packages that were NOT approved but we had
+# editors and reviewers.
+# We need to acknowledge these people as well. maybe tag them with waiting on
+# maintainer response??
+# TODO: package-wide feature: create no update flag for entries
+# TODO: make sure we can add a 3rd or 4th reviewer - crowsetta has this as
+# will biocypher
+
+"""
+import os
+
+from pydantic import ValidationError
+
+from pyosmeta.contributors import PersonModel, ProcessContributors
+from pyosmeta.file_io import clean_export_yml, load_pickle
+
+
+def get_clean_user(username: str) -> str:
+    """A small helper that removes whitespace and ensures username is
+    lower case"""
+    return username.lower().strip()
+
+
+def main():
+    process_contribs = ProcessContributors([])
+
+    # Two pickle files are outputs of the two other scripts
+    # use that data to limit web calls
+    contribs = load_pickle("all_contribs.pickle")
+    packages = load_pickle("all_reviews.pickle")
+
+    contrib_types = process_contribs.contrib_types
+
+    for pkg_name, issue_meta in packages.items():
+        print("Processing review team for:", pkg_name)
+        for issue_role in contrib_types.keys():
+            if issue_role == "all_current_maintainers":
+                # Loop through each maintainer in the list
+                for i, a_maintainer in enumerate(
+                    issue_meta.all_current_maintainers
+                ):
+                    gh_user = get_clean_user(a_maintainer["github_username"])
+
+                    if gh_user not in contribs.keys():
+                        print("Found a new user!", gh_user)
+                        new_contrib = process_contribs.get_user_info(gh_user)
+                        try:
+                            contribs[gh_user] = PersonModel(**new_contrib)
+                        except ValidationError as ve:
+                            print(ve)
+
+                    # Update user package contributions (if it's unique)
+                    review_key = contrib_types[issue_role][0]
+                    contribs[gh_user].add_unique_value(
+                        review_key, pkg_name.lower()
+                    )
+
+                    # Update user contrib list (if it's unique)
+                    review_roles = contrib_types[issue_role][1]
+                    contribs[gh_user].add_unique_value(
+                        "contributor_type", review_roles
+                    )
+
+                    # If name is missing in issue, populate from contribs
+                    if a_maintainer["name"] == "":
+                        name = getattr(contribs[gh_user], "name")
+                        packages[pkg_name].all_current_maintainers[i][
+                            "name"
+                        ] = name
+
+            else:
+                # Else we are processing editors, reviewers...
+                gh_user = get_clean_user(
+                    getattr(packages[pkg_name], issue_role)["github_username"]
+                )
+
+                if gh_user not in contribs.keys():
+                    # If they aren't already in contribs, add them
+                    print("Found a new user!", gh_user)
+                    new_contrib = process_contribs.get_user_info(gh_user)
+                    try:
+                        contribs[gh_user] = PersonModel(**new_contrib)
+                    except ValidationError as ve:
+                        print(ve)
+
+                # Update user package contributions (if it's unique)
+                review_key = contrib_types[issue_role][0]
+                contribs[gh_user].add_unique_value(
+                    review_key, pkg_name.lower()
+                )
+
+                # Update user contrib list (if it's unique)
+                review_roles = contrib_types[issue_role][1]
+                contribs[gh_user].add_unique_value(
+                    "contributor_type", review_roles
+                )
+
+                # If user's name is missing in issue, populate from contribs
+                if getattr(issue_meta, issue_role)["name"] == "":
+                    attribute_value = getattr(packages[pkg_name], issue_role)
+                    attribute_value["name"] = getattr(
+                        contribs[gh_user], "name"
+                    )
+
+    # Export to yaml
+    contribs_ls = [model.model_dump() for model in contribs.values()]
+    pkgs_ls = [model.model_dump() for model in packages.values()]
+
+    clean_export_yml(contribs_ls, os.path.join("_data", "contributors.yml"))
+    clean_export_yml(pkgs_ls, os.path.join("_data", "packages.yml"))
+
+
+if __name__ == "__main__":
+    main()
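The `review_key` / `review_roles` lookups above index into `ProcessContributors.contrib_types` (defined in `contributors.py` further down in this diff); the shape being indexed:

```python
from pyosmeta import ProcessContributors

contrib_types = ProcessContributors([]).contrib_types
review_key, review_roles = contrib_types["reviewer_1"]
print(review_key)    # "packages_reviewed"
print(review_roles)  # ["reviewer", "peer-review"]
```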
diff --git a/src/pyosmeta/cli/update_reviews.py b/src/pyosmeta/cli/update_reviews.py
deleted file mode 100644
index 8e632cd..0000000
--- a/src/pyosmeta/cli/update_reviews.py
+++ /dev/null
@@ -1,102 +0,0 @@
-"""
-Script that parses metadata from na issue and adds it to a yml file for the
-website. It also grabs some of the package metadata such as stars,
-last commit, etc.
-
-Output: packages.yml file containing a list of
- 1. all packages with accepted reviews
- 2. information related to the review including reviewers, editors
- 3. basic package stats including stars, etc.
-
-To run at the CLI: parse_issue_metadata
-"""
-
-# TODO: if we export files we might want packages.yml and then under_review.yml
-# thus we'd want to add a second input parameters which was file_name
-# TODO: feature - Would be cool to create an "under review now" list as well -
-# ideally this could be passed as a CLI argument with the label we want to
-# search for
-
-import argparse
-import pickle
-
-from pyosmeta import ProcessIssues
-from pyosmeta.file_io import clean_export_yml, load_website_yml
-
-
-def main():
-    update_all = False
-    parser = argparse.ArgumentParser(
-        description="A CLI script to update pyOpenSci reviews"
-    )
-    parser.add_argument(
-        "--update",
-        type=str,
-        help="Will force update review info from GitHub for every review",
-    )
-    args = parser.parse_args()
-
-    if args:
-        update_all = True
-
-    web_reviews_path = "https://raw.githubusercontent.com/pyOpenSci/pyopensci.github.io/main/_data/packages.yml"
-
-    issueProcess = ProcessIssues(
-        org="pyopensci",
-        repo_name="software-submission",
-        label_name="6/pyOS-approved 🚀🚀🚀",
-    )
-
-    # Open web yaml & return dict with package name as key
-    web_reviews = load_website_yml(key="package_name", url=web_reviews_path)
-
-    # Get all issues for approved packages
-    issues = issueProcess.return_response()
-    all_accepted_reviews = issueProcess.parse_issue_header(issues, 15)
-
-    # Parse through reviews, identify new ones, fix case
-    if update_all == True:
-        for review_key, review_meta in all_accepted_reviews.items():
-            web_reviews[review_key.lower()] = review_meta
-    else:
-        for review_key, review_meta in all_accepted_reviews.items():
-            if review_key.lower() not in web_reviews.keys():
-                print("Yay - pyOS has a new package:", review_key)
-                web_reviews[review_key.lower()] = review_meta
-
-    # Update gh metrics via api for all packages
-    repo_endpoints = issueProcess.get_repo_endpoints(web_reviews)
-    gh_stats = [
-        "name",
-        "description",
-        "homepage",
-        "created_at",
-        "stargazers_count",
-        "watchers_count",
-        "forks",
-        "open_issues_count",
-        "forks_count",
-    ]
-
-    # Get gh metadata for each package submission
-    all_repo_meta = {}
-    for package_name in repo_endpoints.keys():
-        print("Getting GitHub stats for", package_name)
-        package_api = repo_endpoints[package_name]
-        all_repo_meta[package_name] = issueProcess.get_repo_meta(package_api, gh_stats)
-
-        all_repo_meta[package_name]["contrib_count"] = issueProcess.get_repo_contribs(
-            package_api
-        )
-        all_repo_meta[package_name]["last_commit"] = issueProcess.get_last_commit(
-            package_api
-        )
-        # Add github meta to review metadata
-        web_reviews[package_name]["gh_meta"] = all_repo_meta[package_name]
-
-    with open("all_reviews.pickle", "wb") as f:
-        pickle.dump(web_reviews, f)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/src/pyosmeta/contributors.py b/src/pyosmeta/contributors.py
index 4944473..22e22aa 100644
--- a/src/pyosmeta/contributors.py
+++ b/src/pyosmeta/contributors.py
@@ -1,16 +1,171 @@
 import json
 import os
-from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple, Union
+import re

 import requests
+from dataclasses import dataclass
 from dotenv import load_dotenv
+from pydantic import (
+    AliasChoices,
+    BaseModel,
+    ConfigDict,
+    Field,
+    field_serializer,
+    field_validator,
+)
+from typing import List, Optional, Set, Tuple, Union
+
+
+class UrlValidatorMixin:
+    # Check fields is false given mixin is used by two diff classes
+    @field_validator(
+        "website", "documentation", mode="before", check_fields=False
+    )
+    @classmethod
+    def format_url(cls, url: str) -> str:
+        """Append https to the beginning of URL if it doesn't exist & cleanup
+        If the url doesn't have https add it
+        If the url starts with http change it to https
+        Else do nothing
+
+        Parameters
+        ----------
+        url : str
+            String representing the url grabbed from the GH api
+
+        """
+
+        if not url:
+            return url  # Returns empty string if url is empty
+        else:
+            if url.startswith("http://"):
+                print(f"{url} 'http://' replacing w 'https://'")
+                url = url.replace("http://", "https://")
+            elif not url.startswith("http"):
+                print("Oops, missing http")
+                url = "https://" + url
+            if cls._check_url(url=url):
+                return url
+            else:
+                return None
+
+    @staticmethod
+    def _check_url(url: str) -> bool:
+        """Test url. Return true if there's a valid response, False if not
+
+        Parameters
+        ----------
+        url : str
+            String for a url to a website to test.
+
+        """
+
+        try:
+            response = requests.get(url, timeout=6)
+            return response.status_code == 200
+        except Exception:
+            print("Oops, url", url, "is not valid, removing it")
+            return False
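A quick, hedged check of the mixin via `PersonModel` (defined next); `_check_url` makes a live request, so this assumes network access and that the site responds:

```python
from pyosmeta import PersonModel

person = PersonModel(
    github_username="octocat",
    github_image_id=583231,
    website="www.pyopensci.org",  # scheme intentionally missing
)
print(person.website)  # "https://www.pyopensci.org" if the site responds
```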
+class PersonModel(BaseModel, UrlValidatorMixin):
+    model_config = ConfigDict(
+        populate_by_name=True,
+        str_strip_whitespace=True,
+        validate_assignment=True,
+    )
+
+    name: Optional[str] = None
+    title: Optional[Union[list[str], str]] = None
+    sort: Optional[int] = None
+    bio: Optional[str] = None
+    organization: Optional[str] = Field(
+        None, validation_alias=AliasChoices("company")
+    )
+    github_username: str = Field(None, validation_alias=AliasChoices("login"))
+    github_image_id: int = Field(None, validation_alias=AliasChoices("id"))
+    deia_advisory: Optional[bool] = False
+    editorial_board: Optional[bool] = Field(
+        None, validation_alias=AliasChoices("editorial-board")
+    )
+    advisory: Optional[bool] = False
+    twitter: Optional[str] = Field(
+        None, validation_alias=AliasChoices("twitter_username")
+    )
+    mastodon: Optional[str] = Field(
+        None, validation_alias=AliasChoices("mastodon_username", "mastodon")
+    )
+    orcidid: Optional[str] = None
+    website: Optional[str] = Field(
+        None, validation_alias=AliasChoices("blog", "website")
+    )
+    board: Optional[bool] = False
+    contributor_type: Set[str] = set()
+    packages_editor: Set[str] = set()
+    packages_submitted: Set[str] = set()
+    packages_reviewed: Set[str] = set()
+    location: Optional[str] = None
+    email: Optional[str] = None
+
+    @field_validator(
+        "packages_reviewed",
+        "packages_submitted",
+        "packages_editor",
+        "contributor_type",
+        mode="before",
+    )
+    @classmethod
+    def convert_to_set(cls, value: list[str]):
+        if isinstance(value, list):
+            if not value:
+                return set()
+            elif value[0] is None:
+                return set()
+            else:
+                value = [a_val.lower() for a_val in value]
+                return set(value)
+        elif value is None:
+            return set()
+        return {value.lower()}
+
+    def add_unique_value(self, attr_name: str, values: Union[str, list[str]]):
+        """A helper that will add only unique values to an existing set"""
+        if isinstance(values, str):
+            values = [values]
+        attribute = getattr(self, attr_name)
+        if isinstance(attribute, set):
+            attribute.update(values)
+        else:
+            raise ValueError(f"{attr_name} is not a set attribute")
+
+    @field_serializer(
+        "packages_reviewed",
+        "packages_submitted",
+        "packages_editor",
+        "contributor_type",
+    )
+    def serialize_set(self, items: Set[str]):
+        """This is a serializer that runs on export. It ensures sets are
+        converted to lists"""
+        return sorted(list(items))
+
+    @field_validator("bio", mode="before")
+    @classmethod
+    def clean_strings(cls, string: str) -> str:
+        """This is a cleaning step that will remove spurious
+        characters from string fields.
+
+        """
+        if isinstance(string, str):
+            string = re.sub(r"[\r\n]", "", string)
+        return string
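A short round trip through the set handling above (hypothetical user; note that `model_dump()` returns the sets as sorted lists via `serialize_set`):

```python
from pyosmeta import PersonModel

p = PersonModel(github_username="some-user", github_image_id=1234)
p.add_unique_value("packages_reviewed", "pandera")
p.add_unique_value("packages_reviewed", ["pandera", "movingpandas"])  # dupes dropped
print(p.model_dump()["packages_reviewed"])  # ['movingpandas', 'pandera']
```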
 @dataclass
 class ProcessContributors:
-    # When initializing how do you decide what should be an input
-    # attribute vs just something a method accepted when called?
+    """A class that contains some basic methods to support populating and
+    updating contributor data."""
+
     def __init__(self, json_files: List) -> None:
         """
         Parameters
@@ -38,15 +193,15 @@ def __init__(self, json_files: List) -> None:
         ]

         self.contrib_types = {
-            "reviewer_1": ["packages-reviewed", ["reviewer", "peer-review"]],
-            "reviewer_2": ["packages-reviewed", ["reviewer", "peer-review"]],
-            "editor": ["packages-editor", ["editor", "peer-review"]],
+            "reviewer_1": ["packages_reviewed", ["reviewer", "peer-review"]],
+            "reviewer_2": ["packages_reviewed", ["reviewer", "peer-review"]],
+            "editor": ["packages_editor", ["editor", "peer-review"]],
             "submitting_author": [
-                "packages-submitted",
+                "packages_submitted",
                 ["maintainer", "submitting-author", "peer-review"],
             ],
             "all_current_maintainers": [
-                "packages-submitted",
+                "packages_submitted",
                 ["maintainer", "peer-review"],
             ],
         }
@@ -63,124 +218,6 @@ def get_token(self) -> str:
         load_dotenv()
         return os.environ["GITHUB_TOKEN"]

-    def refresh_contribs(self, contribs: Dict, new_contribs, review_role):
-        """Need to add ....
-
-        Parameters
-        ----------
-
-
-        Returns
-        -------
-        """
-        contrib_types = self.contrib_types
-        contrib_key_yml = ""
-        # Contributor type will be updated which is a list of roles
-        if new_contribs:
-            contrib_key_yml = contrib_types[review_role][0]
-            existing_contribs = contribs[contrib_key_yml]
-        # Else this is a specific review role meant to update package list
-        else:
-            new_contribs = contrib_types[review_role][1]
-            existing_contribs = contribs["contributor_type"]
-
-        final_list = self.update_contrib_list(existing_contribs, new_contribs)
-        return (contrib_key_yml, final_list)
-
-    def create_contrib_template(self) -> Dict:
-        """A small helper that creates a template for a new contributor
-        that we are adding to our contributor.yml file"""
-
-        return {
-            "name": "",
-            "bio": "",
-            "organization": "",
-            "title": "",
-            "github_username": "",
-            "github_image_id": "",
-            "editorial-board": "",
-            "twitter": "",
-            "mastodon": "",
-            "orcidid": "",
-            "website": "",
-            "contributor_type": [],
-            "packages-editor": [],
-            "packages-submitted": [],
-            "packages-reviewed": [],
-            "location": "",
-            "email": "",
-        }
-
-    # TODO - This utility is used across all scripts.
-    def clean_list(self, a_list: Union[str, List[str]]) -> List[str]:
-        """Helper function that takes an input object as a list or string.
-        If it is a list containing none, it returns an empty list
-        if it is a string is returns the string as a list
-        removes 'None' if that is in the list. and returns
-        either an empty clean list of the list as is."""
-
-        if isinstance(a_list, str):
-            a_list = [a_list]
-        elif not a_list:
-            a_list = []
-        # Remove None from list
-        a_list = list(filter(lambda x: x, a_list))
-        return a_list
-
-    # TODO - There is likely a better way to do this. If it returns an
-    # empty list then we know there are no new vals... so it likely can
-    # return a single thing
-    def unique_new_vals(
-        self, a_list: List[str], a_item: List[str]
-    ) -> Tuple[bool, Optional[List[str]]]:
-        """Checks two objects either a list and string or two lists
-        and evaluates whether there are differences between them.
-
-        Returns
-        -------
-        Tuple
-            Containing a boolean representing whether there are difference
-            or not and a list containing new value if there are differences.
-
-        """
-
-        default = (False, None)
-        list_lower = [al.lower() for al in a_list]
-        item_lower = [ai.lower() for ai in a_item]
-        diff = list(set(item_lower) - set(list_lower))
-        if len(diff) > 0:
-            default = (True, diff)
-        return default
-
-    # TODO - also a helper used by all scripts
-    def update_contrib_list(
-        self,
-        existing_contribs: Union[List, str],
-        new_contrib: Union[List, str],
-    ) -> List:
-        """Method that gets an existing list of contribs.
-        cleans the list and then checks the list against a
-        new contribution to see if it should be added.
-
-        Parameters
-        ----------
-        existing_contribs: list or str
-            A users existing contributions
-        new_contrib: list or str
-            a list or a single new contribution to be added
-
-        """
-
-        # Cleanup first
-        cleaned_list = self.clean_list(existing_contribs)
-        new_contrib = self.clean_list(new_contrib)
-
-        unique_vals, new_vals = self.unique_new_vals(cleaned_list, new_contrib)
-        if unique_vals:
-            cleaned_list += new_vals
-
-        return cleaned_list
-
     def check_contrib_type(self, json_file: str):
         """
         Determine the type of contribution the person
@@ -211,22 +248,26 @@ def check_contrib_type(self, json_file: str):
         contrib_type = "community"
         return contrib_type

-    def check_add_user(self, gh_user: str, contribs: Dict[str, str]) -> None:
-        """Check to make sure user exists and if not, add them
+    # TODO possibly could repurpose this as a check in the code
+    # but it should return get_user_info
+    # def check_add_user(self, gh_user: str, contribs: Dict[str, str]) -> None:
+    #     """Check to make sure user exists in the existing contrib data.
+    #     If they don't exist, add them

-        Parameters
-        ----------
-        gh_user : str
-            github username
-        contribs: dict
-            A dictionary containing contributors with gh user being the key
+    #     Parameters
+    #     ----------
+    #     gh_user : str
+    #         github username
+    #     contribs: dict
+    #         A dictionary containing contributors with gh user being the key

-        This returns the updated dictionary with a new user at the end.
+    #     This returns the updated dictionary with a new user at the end.

-        """
-        if gh_user not in contribs.keys():
-            print("Missing user", gh_user, "adding them now.")
-            return self.add_new_user(gh_user)
+    #     """
+    #     if gh_user not in contribs.keys():
+    #         print("Missing user", gh_user, "adding them now.")
+    #         return self.get_user_info(gh_user)

     def load_json(self, json_path: str) -> dict:
         """
@@ -239,6 +280,7 @@ def load_json(self, json_path: str) -> dict:
             print(ae)
         return json.loads(response.text)
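For reference, a sketch of how the loader methods below fit together when building the cross-repo contributor list (requires network access; the dict keys are the contrib types returned by `check_contrib_type`, and the usernames are illustrative):

```python
from pyosmeta import ProcessContributors
from pyosmeta.file_io import create_paths

pc = ProcessContributors(create_paths(["python-package-guide"]))
bot_all_contribs = pc.combine_json_data()
# e.g. {"packaging-guide": ["user-a", "user-b", ...]}
```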
+    # TODO: check whether I'm using the contrib-type part of this method
     def process_json_file(self, json_file: str) -> Tuple[str, List]:
         """Deserialize a JSON file from a URL and cleanup data
@@ -278,6 +320,8 @@ def combine_json_data(self) -> dict:
         # Create an empty dictionary to hold the combined data
         combined_data = {}

+        # TODO: to make this faster, it might be better to return a dict
+        # with username : [contrib1, contrib2]
         for json_file in self.json_files:
             # Process the JSON file and add the data to the combined dictionary
             try:
@@ -287,22 +331,9 @@ def combine_json_data(self) -> dict:
                 print("Oops - can't process", json_file, e)
         return combined_data

-    def get_gh_usernames(self, contrib_data: List) -> List:
-        """Get a list of all gh usernames
-
-        Parameters
-        ----------
-        contrib_data : list
-            Dict containing all of the contributor information for the website.
-
-        """
-        all_usernames = []
-        for item in contrib_data:
-            all_usernames.append(item["github_username"])
-
-        return all_usernames
-
-    def get_user_info(self, username: str, aname: Optional[str] = None) -> dict:
+    def get_user_info(
+        self, username: str, aname: Optional[str] = None
+    ) -> dict:
         """
         Get a single user's information from their GitHub username using the
         GitHub API
@@ -314,6 +345,7 @@ def get_user_info(
             Github username to retrieve data for
         aname : str default=None
             A user's name from the contributors.yml file.
+            https://docs.github.com/en/rest/users/users?apiVersion=2022-11-28#get-a-user

         Returns
         -------
@@ -327,7 +359,6 @@ def get_user_info(
         # if message = Bad credentials
         response_json = response.json()

-        user_data = {}
         # TODO: make an attribute and call it here?
         update_keys = {
             "name": "name",
@@ -342,20 +373,9 @@ def get_user_info(
             "github_username": "login",
         }

-        user_data[username] = {}
-        for akey in update_keys:
-            # If the key is name, check to see if there is name data
-            # already there. don't force update if there's a name!
-            if akey == "name":
-                if aname is None:
-                    user_data[username][akey] = response_json.get(
-                        update_keys[akey], None
-                    )
-                else:
-                    # Else just keep the original name
-                    user_data[username][akey] = aname
-            else:
-                user_data[username][akey] = response_json.get(update_keys[akey], None)
+        user_data = {}
+        for key in update_keys:
+            user_data[key] = response_json.get(update_keys[key], None)

         return user_data
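Callers should note the shape change here: `get_user_info` no longer nests the payload under the username. A hedged illustration (assumes a GITHUB_TOKEN is configured; values depend on the profile queried):

```python
from pyosmeta import ProcessContributors

pc = ProcessContributors([])
info = pc.get_user_info("octocat")
# Before this change: {"octocat": {"name": ..., "github_username": "octocat", ...}}
# Now a flat dict:    {"name": ..., "github_username": "octocat", ...}
print(info["github_username"])  # "octocat"
```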
- - """ - - new = {} - new[gh_user] = self.create_contrib_template() - gh_data = self.get_gh_data([gh_user]) - # Update their metadata in the dict and return - updated_data = self.update_contrib_data(new, gh_data) - return updated_data - - def get_gh_data(self, contribs: Union[Dict[str, str], List]) -> dict[str, str]: - """Parses through each GitHub username and hits the GitHub - API to grab user information. - - Parameters - ---------- - contribs : dict - Dict containing all current contrib info - - Returns - ------- - Dict - A dict of updated user data via a list of github usernames - """ - all_user_info = {} - for gh_user in contribs: - print("Getting github data for: ", gh_user) - # If the user already has a name in the dict, don't update - # Important to allow us to update names to ensure correct spelling, - # etc on website - if isinstance(contribs, list): - aname = None - else: - aname = contribs[gh_user]["name"] - - all_user_info[gh_user] = self.get_user_info(gh_user, aname) - return all_user_info - - def _check_url(self, url: str) -> bool: - """Test a url and return true if it works, false if not - - Parameters - ---------- - url : str - String for a url to a website to test. - - """ - - try: - response = requests.get(url, timeout=6) - return response.status_code == 200 - except: - print("Oops, url", url, "is not valid, removing it") - return False - - def update_contrib_data(self, contrib_data: dict, gh_data: dict): - """Update contributor data from the GH API return. - - Use the GitHub API to grab user profile data such as twitter handle, - mastodon, website, email and location and update contributor - information. GitHub profile data is the source of truth source for - contributor metadata. - - Parameters - ---------- - contrib_data : dict - A dict containing contributor data to be updated - gh_data : dict - Updated contributor data pulled from github API - - Returns - ------- - dict - Dictionary containing updated contributor data. 
- """ - - for i, gh_name in enumerate(contrib_data.keys()): - print(i, gh_name) - # Update the key:value pairs for data pulled from GitHub - for akey in self.update_keys: - if akey == "website": - url = gh_data[gh_name][gh_name][akey] - # Fix the url format and check to see if it works online - url = self.format_url(url) - # It url is valid, add to dict - if self._check_url(url): - contrib_data[gh_name][akey] = url - else: - contrib_data[gh_name][akey] = "" - else: - contrib_data[gh_name][akey] = gh_data[gh_name][gh_name][akey] - - return contrib_data - - def format_url(self, url: str) -> str: - """Append https to the beginning of URL if it doesn't exist - If the url doesn't have https add it - If the url starts with http change it to https - Else do nothing - - Parameters - ---------- - url : str - String representing the url grabbed from the GH api - - """ - if not url: - return url # returns empty string if url is empty - elif url.startswith("https://"): - return url - elif url.startswith("http://"): - print("Fixing", url, "https://" + url[7:]) - return "https://" + url[7:] - else: - print("Missing https://, adding to ", url) - return "https://" + url diff --git a/src/pyosmeta/file_io.py b/src/pyosmeta/file_io.py index 9c7c7da..8521477 100644 --- a/src/pyosmeta/file_io.py +++ b/src/pyosmeta/file_io.py @@ -1,8 +1,8 @@ import pickle import urllib.request -from typing import Dict, List, Optional, Tuple, Union import ruamel.yaml +from typing import Dict, List, Union def load_pickle(filename): @@ -27,6 +27,25 @@ def _list_to_dict(a_list: List, a_key: str) -> Dict: return {a_dict[a_key].lower(): a_dict for a_dict in a_list} +def create_paths(repos: Union[list[str], str]) -> Union[list[str], str]: + """ """ + base_url = "https://raw.githubusercontent.com/pyOpenSci/" + end_url = "/main/.all-contributorsrc" + repos = [ + "python-package-guide", + "software-peer-review", + "pyopensci.github.io", + "software-review", + "update-web-metadata", + ] + if isinstance(repos, list): + all_paths = [base_url + repo + end_url for repo in repos] + else: + all_paths = base_url + repos + end_url + + return all_paths + + def load_website_yml(key: str, url: str): """ This opens a website contrib yaml file and turns it in a @@ -37,28 +56,6 @@ def load_website_yml(key: str, url: str): return _list_to_dict(yml_list, key) -# def dict_to_list(pyos_meta: Dict[str, Union[str, List[str]]]) -> List[Dict]: -# """Turn dict into list for parsing to jekyll friendly yaml - -# Parameters -# ---------- -# pyos_meta : Dict -# A dictionary containing metadata for pyos contributors or review issues - -# Returns -# ------- -# List -# A list of dictionaries containing pyos metadata for contribs or reviews - -# """ -# print("a") -# # Turn dict into list for parsing -# return [pyos_meta[key] for key in pyos_meta] -# # for key in pyos_meta: -# # final_contribs.append(pyos_meta[key]) -# # return final_contribs - - def open_yml_file(file_path: str) -> dict: """Open & deserialize YAML file to dictionary. 
diff --git a/src/pyosmeta/parse_issues.py b/src/pyosmeta/parse_issues.py
index 61d5953..8267998 100644
--- a/src/pyosmeta/parse_issues.py
+++ b/src/pyosmeta/parse_issues.py
@@ -1,13 +1,160 @@
-from dataclasses import dataclass
+import re
 from datetime import datetime

 import requests
+from dataclasses import dataclass
+from pydantic import (
+    AliasChoices,
+    BaseModel,
+    ConfigDict,
+    Field,
+    field_validator,
+)
+from typing import Any, Optional
+
+from pyosmeta.contributors import ProcessContributors, UrlValidatorMixin
+
+
+def clean_date(a_date: Optional[str]) -> str:
+    """Cleans up a datetime from github and returns a date string
+
+    In some cases the string is manually entered month-day-year and in
+    others it's a gh time stamp. Finally, sometimes it could be missing
+    or text. Handle all of those cases with this validator.
+    """
+
+    if a_date is None or a_date == "missing":
+        return "missing"
+    # elif len(a_date) < 11:
+    #     new_date = a_date.replace("/", "-").split("-")
+    #     return f"{new_date[0]}-{new_date[1]}-{new_date[2]}"
+    else:
+        try:
+            return (
+                datetime.strptime(a_date, "%Y-%m-%dT%H:%M:%SZ")
+                .date()
+                .strftime("%Y-%m-%d")
+            )
+        except TypeError as t_error:
+            print("Oops - missing data. Setting date to missing", t_error)
+            return "missing"
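For reference, the two paths through `clean_date`:

```python
from pyosmeta.parse_issues import clean_date

print(clean_date("2023-05-10T06:10:00Z"))  # "2023-05-10"
print(clean_date(None))                    # "missing"
```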
+class GhMeta(BaseModel, UrlValidatorMixin):
+    name: str
+    description: str
+    created_at: str
+    stargazers_count: int
+    watchers_count: int
+    forks: int
+    open_issues_count: int
+    forks_count: int
+    documentation: Optional[str]  # Jointly is missing documentation
+    contrib_count: int
+    last_commit: str
+
+    @field_validator(
+        "last_commit",
+        "created_at",
+        mode="before",
+    )
+    @classmethod
+    def clean_date(cls, a_date: Optional[str]) -> str:
+        """Cleans up a datetime from github and returns a date string
+
+        Runs the general clean_date function in this module as a validator.
+        """
+
+        return clean_date(a_date)
+
+
+class ReviewModel(BaseModel):
+    # Make sure model populates both aliases and original attr name
+    model_config = ConfigDict(
+        populate_by_name=True,
+        str_strip_whitespace=True,
+        validate_assignment=True,
+    )
+
+    package_name: Optional[str] = ""
+    package_description: str = Field(
+        "", validation_alias=AliasChoices("one-line_description_of_package")
+    )
+    submitting_author: dict[str, Optional[str]] = {}
+    all_current_maintainers: list[dict[str, str | None]] = []
+    repository_link: Optional[str] = None
+    version_submitted: Optional[str] = None
+    categories: Optional[list[str]] = None
+    editor: dict[str, str | None] = {}
+    reviewer_1: dict[str, str | None] = {}
+    reviewer_2: dict[str, str | None] = {}
+    archive: Optional[str] = None
+    version_accepted: Optional[str] = None
+    date_accepted: Optional[str] = None
+    created_at: str = None
+    updated_at: str = None
+    closed_at: Optional[str] = None
+    issue_link: str = None
+    joss: Optional[str] = None
+    gh_meta: Optional[GhMeta] = None
+
+    @field_validator(
+        "date_accepted",
+        mode="before",
+    )
+    @classmethod
+    def clean_date_review(cls, a_date: Optional[str]) -> str:
+        """Clean a manually added datetime that is added to a review by an
+        editor when the review package is accepted.
+
+        """
+        if a_date is None or a_date in ["missing", "TBD"]:
+            return "missing"
+        else:
+            new_date = a_date.replace("/", "-").split("-")
+            if len(new_date[0]) == 4:
+                return f"{new_date[0]}-{new_date[1]}-{new_date[2]}"
+            else:
+                return f"{new_date[2]}-{new_date[0]}-{new_date[1]}"
+
+    @field_validator(
+        "created_at",
+        "updated_at",
+        "closed_at",
+        mode="before",
+    )
+    @classmethod
+    def clean_date(cls, a_date: Optional[str]) -> str:
+        """Cleans up a datetime from github and returns a date string
+
+        Runs the general clean_date function in this module as a validator.
+
+        """
+
+        return clean_date(a_date)
+
+    @field_validator(
+        "editor",
+        "reviewer_1",
+        "reviewer_2",
+        mode="before",
+    )
+    @classmethod
+    def clean_gh_url(cls, user: dict[str, str]) -> dict[str, str]:
+        """Remove markdown link remnants from gh usernames and name.
+
+        Sometimes editors and reviewers add names using github links.
+        Remove the link data.
+        """
+
+        user["github_username"] = user["github_username"].replace(
+            "https://github.com/", ""
+        )
+        user["name"] = re.sub(r"\[|\]", "", user["name"])

-from pyosmeta.contributors import ProcessContributors
+        return user
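`clean_date_review` normalizes both date orderings editors have used; a small demonstration (other fields take their defaults):

```python
from pyosmeta import ReviewModel

print(ReviewModel(date_accepted="2023/06/28").date_accepted)  # "2023-06-28"
print(ReviewModel(date_accepted="6/28/2023").date_accepted)   # "2023-6-28" (no zero padding)
print(ReviewModel(date_accepted="TBD").date_accepted)         # "missing"
```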

-# main reason to use this is attributes .. avoiding them being changed
-# in other instances...
 @dataclass
 class ProcessIssues:
     """
@@ -17,7 +164,6 @@ class ProcessIssues:

     """

-    # TODO: turn file io into functions and remove inheritance here
     def __init__(self, org, repo_name, label_name):
         """
         More here...
@@ -41,9 +187,25 @@ def __init__(self, org, repo_name, label_name):

         self.GITHUB_TOKEN = self.contrib_instance.get_token()

+    gh_stats = [
+        "name",
+        "description",
+        "homepage",
+        "created_at",
+        "stargazers_count",
+        "watchers_count",
+        "forks",
+        "open_issues_count",
+        "forks_count",
+    ]
+
     @property
     def api_endpoint(self):
-        return f"https://api.github.com/repos/{self.org}/{self.repo_name}/issues?labels={self.label_name}&state=all"
+        url = (
+            f"https://api.github.com/repos/{self.org}/{self.repo_name}/"
+            f"issues?labels={self.label_name}&state=all"
+        )
+        return url

     # Set up the API endpoint
     def _get_response(self):
@@ -106,7 +268,7 @@ def _get_line_meta(self, line_item: list[str]) -> dict[str, object]:
         line_item : list
             A single list item representing a single line in the issue
             containing metadata for the review.
-            This comment is the metadata for the review that the author fills out.
+            This comment is metadata for the review that the author fills out.

         Returns
         -------
@@ -126,7 +288,7 @@ def _get_line_meta(self, line_item: list[str]) -> dict[str, object]:
                 # Add each maintainer to the dict
                 user = aname.split("@")
                 # Clean
-                user = [self._clean_name(l) for l in user]
+                user = [self._clean_name(a_str) for a_str in user]
                 a_maint = {
                     "name": self._clean_name(user[0]),
                     "github_username": self._clean_name(user[1]),
@@ -152,7 +314,7 @@ def _get_line_meta(self, line_item: list[str]) -> dict[str, object]:
         return meta

     def parse_issue_header(
-        self, issues: list[str], total_lines: int = 15
+        self, issues: list[str], total_lines: int = 20
     ) -> dict[str, str]:
         """
         A function that parses through the header of an issue.
@@ -165,7 +327,7 @@ def parse_issue_header(
             metadata at the top of each issue
         total_lines : int
             an integer representing the total number of lines to parse in the
-            issue header. Default = 15
+            issue header. Default = 20

         Returns
         -------
@@ -174,78 +336,54 @@ def parse_issue_header(
             package name, description, review team, version submitted etc.
         """
-        # Reorder data
-        key_order = [
-            "package_name",
-            "package_description",
-            "submitting_author",
-            "all_current_maintainers",
-            "repository_link",
-            "version_submitted",
-            "categories",
-            "editor",
-            "reviewer_1",
-            "reviewer_2",
-            "archive",
-            "version_accepted",
-            "date_accepted",
-            "created_at",
-            "updated_at",
-            "closed_at",
-            "issue_link",
-        ]
+
         meta_dates = ["created_at", "updated_at", "closed_at"]

         review = {}
         for issue in issues:
-            package_name, body_data = self.parse_comment(issue)
-            if not package_name:
+            pkg_name, body_data = self.parse_comment(issue)
+            if not pkg_name:
                 continue

             # The first total_lines rows should include date accepted
-            issue_meta = self.get_issue_meta(body_data, total_lines)
-            # Add issue open and close date to package meta
-            # Created, opened & closed dates are in GitHub Issue response
+            review[pkg_name] = self.get_issue_meta(body_data, total_lines)
+            # Add issue open and close date to package meta from GH response
+            # Date cleaning happens via pydantic validator not here
             for a_date in meta_dates:
-                issue_meta[a_date] = self._clean_date(issue[a_date])
-
-            # Date accepted is a manually added value. Fix format separately
-            # Using dashes because it's jekyll friendly
-            try:
-                the_date = issue_meta["date_accepted"].replace("/", "-").split("-")
-                if the_date[0] == "TBD":
-                    continue
-                else:
-                    issue_meta[
-                        "date_accepted"
-                    ] = f"{the_date[2]}-{the_date[0]}-{the_date[1]}"
-            except KeyError as ke:
-                print("Oops,", package_name, "is missing date_accepted key.")
-            # Clean markdown url's from editor, and reviewer lines
-            types = ["editor", "reviewer_1", "reviewer_2"]
-            user_values = ["github_username", "name"]
-            for a_type in types:
-                for user_value in user_values:
-                    issue_meta[a_type][user_value] = (
-                        issue_meta[a_type][user_value]
-                        .replace("https://github.com/", "")
-                        .replace("[", "")
-                        .replace("]", "")
-                    )
-
-            review[package_name] = issue_meta
-            review[package_name]["categories"] = self.get_categories(body_data)
-            review[package_name]["issue_link"] = issue["url"].replace(
+                review[pkg_name][a_date] = issue[a_date]
+            # Get categories and issue review link
+            review[pkg_name]["categories"] = self.get_categories(body_data)
+            review[pkg_name]["issue_link"] = issue["url"].replace(
                 "https://api.github.com/repos/", "https://github.com/"
             )
-            # Rename package description & reorder keys
-            review[package_name]["package_description"] = review[package_name].pop(
-                "one-line_description_of_package", ""
-            )
-            review[package_name] = {
-                key: review[package_name][key]
-                for key in key_order
-                if review[package_name].get(key)
+
+            review_clean = {
+                key: value
+                for key, value in review[pkg_name].items()
+                if not key.startswith("##")
+                and not key.startswith("---")
+                and not key.startswith("-_[x]_i_agree")
             }
+            review[pkg_name] = review_clean
+            # filtered = {}
+            # for key, value in review.items():
+            #     print(key)
+            #     if not key.startswith("##") and not key.startswith("-"):
+            #         filtered[key] = value
+
+            # # Clean markdown url's from editor, and reviewer lines
+            # TODO - this could be a reviewer name cleanup validator
+            # types = ["editor", "reviewer_1", "reviewer_2"]
+            # user_values = ["github_username", "name"]
+            # for a_type in types:
+            #     for user_value in user_values:
+            #         issue_meta[a_type][user_value] = (
+            #             issue_meta[a_type][user_value]
+            #             .replace("https://github.com/", "")
+            #             .replace("[", "")
+            #             .replace("]", "")
+            #         )
+
+            # review[pkg_name] = issue_meta

         return review
@@ -260,11 +398,11 @@ def get_issue_meta(
         Parameters
         ----------
         body_data : list
-            A list containing all of the body data for the top comment in an issue.
+            A list containing all body data for the top comment in an issue.
         end_range : int
-            The number of lines to parse at the top of the issue (this may change
-            over time so this variable allows us to have different processing
-            based upon the date of the issue being opened)
+            The number of lines to parse at the top of the issue (this may
+            change over time so this variable allows us to have different
+            processing based upon the date of the issue being opened)

         Returns
         -------
@@ -279,7 +417,9 @@ def get_issue_meta(

         return issue_meta

-    def get_repo_endpoints(self, review_issues: dict[str, str]) -> dict[str, str]:
+    def get_repo_endpoints(
+        self, review_issues: dict[str, str]
+    ) -> dict[str, str]:
         """
         Returns a list of repository endpoints

@@ -291,7 +431,7 @@ def get_repo_endpoints(
         Returns
         -------
         Dict
-            Containing package_name: endpoint for each review.
+            Containing pkg_name: endpoint for each review.

         """

@@ -299,7 +439,13 @@ def get_repo_endpoints(
         all_repos = {}
         for a_package in review_issues.keys():
             repo = review_issues[a_package]["repository_link"].strip("/")
             owner, repo = repo.split("/")[-2:]
-            all_repos[a_package] = f"https://api.github.com/repos/{owner}/{repo}"
+            # TODO: could be simpler code - Remove any link remnants
+            pattern = r"[\(\)\[\]?]"
+            owner = re.sub(pattern, "", owner)
+            repo = re.sub(pattern, "", repo)
+            all_repos[
+                a_package
+            ] = f"https://api.github.com/repos/{owner}/{repo}"
         return all_repos
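The owner/repo parsing above, in isolation (pure string handling, no API call):

```python
import re

repo = "https://github.com/pyopensci/pyosmeta/".strip("/")
owner, repo = repo.split("/")[-2:]
owner = re.sub(r"[\(\)\[\]?]", "", owner)
repo = re.sub(r"[\(\)\[\]?]", "", repo)
print(f"https://api.github.com/repos/{owner}/{repo}")
# https://api.github.com/repos/pyopensci/pyosmeta
```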
""" @@ -299,7 +439,13 @@ def get_repo_endpoints(self, review_issues: dict[str, str]) -> dict[str, str]: for a_package in review_issues.keys(): repo = review_issues[a_package]["repository_link"].strip("/") owner, repo = repo.split("/")[-2:] - all_repos[a_package] = f"https://api.github.com/repos/{owner}/{repo}" + # TODO: could be simpler code - Remove any link remnants + pattern = r"[\(\)\[\]?]" + owner = re.sub(pattern, "", owner) + repo = re.sub(pattern, "", repo) + all_repos[ + a_package + ] = f"https://api.github.com/repos/{owner}/{repo}" return all_repos def parse_comment(self, issue: dict[str, str]) -> tuple[str, list[str]]: @@ -315,49 +461,70 @@ def parse_comment(self, issue: dict[str, str]) -> tuple[str, list[str]]: Returns ------- - package_name : str + pkg_name : str The name of the package comment : list A list containing the comment elements in order """ - # TODO: this var isn't used - comments_url = issue["comments_url"] body = issue["body"] - # Here sometimes the lines are split with \n, others \r\n - # To clean split on \n but may have to remove the \r + # Clean line breaks (could be done with a regex too) lines = body.split("\n") lines = [a_line.strip("\r").strip() for a_line in lines] # Some users decide to hold the issue titles. # For those, clean the markdown bold ** element - lines = [line.replace("**", "").strip() for line in lines if line.strip() != ""] + lines = [ + line.replace("**", "").strip() + for line in lines + if line.strip() != "" + ] # You need a space after : or else it will break https:// in two body_data = [line.split(": ") for line in lines if line.strip() != ""] # Loop through issue header and grab relevant review metadata name_index = next( - (i for i, sublist in enumerate(body_data) if sublist[0] == "Package Name"), + ( + i + for i, sublist in enumerate(body_data) + if sublist[0] == "Package Name" + ), None, ) - package_name = body_data[name_index][1] if name_index else None + pkg_name = body_data[name_index][1] if name_index else None - return package_name, body_data + return pkg_name, body_data - def _clean_date(self, date: str) -> str: - """Cleans up a datetime from github and returns a date string""" + def get_gh_metrics( + self, + endpoints: dict[str, str], + reviews: dict[str, dict[str, Any]], + ) -> dict[str, dict[str, Any]]: + """ + Get GitHub metrics for each review based on provided endpoints. - try: - date_clean = ( - datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ") - .date() - .strftime("%Y-%m-%d") - ) - except: - print("Oops - i need a string to process date") - print("setting date to missing") - date_clean = "missing" - return date_clean + Parameters: + ---------- + endpoints : dict + A dictionary mapping package names to their GitHub URLs. + reviews : dict + A dictionary containing review data. + + Returns: + ------- + dict + Updated review data with GitHub metrics. + """ + pkg_meta = {} + for pkg_name, url in endpoints.items(): + pkg_meta[pkg_name] = self.get_repo_meta(url, self.gh_stats) + + pkg_meta[pkg_name]["contrib_count"] = self.get_repo_contribs(url) + pkg_meta[pkg_name]["last_commit"] = self.get_last_commit(url) + # Add github meta to review metadata + reviews[pkg_name]["gh_meta"] = pkg_meta[pkg_name] + + return reviews def get_repo_meta(self, url: str, stats_list: list) -> dict: """ @@ -365,8 +532,7 @@ def get_repo_meta(self, url: str, stats_list: list) -> dict: """ stats_dict = {} - # Small script to get the url (normally the docs) and description of a repo! 
-        print(url)
+        # Get the url (normally the docs) and description of a repo!
         response = requests.get(
             url, headers={"Authorization": f"token {self.GITHUB_TOKEN}"}
         )
@@ -387,7 +553,9 @@ def get_repo_meta(self, url: str, stats_list: list) -> dict:
             for astat in stats_list:
                 stats_dict[astat] = data[astat]
             stats_dict["documentation"] = stats_dict.pop("homepage")
-            stats_dict["created_at"] = self._clean_date(stats_dict["created_at"])
+            # stats_dict["created_at"] = self._clean_date(
+            #     stats_dict["created_at"]
+            # )

         return stats_dict

@@ -404,7 +572,7 @@ def get_repo_contribs(self, url: str) -> dict:
         )

         if response.status_code == 404:
-            print("Can't find: ", url, ". Did the repo url change?")
+            print("Can't find: ", repo_contribs, ". Did the repo url change?")
         # Extract the description and homepage URL from the JSON response
         else:
             return len(response.json())
@@ -422,59 +590,48 @@ def get_last_commit(self, repo: str) -> str:
         response = requests.get(
             url, headers={"Authorization": f"token {self.GITHUB_TOKEN}"}
         ).json()
-        date = (
-            response[0]["commit"]["author"]["date"]
-            # if 0 in response
-            # else "1970-01-01T00:00:00Z"
-        )
+        date = response[0]["commit"]["author"]["date"]

-        return self._clean_date(date)
+        return date

     def get_categories(
-        self, issue_body_list: list[list[str]], fmt: bool = True
+        self, issue_list: list[list[str]], fmt: bool = True
     ) -> list[str]:
         """Parse through a pyOS review issue and grab categories associated
         with a package

         Parameters
         ----------
-        issue_body_list : list[list[str]]
-            The first comment from the issue split into lines and then the lines split as by self.parse_comment()
+        issue_list : list[list[str]]
+            The first comment from the issue split into lines and then the
+            lines split as by self.parse_comment()
         fmt : bool
-            Applies some formatting changes to the categories to match what is required for the website.
+            Applies some formatting changes to the categories to match what is
+            required for the website.
         """
         # Find the starting index of the category section
-        start_index = None
-        for i in range(len(issue_body_list)):
-            if issue_body_list[i][0].startswith("- Please indicate which"):
-                start_index = i + 1
-                break
-        # NOTE - some issues have line after that startswith "Check out our"
-        # For those issues advance i += 1
-        if issue_body_list[start_index][0].startswith("Check out our"):
-            start_index += 1
-
-        if start_index is None:
-            # If we couldn't find the starting index, return an empty list
-            return []
-
-        # Iterate through the lines starting at the starting index and grab the relevant text
-        cat_matches = ["[x]", "[X]"]
-        categories: list[str] = []
-        for i in range(start_index, len(issue_body_list)):  # 30):
-            line = issue_body_list[i][0].strip()
-            checked = any([x in line for x in cat_matches])
-
-            if line.startswith("- [") and checked:
-                category = line[line.index("]") + 2 :]
-                categories.append(category)
-            elif not line.startswith("- ["):
-                break
-
-        if fmt:
-            categories = [c.lower().replace(" ", "-") for c in categories]
-        return categories
-
-
-# https://api.github.com/repos/pyopensci/python-package-guide/commits
+        try:
+            index = next(
+                i
+                for i, sublist in enumerate(issue_list)
+                if "## Scope" in sublist
+            )
+            # Iterate from scope index to first line starting with "- ["
+            # to find the list of category check boxes
+            for i in range(index + 1, len(issue_list)):
+                if issue_list[i] and issue_list[i][0].startswith("- ["):
+                    cat_index = i
+                    break
+        except StopIteration:
+            print("'## Scope' not found in the list.")
+            return []
+
+        # Get checked categories for package
+        cat_list = issue_list[cat_index : cat_index + 10]
+        categories = [
+            re.sub(r"- \[[xX]\] ", "", item[0])
+            for item in cat_list
+            if re.search(r"- \[[xX]\] ", item[0])
+        ]

+        return [item.lower().replace("[^1]", "") for item in categories]