Skip to content

Commit

Permalink
feat: Update requirements and add GitHub client for organization and …
Browse files Browse the repository at this point in the history
…repository management (#5)

feat: Update child pages for each github org to include details about each org for searchability
  • Loading branch information
venkatamutyala authored Nov 21, 2024
1 parent a217c2f commit a19230a
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 58 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Generates dynamic documentation of all our GitHub Organizations and their descri
The application requires three environment variables to be set. You can create a `.env` file in the root directory of the project with the following content:

```env
export GITHUB_TOKEN=<Uses a PAT> #Assumes that all orgs a user is part of is an ORG that we own.
export GITHUB_TOKEN=<Uses a PAT> #Assumes that every org the user is a member of is an org that we own. Permissions required: **full** `repo` scope and org `read:org` scope
export GETOUTLINE_DOCUMENT_ID=<This is usually at the end of the document in the URL of the document you want to update>
export GETOUTLINE_API_TOKEN=<Token is tied to a user account in GETOUTLINE>
```
131 changes: 131 additions & 0 deletions app/github.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import requests
import os
import glueops.setup_logging

# Log verbosity is read from the LOG_LEVEL environment variable; "INFO" is used when it is unset.
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
# Module-wide logger, configured through the glueops helper with the level chosen above.
logger = glueops.setup_logging.configure(level=LOG_LEVEL)

class GitHubClient:
    """Small client for the parts of the GitHub REST API this tool needs.

    It lists the organizations visible to the token, the repositories of an
    organization and the topics of a repository, and renders those as
    Markdown tables.
    """

    # Seconds to wait on a single HTTP request. `requests` waits forever
    # unless a timeout is passed explicitly.
    REQUEST_TIMEOUT = 30

    # Banner placed at the very top of the generated organization page.
    GENERATED_NOTICE = "> This page is automatically generated. Any manual changes will be lost. See: https://github.com/GlueOps/getoutline-docs-update-github \n\n"

    # Shown when GitHub reports no (or a null/empty) description.
    NO_DESCRIPTION = 'No description available.'

    def __init__(self, github_token, github_api_url):
        """
        :param github_token: Token sent in the ``Authorization`` header.
        :param github_api_url: Base URL of the GitHub API (no trailing slash).
        """
        self.github_token = github_token
        self.github_api_url = github_api_url

    def _headers(self, accept='application/vnd.github.v3+json'):
        """Build the request headers, with ``accept`` as the media type."""
        return {
            'Authorization': f'token {self.github_token}',
            'Accept': accept,
        }

    def _get_paginated(self, url, label):
        """Collect the JSON items of every page, starting at ``url``.

        :param url: URL of the first page.
        :param label: Text appended to the per-page debug log line.
        :return: List with the items of all pages, in order.
        :raises requests.exceptions.RequestException: On any failed request.
        """
        items = []
        while url:
            response = requests.get(url, headers=self._headers(), timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            page = response.json()
            items.extend(page)
            logger.debug(f"Fetched {len(page)} {label}")
            # `response.links` is the parsed Link header; no "next" entry
            # means this was the last page.
            url = response.links.get('next', {}).get('url')
        return items

    @staticmethod
    def _table_cell(value):
        """Make ``value`` safe inside a single-line Markdown table cell.

        Line breaks become spaces and ``|`` is escaped, because either one
        would otherwise end the cell or the row early.
        """
        return ' '.join(str(value).splitlines()).replace('|', '\\|')

    @staticmethod
    def _render_organizations(github_orgs):
        """Render the organization overview page (banner, title and table)."""
        lines = [
            GitHubClient.GENERATED_NOTICE,
            "# Full list of GitHub Organizations\n\n",
            "| Organization Name | Description |\n",
            "|-------------------|-------------|\n",
        ]
        for org in github_orgs:
            name = org['login']
            url = f"https://github.com/{org['login']}"
            # `or` rather than a .get() default: the key is usually present
            # with a null value, which .get() would return as None.
            description = GitHubClient._table_cell(org.get('description') or GitHubClient.NO_DESCRIPTION)
            lines.append(f"| [{name}]({url}) | {description} |\n")
        return ''.join(lines)

    @staticmethod
    def _render_repository_row(org_login, repo, topics):
        """Render one table row for ``repo`` with its ``topics``."""
        repo_name = repo['name']
        repo_description = GitHubClient._table_cell(repo.get('description') or GitHubClient.NO_DESCRIPTION)
        topics_str = ', '.join(topics)
        return f"| [{repo_name}](https://github.com/{org_login}/{repo_name}) | {repo_description} | {topics_str} |\n"

    def get_organizations(self):
        """Return every organization listed by ``/user/orgs`` for the token.

        :return: List of organization dicts as returned by the API.
        :raises requests.exceptions.RequestException: If any request fails;
            the error is logged and re-raised.
        """
        logger.debug("Fetching organizations from GitHub API.")
        try:
            organizations = self._get_paginated(f"{self.github_api_url}/user/orgs", "organizations.")
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching organizations: {e}")
            raise
        logger.debug("All organizations fetched successfully.")
        return organizations

    @staticmethod
    def generate_markdown(github_orgs):
        """Render the overview page listing all organizations.

        Declared static because it uses no instance state; it can be called
        on the class (as existing callers do) or on an instance.

        :param github_orgs: Iterable of organization dicts with a ``login``
            key and an optional ``description`` key.
        :return: Markdown text: generated-page banner, title and table.
        """
        logger.debug("Generating markdown for organizations.")
        markdown_content = GitHubClient._render_organizations(github_orgs)
        logger.debug("Markdown generation completed.")
        return markdown_content

    def get_repositories(self, org_login):
        """Return every repository listed by ``/orgs/{org_login}/repos``.

        :param org_login: Login name of the organization.
        :return: List of repository dicts as returned by the API.
        :raises requests.exceptions.RequestException: If any request fails;
            the error is logged and re-raised.
        """
        logger.debug(f"Fetching repositories for organization: {org_login}")
        try:
            return self._get_paginated(
                f"{self.github_api_url}/orgs/{org_login}/repos",
                f"repositories for organization: {org_login}",
            )
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching repositories for organization {org_login}: {e}")
            raise

    def get_repository_topics(self, org_login, repo_name):
        """Return the topic names of ``org_login/repo_name``.

        :param org_login: Login name of the owning organization.
        :param repo_name: Name of the repository.
        :return: The ``names`` list of the response, or ``[]`` if absent.
        :raises requests.exceptions.RequestException: If the request fails;
            the error is logged and re-raised.
        """
        logger.debug(f"Fetching topics for repository: {org_login}/{repo_name}")
        url = f"{self.github_api_url}/repos/{org_login}/{repo_name}/topics"

        try:
            response = requests.get(
                url,
                headers=self._headers('application/vnd.github.mercy-preview+json'),
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            topics = response.json().get('names', [])
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching topics for repository {org_login}/{repo_name}: {e}")
            raise
        logger.debug(f"Fetched topics for repository: {org_login}/{repo_name}")
        return topics

    def generate_markdown_for_org(self, org_login):
        """Render a Markdown table of all repositories of an organization.

        Issues one topics request per repository in addition to the
        repository listing.

        :param org_login: Login name of the organization.
        :return: Markdown text with a title and a repository table.
        :raises requests.exceptions.RequestException: If any request fails.
        """
        logger.debug(f"Generating markdown for organization: {org_login}")
        lines = [
            f"# Repositories for {org_login}\n\n",
            "| Repository | Description | Topics |\n",
            "|------------|-------------|--------|\n",
        ]

        for repo in self.get_repositories(org_login):
            repo_topics = self.get_repository_topics(org_login, repo['name'])
            lines.append(self._render_repository_row(org_login, repo, repo_topics))

        logger.debug(f"Markdown generation completed for organization: {org_login}")
        return ''.join(lines)
123 changes: 67 additions & 56 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import requests
import os
import glueops.setup_logging


import glueops.getoutline
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from github import GitHubClient

# Base URL of the GitHub REST API; handed to GitHubClient in main().
GITHUB_API_URL = "https://api.github.com"
# Base URL of the Outline instance; handed to GetOutlineClient in main().
GETOUTLINE_API_URL = "https://app.getoutline.com"

# Environment Variables
REQUIRED_ENV_VARS = [
"GITHUB_TOKEN",
Expand All @@ -18,7 +21,14 @@
}

def get_env_variable(var_name: str, default=None):
"""Retrieve environment variable or return default if not set."""
"""
Retrieve environment variable or return default if not set.
:param var_name: Name of the environment variable.
:param default: Default value if the environment variable is not set.
:return: Value of the environment variable or default.
:raises EnvironmentError: If a required environment variable is not set.
"""
value = os.getenv(var_name, default)
if var_name in REQUIRED_ENV_VARS and value is None:
logger.error(f"Environment variable '{var_name}' is not set.")
Expand Down Expand Up @@ -47,64 +57,65 @@ def get_env_variable(var_name: str, default=None):
logger.critical(f"Environment setup failed: {env_err}")
raise

def get_organizations():
    """
    Fetch every organization listed by ``/user/orgs`` for GITHUB_TOKEN.

    All pages of the listing are followed, not just the first one.
    :return: List of organization dicts, or an empty list if any request
        fails (the error is logged, not raised).
    """
    logger.debug("Fetching organizations from GitHub API.")
    headers = {
        'Authorization': f'token {GITHUB_TOKEN}',
        'Accept': 'application/vnd.github.v3+json',
    }
    organizations = []
    url = f"{GITHUB_API_URL}/user/orgs"
    try:
        while url:
            # Explicit timeout: requests would otherwise wait indefinitely.
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            organizations.extend(response.json())
            # Parsed Link header; no "next" entry means this was the last page.
            url = response.links.get('next', {}).get('url')
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching organizations: {e}")
        return []
    logger.debug("Organizations fetched successfully.")
    return organizations
@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=60, max=120),
    retry=retry_if_exception_type(requests.exceptions.RequestException),
)
def retry_create_document(client, parent_id, title, text):
    """
    Create a document in Outline, retrying when the request fails.

    The decorator re-runs the call when it raises
    ``requests.exceptions.RequestException``, stopping after 5 attempts and
    using an exponential wait configured with min=60 and max=120.
    :param client: GetOutlineClient instance.
    :param parent_id: Parent document ID.
    :param title: Title of the new document.
    :param text: Content of the new document.
    :return: Whatever ``client.create_document`` returns.
    """
    created = client.create_document(parent_id, title, text)
    return created

def generate_markdown(orgs):
    """
    Render the overview page listing all organizations as Markdown.

    :param orgs: Iterable of organization dicts with a ``login`` key and an
        optional ``description`` key.
    :return: Markdown text: generated-page banner, title and table.
    """
    logger.debug("Generating markdown for organizations.")
    lines = [
        "> This page is automatically generated. Any manual changes will be lost. See: https://github.com/GlueOps/getoutline-docs-update-github \n\n",
        # Appended after the banner; assigning it with `=` used to discard the banner.
        "# Full list of GitHub Organizations\n\n",
        "| Organization Name | Description |\n",
        "|-------------------|-------------|\n",
    ]

    for org in orgs:
        name = org['login']
        url = f"https://github.com/{org['login']}"
        # `or` rather than a .get() default: a key that is present with a
        # null value would otherwise be rendered as the text "None".
        description = org.get('description') or 'No description available.'
        lines.append(f"| [{name}]({url}) | {description} |\n")

    logger.debug("Markdown generation completed.")
    return ''.join(lines)
def update_document(markdown_text):
    """
    Post new text for the document GETOUTLINE_DOCUMENT_ID to Outline.

    Best effort: a failed request is logged and not raised.
    :param markdown_text: New content for the document.
    """
    logger.debug("Updating document on Outline.")
    url = "https://app.getoutline.com/api/documents.update"
    payload = {
        "id": GETOUTLINE_DOCUMENT_ID,
        "text": markdown_text
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GETOUTLINE_API_TOKEN}"
    }
    try:
        response = requests.post(url, json=payload, headers=headers)
        response.raise_for_status()
        logger.info(f"Document update response code: {response.status_code}")
    except requests.exceptions.RequestException as e:
        logger.error(f"Error updating document: {e}")


@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=60, max=120), retry=retry_if_exception_type(requests.exceptions.RequestException))
def retry_update_document(client, text):
    """
    Retry updating a document in Outline.

    The call is re-run when it raises ``requests.exceptions.RequestException``,
    stopping after 5 attempts.
    :param client: GetOutlineClient instance.
    :param text: New content for the document.
    :return: Result of the update_document method.
    """
    return client.update_document(text)

@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=60, max=120),
    retry=retry_if_exception_type(requests.exceptions.RequestException),
)
def retry_generate_markdown_for_org(client, github_org_name):
    """
    Build the Markdown page for one GitHub organization, retrying on failure.

    The decorator re-runs the call when it raises
    ``requests.exceptions.RequestException``, stopping after 5 attempts and
    using an exponential wait configured with min=60 and max=120.
    :param client: GitHubClient instance.
    :param github_org_name: Name of the GitHub organization.
    :return: Markdown content as a string.
    """
    org_markdown = client.generate_markdown_for_org(github_org_name)
    return org_markdown

def main():
    """
    Main function to update GitHub organizations documentation in Outline.

    Fetches the organizations, replaces the child documents of the configured
    Outline document with one page per organization, then writes the
    organization overview into the configured document itself.
    """
    logger.info("Starting GitHub Doc Updates.")
    outline_client = glueops.getoutline.GetOutlineClient(GETOUTLINE_API_URL, GETOUTLINE_DOCUMENT_ID, GETOUTLINE_API_TOKEN)
    github_client = GitHubClient(GITHUB_TOKEN, GITHUB_API_URL)
    organizations = github_client.get_organizations()
    if not organizations:
        logger.warning("No organizations found.")
        return

    # Show a placeholder while the child pages are being rebuilt.
    logger.info("Updating document letting folks know we are updating the list of organizations.")
    retry_update_document(outline_client, "# UPDATING..... \n\n # check back shortly.....\n\n\n")

    # Remove the existing child pages before recreating one per organization.
    parent_id = outline_client.get_document_uuid()
    children = outline_client.get_children_documents_to_delete(parent_id)
    for child_id in children:
        outline_client.delete_document(child_id)

    for org in organizations:
        org_specific_markdown_content = retry_generate_markdown_for_org(github_client, org["login"])
        retry_create_document(outline_client, parent_id, org["login"], org_specific_markdown_content)

    # Replace the placeholder with the overview last, once all child pages exist.
    markdown = GitHubClient.generate_markdown(organizations)
    retry_update_document(outline_client, markdown)

    logger.info("Finished GitHub Doc Updates.")


# Run the update exactly once, and only when executed as a script.
if __name__ == "__main__":
    main()
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
requests==2.32.3
glueops-helpers @ https://github.com/GlueOps/python-glueops-helpers-library/archive/refs/tags/v0.4.1.zip
glueops-helpers @ https://github.com/GlueOps/python-glueops-helpers-library/archive/refs/tags/v0.6.0.zip
tenacity==9.0.0

0 comments on commit a19230a

Please sign in to comment.