Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjtaylor committed May 5, 2024
1 parent 7e4df90 commit a23e648
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 54 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ jobs:
runs-on: ubuntu-latest
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
DBGAP_STUDY_ID: phs002371.v5.p1

steps:
- name: Checkout code
Expand All @@ -19,7 +20,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.11

- name: Install dependencies
run: |
Expand Down
163 changes: 110 additions & 53 deletions dbgapmonitor.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,70 @@
import pandas as pd
import polars as pl
#!/usr/bin/env python

"""
File: dbgapmonitor.py
Author: Adam J. Taylor
Date: 2024-05-05
Description: A Python script to monitor dbGaP Authorized Requestors and send a message to Slack.
"""

from datetime import datetime, timedelta
import io
import requests
import json
import os

# Download the tab-separated text file
url = "https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/GetAuthorizedRequestDownload.cgi?study_id=phs002371.v5.p1"
response = requests.get(url)
import pandas as pd
import polars as pl
import requests


# Read the CSV file without header and with arbitrary column names
df = (
pl.read_csv(
# "dbGaPAuthorizedRequests.phs002371.v1.p1.tab-delimited.txt",
io.StringIO(response.text),
separator="\t",
truncate_ragged_lines=True,
try_parse_dates=True,
)
.rename({"Cloud Service AdministratorData stewardRequestor": "Requestor"})
.with_columns(pl.col("Date of approval").str.to_date("%b%d, %Y"))
.sort("Date of approval", descending=True)
)
def get_dbgap_requestors(phs_id):
"""
Retrieves the list of dbGaP Authorized Requestors for a given study ID.
# Strip extra whitespace from the columns
df = df.with_columns(
pl.col("Requestor").str.strip_chars(),
pl.col("Affiliation").str.strip_chars(),
pl.col("Project").str.strip_chars(),
)
Args:
phs_id (str): The study ID for which the requestors are to be retrieved.
Returns:
polars.DataFrame: A DataFrame containing the dbGaP Authorized Requestors.
"""
# Download the tab-separated text file
url = f"https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/GetAuthorizedRequestDownload.cgi?study_id={phs_id}"
response = requests.get(url)

# Filter for those approved in the last month
# Get today's date
today = datetime.today()
# Read the tab-separated response; its header row supplies the column names
# Polars is used here as it simplifies loading a rather non-standard TSV file
df = (
pl.read_csv(
io.StringIO(response.text),
separator="\t",
truncate_ragged_lines=True,
try_parse_dates=True,
)
.rename({"Cloud Service AdministratorData stewardRequestor": "Requestor"})
.with_columns(pl.col("Date of approval").str.to_date("%b%d, %Y"))
.sort("Date of approval", descending=True)
)

# Calculate the date from 7 days ago
last_month = today - timedelta(days=7)
df_recent = df.filter(pl.col("Date of approval") > last_month)
# Strip extra whitespace from the columns
df = df.with_columns(
pl.col("Requestor").str.strip_chars(),
pl.col("Affiliation").str.strip_chars(),
pl.col("Project").str.strip_chars(),
)

print(df_recent)
return df


def dataframe_to_slack_block_with_md_links(df):
"""
Converts a pandas DataFrame to a Slack message block with markdown links.
Args:
df (pandas.DataFrame): The DataFrame containing the data to be converted.
Returns:
dict: A dictionary representing the Slack message block with markdown links.
"""
blocks = [
{
"type": "section",
Expand All @@ -62,6 +82,19 @@ def dataframe_to_slack_block_with_md_links(df):


def send_message_to_slack_blocks(webhook_url, blocks):
"""
Sends a message to Slack using the provided webhook URL and blocks.
Args:
webhook_url (str): The URL of the Slack webhook.
blocks (list): The blocks to be sent as part of the message.
Raises:
ValueError: If the request to Slack returns an error.
Returns:
None
"""
headers = {"Content-Type": "application/json"}
data = json.dumps(blocks)
response = requests.post(webhook_url, headers=headers, data=data)
Expand All @@ -71,24 +104,48 @@ def send_message_to_slack_blocks(webhook_url, blocks):
)


if df_recent.to_pandas().empty:
# If no modified entities are found, prepare a simple message for Slack
slack_message_blocks = {
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "No new dbGaP Authorized Requestors added in the last 7 days",
},
}
]
}
else:
# If there are modified entities, format the message as before
slack_message_blocks = dataframe_to_slack_block_with_md_links(df_recent.to_pandas())

# Usage
# Get the webhook URL from a env variable called SLACK_WEBHOOK_URL
webhook_url = os.getenv("SLACK_WEBHOOK_URL")
send_message_to_slack_blocks(webhook_url, slack_message_blocks)
def main(lookback_days=7):
    """
    Reports recently approved dbGaP Authorized Requestors to Slack.

    Reads the Slack webhook URL from the SLACK_WEBHOOK_URL environment
    variable and the study ID from the DBGAP_STUDY_ID environment variable,
    downloads the requestors for that study, keeps the rows whose
    "Date of approval" falls within the lookback window and sends the
    resulting message to Slack.

    Args:
        lookback_days (int): The number of days to look back for new
            approvals. Defaults to 7.
    Raises:
        ValueError: If SLACK_WEBHOOK_URL or DBGAP_STUDY_ID is not set.
    Returns:
        None
    """
    # Fail fast with a clear message: an unset variable would otherwise end up
    # as the literal "None" in the download URL or as a missing webhook target.
    webhook_url = os.getenv("SLACK_WEBHOOK_URL")
    if not webhook_url:
        raise ValueError("The SLACK_WEBHOOK_URL environment variable is not set")

    phs_id = os.getenv("DBGAP_STUDY_ID")
    if not phs_id:
        raise ValueError("The DBGAP_STUDY_ID environment variable is not set")

    # Get the dbGaP Authorized Requestors for the study ID
    df = get_dbgap_requestors(phs_id)

    # Filter for those approved in the last `lookback_days` days
    today = datetime.today()
    start_date = today - timedelta(days=lookback_days)
    df_recent = df.filter(pl.col("Date of approval") > start_date)

    # Prepare the Slack message blocks
    if df_recent.is_empty():
        # No approvals fall inside the lookback window: send a simple notice
        slack_message_blocks = {
            "blocks": [
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": f"No new dbGaP Authorized Requestors added in the last {lookback_days} days",
                    },
                }
            ]
        }
    else:
        # The block formatter works on a pandas DataFrame, so convert here
        slack_message_blocks = dataframe_to_slack_block_with_md_links(
            df_recent.to_pandas()
        )

    # Send the message to Slack
    send_message_to_slack_blocks(webhook_url, slack_message_blocks)


if __name__ == "__main__":
    main()

0 comments on commit a23e648

Please sign in to comment.