Skip to content

Commit

Permalink
Add simple GH actions run (not airflow)
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjtaylor committed May 5, 2024
1 parent 9b760db commit 98a0d22
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 0 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/run.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# GitHub Actions workflow that runs dbgapmonitor.py every Monday at 08:00 UTC.
# It can also be started manually from the Actions tab (workflow_dispatch).
name: Run dbgapmonitor

on:
  schedule:
    # GitHub evaluates cron schedules in UTC.
    - cron: '0 8 * * 1'
  workflow_dispatch:

jobs:
  test:
    runs-on: ubuntu-latest
    env:
      # Incoming-webhook URL that dbgapmonitor.py reads with os.getenv.
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML never reads a version such as 3.10 as the number 3.1.
          python-version: '3.8'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run dbgapmonitor.py
        run: python dbgapmonitor.py
94 changes: 94 additions & 0 deletions dbgapmonitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import pandas as pd
import polars as pl
from datetime import datetime, timedelta
import io
import requests
import json
import os

# Download the tab-separated listing of authorized requests for the study.
url = "https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/GetAuthorizedRequestDownload.cgi?study_id=phs002371.v5.p1"

# Bound the wait so a stalled server cannot hang the scheduled run, and fail
# loudly on an HTTP error instead of trying to parse an error page as a table.
response = requests.get(url, timeout=60)
response.raise_for_status()


# Parse the tab-separated text (the first line supplies the column names).
# A previously downloaded local copy of the file can be passed to read_csv
# instead of the in-memory buffer when debugging offline.
df = (
    pl.read_csv(
        io.StringIO(response.text),
        separator="\t",
        truncate_ragged_lines=True,
        try_parse_dates=True,
    )
    # NOTE(review): this header looks like three column titles fused together
    # in the download; it is shortened to "Requestor" here.
    .rename({"Cloud Service AdministratorData stewardRequestor": "Requestor"})
    # Convert the approval date from text using the "%b%d, %Y" layout.
    .with_columns(pl.col("Date of approval").str.to_date("%b%d, %Y"))
    # Most recent approvals first.
    .sort("Date of approval", descending=True)
)

# Strip extra whitespace from the text columns used in the Slack message.
df = df.with_columns(
    pl.col("Requestor").str.strip_chars(),
    pl.col("Affiliation").str.strip_chars(),
    pl.col("Project").str.strip_chars(),
)


# Keep only the requests approved within the look-back window.
# The Slack message texts elsewhere in this script say "last 7 days"; keep
# them in sync if this value changes.
LOOKBACK_DAYS = 7

today = datetime.today()
cutoff = today - timedelta(days=LOOKBACK_DAYS)
df_recent = df.filter(pl.col("Date of approval") > cutoff)

print(df_recent)


def dataframe_to_slack_block_with_md_links(df):
    """Build a Slack Block Kit payload with one section per row of *df*.

    Args:
        df: pandas DataFrame with the columns "Requestor", "Affiliation",
            "Request status", "Date of approval" (values must provide
            ``strftime``) and "Project".

    Returns:
        A dict ``{"blocks": [...]}`` whose first block is a bold heading and
        whose remaining blocks describe the rows of *df* in order.
    """
    heading = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": "*New dbGaP Authorized Requestors added in the last 7 days*",
        },
    }

    def _section(record):
        # One mrkdwn section: who / where / status / when, then the project
        # title rendered as a block quote on the following line.
        summary = (
            f"{record['Requestor']} from {record['Affiliation']} "
            f"{record['Request status']} on "
            f"{record['Date of approval'].strftime('%a %d %B')}\n"
            f"> {record['Project']}"
        )
        return {"type": "section", "text": {"type": "mrkdwn", "text": summary}}

    return {"blocks": [heading, *(_section(rec) for _, rec in df.iterrows())]}


def send_message_to_slack_blocks(webhook_url, blocks, timeout=10):
    """Post a Block Kit payload to a Slack incoming webhook.

    Args:
        webhook_url: URL of the Slack incoming webhook.
        blocks: JSON-serialisable payload, e.g. ``{"blocks": [...]}``.
        timeout: Seconds to wait for Slack before giving up, so a stalled
            connection cannot hang the run indefinitely.

    Raises:
        ValueError: If *webhook_url* is missing or empty, or if Slack answers
            with any status code other than 200.
    """
    # Fail early with an actionable message instead of letting the HTTP
    # library reject a None/empty URL with a less obvious error.
    if not webhook_url:
        raise ValueError(
            "No Slack webhook URL provided; set the SLACK_WEBHOOK_URL environment variable"
        )

    headers = {"Content-Type": "application/json"}
    data = json.dumps(blocks)
    response = requests.post(webhook_url, headers=headers, data=data, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(
            f"Request to slack returned an error {response.status_code}, the response is:\n{response.text}"
        )


# Build the Slack payload. The emptiness check is done on the Polars frame
# itself so the pandas conversion only happens when there are rows to format.
if df_recent.is_empty():
    # Nothing was approved inside the window: send a short notice instead.
    slack_message_blocks = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "No new dbGaP Authorized Requestors added in the last 7 days",
                },
            }
        ]
    }
else:
    # The formatter iterates rows with pandas, so convert once here.
    slack_message_blocks = dataframe_to_slack_block_with_md_links(df_recent.to_pandas())

# Read the webhook URL from the SLACK_WEBHOOK_URL environment variable and
# post the message.
webhook_url = os.getenv("SLACK_WEBHOOK_URL")
send_message_to_slack_blocks(webhook_url, slack_message_blocks)
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pandas
polars
# Needed by polars DataFrame.to_pandas(), which dbgapmonitor.py calls.
pyarrow
# Imported directly by dbgapmonitor.py for the dbGaP download and the Slack post.
requests

0 comments on commit 98a0d22

Please sign in to comment.