-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add simple GH actions run (not airflow)
- Loading branch information
1 parent
9b760db
commit 98a0d22
Showing
3 changed files
with
126 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# GitHub Action that runs dbgapmonitor.py every Monday at 08:00 UTC.
# It can also be started manually from the Actions tab (workflow_dispatch).
name: Run dbgapmonitor

on:
  schedule:
    # minute 0, hour 8, any day of month, any month, day-of-week 1 (Monday)
    - cron: '0 8 * * 1'
  workflow_dispatch:

jobs:
  test:
    runs-on: ubuntu-latest
    env:
      # Read by dbgapmonitor.py through os.getenv("SLACK_WEBHOOK_URL").
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML never reinterprets the version as a number.
          python-version: '3.8'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - name: Run dbgapmonitor.py
        run: python dbgapmonitor.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import pandas as pd | ||
import polars as pl | ||
from datetime import datetime, timedelta | ||
import io | ||
import requests | ||
import json | ||
import os | ||
|
||
# Download the tab-separated list of authorized requests for the study.
url = "https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/GetAuthorizedRequestDownload.cgi?study_id=phs002371.v5.p1"
# A timeout keeps the scheduled job from hanging if the server never answers.
response = requests.get(url, timeout=60)
# Fail here on an HTTP error instead of trying to parse an error page as a table.
response.raise_for_status()

# Parse the tab-separated text; the first line supplies the column names
# that are referenced below.
df = (
    pl.read_csv(
        io.StringIO(response.text),
        separator="\t",
        truncate_ragged_lines=True,
        try_parse_dates=True,
    )
    # Shorten the long requestor column header to something usable.
    .rename({"Cloud Service AdministratorData stewardRequestor": "Requestor"})
    # Approval dates arrive as text such as "Jan15, 2024"; convert them to dates.
    .with_columns(pl.col("Date of approval").str.to_date("%b%d, %Y"))
    # Newest approvals first.
    .sort("Date of approval", descending=True)
)

# Strip surrounding whitespace from the free-text columns.
df = df.with_columns(
    pl.col("Requestor").str.strip_chars(),
    pl.col("Affiliation").str.strip_chars(),
    pl.col("Project").str.strip_chars(),
)

# Keep only the requests approved within the last 7 days.
today = datetime.today()

# NOTE: despite its name, `last_month` is the cutoff 7 days before now,
# matching the "last 7 days" wording used in the Slack messages.
last_month = today - timedelta(days=7)
df_recent = df.filter(pl.col("Date of approval") > last_month)

print(df_recent)
|
||
|
||
def dataframe_to_slack_block_with_md_links(df, max_blocks=50, max_text_length=3000):
    """Build a Slack Block Kit payload listing one authorized request per row.

    Args:
        df: pandas DataFrame with the columns ``Requestor``, ``Affiliation``,
            ``Request status``, ``Date of approval`` (values must provide
            ``strftime``) and ``Project``.
        max_blocks: Largest number of blocks to emit, header included.
            Defaults to 50, the per-message limit documented by Slack.
        max_text_length: Largest number of characters allowed in a single
            section text. Defaults to 3000, the limit documented by Slack.

    Returns:
        A dict ``{"blocks": [...]}``. The first block is the header and each
        following block is a ``mrkdwn`` section describing one row. When the
        rows do not all fit within ``max_blocks``, the final block states how
        many rows were left out.

    Raises:
        ValueError: If ``max_blocks`` is smaller than 2 (header plus at least
            one further block).
    """
    if max_blocks < 2:
        raise ValueError("max_blocks must be at least 2")

    def _section(text):
        # Over-long text would make Slack reject the whole message, so clip it.
        if len(text) > max_text_length:
            text = text[: max_text_length - 3] + "..."
        return {"type": "section", "text": {"type": "mrkdwn", "text": text}}

    blocks = [_section("*New dbGaP Authorized Requestors added in the last 7 days*")]

    total_rows = len(df)
    row_capacity = max_blocks - 1  # one block is taken by the header
    if total_rows > row_capacity:
        # Reserve the last slot for the "more not shown" notice.
        shown_rows = row_capacity - 1
    else:
        shown_rows = total_rows

    for _, row in df.head(shown_rows).iterrows():
        line = f"{row['Requestor']} from {row['Affiliation']} {row['Request status']} on {row['Date of approval'].strftime('%a %d %B')}\n> {row['Project']}"
        blocks.append(_section(line))

    omitted = total_rows - shown_rows
    if omitted:
        blocks.append(_section(f"_...and {omitted} more not shown_"))

    return {"blocks": blocks}
|
||
|
||
def send_message_to_slack_blocks(webhook_url, blocks, timeout=10):
    """POST a Block Kit payload to a Slack incoming webhook.

    Args:
        webhook_url: URL of the Slack incoming webhook.
        blocks: JSON-serialisable payload, e.g. ``{"blocks": [...]}``.
        timeout: Seconds to wait for Slack before giving up, so a stalled
            connection cannot hang the caller indefinitely.

    Raises:
        ValueError: If ``webhook_url`` is empty or ``None``, or if Slack
            answers with a status code other than 200.
    """
    # Check up front so a missing secret gives a clear message rather than
    # an "Invalid URL 'None'" error from the HTTP library.
    if not webhook_url:
        raise ValueError("Slack webhook URL is missing; set SLACK_WEBHOOK_URL")

    headers = {"Content-Type": "application/json"}
    data = json.dumps(blocks)
    response = requests.post(webhook_url, headers=headers, data=data, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(
            f"Request to slack returned an error {response.status_code}, the response is:\n{response.text}"
        )
|
||
|
||
# Ask polars directly whether anything matched; converting to pandas just to
# test emptiness would repeat the conversion done in the else branch.
if df_recent.is_empty():
    # Nothing was approved in the window, so send a short notice instead of a list.
    slack_message_blocks = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "No new dbGaP Authorized Requestors added in the last 7 days",
                },
            }
        ]
    }
else:
    # The block builder iterates pandas rows, so convert from polars here.
    slack_message_blocks = dataframe_to_slack_block_with_md_links(df_recent.to_pandas())

# Post the report to the webhook configured in the SLACK_WEBHOOK_URL
# environment variable.
webhook_url = os.getenv("SLACK_WEBHOOK_URL")
send_message_to_slack_blocks(webhook_url, slack_message_blocks)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
pandas
polars
# polars' DataFrame.to_pandas() is documented as requiring pyarrow.
pyarrow
# dbgapmonitor.py imports requests for the download and the Slack webhook call.
requests