Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjtaylor committed Dec 14, 2023
0 parents commit 17f849f
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 0 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Builds the Docker image for this repository and pushes it to the GitHub
# Container Registry (GHCR).
name: Build Docker image

on:
  push:
    branches:
      - "main"
    # Rebuild only when something that ends up in the image changes.
    # monitor.py is copied into the image by the dockerfile (COPY . .), so a
    # change to the script has to trigger a rebuild as well.
    paths:
      - "dockerfile"
      - "monitor.py"
      - ".github/workflows/docker.yml"

env:
  REGISTRY: ghcr.io

jobs:
  docker:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Required to push the image to GHCR with GITHUB_TOKEN.
      packages: write
      id-token: write

    defaults:
      run:
        working-directory: "."

    steps:
      - name: Checkout GitHub Action
        uses: actions/checkout@v3

      - name: Login to GitHub Container Registry (GHCR)
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Derives the image tags and labels consumed by the build step below.
      - name: Extract Docker metadata
        id: metadata
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ github.repository }}
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=ref,event=branch
            type=sha
            latest

      - name: Build and push to GHCR
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          tags: ${{ steps.metadata.outputs.tags }}
          labels: ${{ steps.metadata.outputs.labels }}
          provenance: false
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
makefile
.secrets
develop.ipynb
14 changes: 14 additions & 0 deletions dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /usr/src/app

# Install the script's dependencies directly with pip (no requirements.txt is used).
# Done before the COPY so this layer does not depend on the copied sources.
RUN pip install --no-cache-dir requests pandas synapseclient

# Copy the current directory contents into the container at /usr/src/app
COPY . .

# Run monitor.py by default when the container launches.
# NOTE(review): monitor.py reads sys.argv[1] and sys.argv[2], which this default
# command does not supply - confirm how the arguments are passed at run time.
CMD ["python", "./monitor.py"]
111 changes: 111 additions & 0 deletions monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import json
import requests
import synapseclient
from synapseclient import EntityViewSchema, EntityViewType, Synapse
# Synapse client shared by the helpers and the script steps in this file.
syn = synapseclient.Synapse()

import sys

# Runtime configuration comes exclusively from the command line:
#   argv[1] - Synapse id of the fileview to monitor
#   argv[2] - Slack incoming-webhook URL that receives the report
# The webhook URL is a credential, so it must never be hard-coded in the source.
if len(sys.argv) < 3:
    sys.exit(f"usage: {sys.argv[0]} <fileview_synapse_id> <slack_webhook_url>")
fileview = sys.argv[1]
webhook_url = sys.argv[2]

# Authenticate the shared client; login() is called without explicit credentials.
syn.login()


def find_modified_entities_fileview(
    syn: Synapse, syn_id: str, value: int = 1, unit: str = "day"
) -> "pandas.DataFrame":
    """Find entities scoped in a fileview modified in the past {value} {unit}.

    Args:
        syn: Synapse connection
        syn_id: Synapse Fileview Id
        value: number of time units
        unit: time unit

    Returns:
        The query result converted to a DataFrame, with one row per modified
        entity and the selected columns id, projectId, parentId, createdBy,
        modifiedBy and Component.
    """
    # unix_timestamp() is scaled by 1000 before the comparison, presumably
    # because modifiedOn is stored in milliseconds.
    query = (
        f"select id, projectId, parentId, createdBy, modifiedBy, Component from {syn_id} where "
        f"modifiedOn > unix_timestamp(NOW() - INTERVAL {value} {unit})*1000"
    )
    return syn.tableQuery(query).asDataFrame()


def enrich_count(df, syn):
    """
    Enriches a DataFrame with user names, project names, and parent folder names from Synapse.

    The DataFrame is modified in place (three columns are added) and also returned.
    Each distinct user id and entity id is looked up only once, so repeated ids
    across rows do not cause repeated round trips to Synapse.

    Args:
        df (pd.DataFrame): DataFrame containing Synapse data with columns 'modifiedBy', 'projectId', and 'parentId'.
        syn (synapseclient.Synapse): A logged-in Synapse client instance.

    Returns:
        pd.DataFrame: The enriched DataFrame, with the added columns
        'userName', 'projectName' and 'parentFolderName'.
    """
    # Add columns for user, project name, and parent folder name
    df['userName'] = ''
    df['projectName'] = ''
    df['parentFolderName'] = ''

    # Per-call caches keyed by the id that was looked up.
    user_names = {}
    entity_names = {}

    def _user_name(user_id):
        # Resolve a user id to its userName, asking Synapse at most once per id.
        if user_id not in user_names:
            user_names[user_id] = syn.getUserProfile(user_id)['userName']
        return user_names[user_id]

    def _entity_name(entity_id):
        # Resolve a project/folder id to its name without downloading any file.
        if entity_id not in entity_names:
            entity_names[entity_id] = syn.get(entity_id, downloadFile=False).name
        return entity_names[entity_id]

    for index, row in df.iterrows():
        df.at[index, 'userName'] = _user_name(row['modifiedBy'])
        df.at[index, 'projectName'] = _entity_name(row['projectId'])
        df.at[index, 'parentFolderName'] = _entity_name(row['parentId'])

    return df


def dataframe_to_slack_block_with_md_links(df):
    """Render an enriched activity DataFrame as a Slack Block Kit payload.

    Args:
        df (pd.DataFrame): One row per summary line, with the columns
            'id', 'parentId', 'parentFolderName', 'userName' and 'projectName'.
            'id' is rendered as the number of datasets in the line.

    Returns:
        dict: {"blocks": [...]} holding a header section followed by one
        mrkdwn section per row, each linking to the parent folder on Synapse.
    """
    synapse_entity_prefix = "https://www.synapse.org/#!Synapse:"

    def _section(markdown):
        # Wrap a piece of mrkdwn text in a Slack "section" block.
        return {"type": "section", "text": {"type": "mrkdwn", "text": markdown}}

    sections = [_section("*Your daily update on HTAN activity on Synapse:*")]

    for _, record in df.iterrows():
        # Singular only when exactly one dataset is reported.
        noun = "datasets"
        if record['id'] == 1:
            noun = "dataset"

        # Slack link syntax: <url|label>
        folder_link = (
            f"<{synapse_entity_prefix}{record['parentId']}|{record['parentFolderName']}>"
        )

        sections.append(
            _section(
                f"{record['userName']} modified {record['id']} {noun} in the {folder_link} "
                f"folder of the {record['projectName']} project."
            )
        )

    return {"blocks": sections}


def send_message_to_slack_blocks(webhook_url, blocks, timeout=10):
    """Post a JSON payload to a Slack incoming webhook.

    Args:
        webhook_url (str): URL the payload is POSTed to.
        blocks (dict): JSON-serialisable message payload, e.g. {"blocks": [...]}.
        timeout (float | tuple): Seconds to wait for Slack before giving up,
            passed straight to requests. Without it the call could block
            indefinitely on an unresponsive connection.

    Raises:
        ValueError: If the response status code is not 200.
        requests.exceptions.Timeout: If no response arrives within `timeout`.
    """
    headers = {'Content-Type': 'application/json'}
    data = json.dumps(blocks)
    response = requests.post(webhook_url, headers=headers, data=data, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(f"Request to slack returned an error {response.status_code}, the response is:\n{response.text}")



# Step 1: fetch the recently modified entities and count them per
# (user, project, parent folder) combination.
modified_entities = find_modified_entities_fileview(syn, fileview)
grouping_keys = ['modifiedBy', 'projectId', 'parentId']
count = modified_entities.groupby(grouping_keys).count().reset_index()

# Step 2: attach user, project and folder names to each summary row.
enriched_data = enrich_count(count, syn)

# Step 3: render the summary as a Slack payload and post it to the webhook.
slack_message_blocks = dataframe_to_slack_block_with_md_links(enriched_data)
send_message_to_slack_blocks(webhook_url, slack_message_blocks)


0 comments on commit 17f849f

Please sign in to comment.