Use zip in backup (#268)
* Refactor auto backup.

* Backup to .zip file

* Look for any .json file in the .zip file

* Add GitHub Action to check if the backup file was created.

* Create GitHub Action to check backup.

* Test GitHub Action.

* Add logs.

* Add backup flag validation.

* Import no_backup var.

* Import no_backup var from config file.

* Enable tmate for debugging.

* Force error.

* Refactor.

* Remove tmate.

* Remove no backup variable from config.

* Improve messages.

* Check backup file size.

* Update Slack channel.
everaldorodrigo authored Oct 3, 2024
1 parent eb9021e commit 01e5b56
Showing 3 changed files with 213 additions and 23 deletions.
96 changes: 96 additions & 0 deletions .github/scripts/check_backup.py
@@ -0,0 +1,96 @@
"""
This script checks if a backup file for the current date exists in a specified S3 bucket.
If the backup file does not exist, a notification is sent to a Slack channel.
Expected file format in the S3 bucket:
- The file should be in the folder 'db_backup/' with the following naming pattern:
'smartapi_YYYYMMDD.zip', where YYYYMMDD corresponds to the current date.
Required Environment Variables:
- AWS_ACCESS_KEY_ID: The AWS access key ID to read the AWS s3 bucket.
- AWS_SECRET_ACCESS_KEY: The AWS secret access key to read the AWS s3 bucket.
- BACKUP_BUCKET_NAME: The name of the AWS S3 bucket where backups are stored.
- S3_FOLDER: The folder path within the S3 bucket where backups are stored (e.g., 'db_backup/').
- AWS_REGION: The AWS region where the S3 bucket is located.
- SLACK_CHANNEL: The Slack channel where notifications should be sent (e.g., '#observability-test').
- SLACK_WEBHOOK_URL: The Slack Webhook URL used to send the notification.
Functionality:
1. The script uses the AWS SDK (boto3) to check for the existence of the backup file in the specified S3 bucket.
2. If the file is found, it logs that no action is needed.
3. If the file is not found, it sends a notification to the configured Slack channel.
Dependencies:
- boto3: For interacting with AWS S3.
- requests: For sending HTTP POST requests to Slack.
"""

import os
from datetime import datetime

import boto3
import botocore
import requests


def send_slack_notification(message):
    print(f" └─ {message}")

    # Create the payload for Slack
    slack_data = {
        "channel": os.getenv("SLACK_CHANNEL"),
        "username": "SmartAPI",
        "icon_emoji": ":thumbsdown:",
        "text": message,
    }

    try:
        print(" └─ Sending Slack notification.")
        response = requests.post(os.getenv("SLACK_WEBHOOK_URL"), json=slack_data, timeout=10)
        if response.status_code == 200:
            print(" └─ Slack notification sent successfully.")
        else:
            print(f" └─ Failed to send message to Slack: {response.status_code}, {response.text}")
    except requests.exceptions.Timeout as e:
        print(" └─ Request to the Slack webhook URL timed out.")
        raise e
    except requests.exceptions.RequestException as e:
        print(f" └─ Failed to send Slack notification. Error: {str(e)}")
        raise e


def check_backup_file():
    # Build the expected file name
    today_date = datetime.today().strftime("%Y%m%d")
    expected_file = f"{os.getenv('S3_FOLDER')}smartapi_{today_date}.zip"

    # Create the S3 client
    s3_client = boto3.client("s3", region_name=os.getenv("AWS_REGION"))

    # Try to fetch the file metadata
    try:
        response = s3_client.head_object(Bucket=os.getenv("BACKUP_BUCKET_NAME"), Key=expected_file)
        print(f" └─ Backup file {expected_file} exists!")

        # Get the file size in bytes
        file_size = response["ContentLength"]

        # Check if the file is larger than 1MB
        if file_size > 1048576:  # 1MB in bytes
            print(f" └─ Backup file is larger than 1MB! Size: {file_size} bytes.")
            print(" └─ Nothing to do!")
        else:
            message = f":alert: The backup file {expected_file} is smaller than 1MB!"
            send_slack_notification(message)

    except botocore.exceptions.ClientError as e:
        print(e)
        message = f":alert: The backup file {expected_file} was NOT created today!"
        send_slack_notification(message)


if __name__ == "__main__":
    check_backup_file()
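
For reference, here is a minimal sketch of how the expected S3 key is derived, mirroring the logic in check_backup.py above (the S3_FOLDER value is a placeholder matching the workflow default below):

import os
from datetime import datetime

# Illustrative only: reproduce the key name the checker looks for.
os.environ.setdefault("S3_FOLDER", "db_backup/")  # placeholder, same default as the workflow
expected_key = f"{os.getenv('S3_FOLDER')}smartapi_{datetime.today().strftime('%Y%m%d')}.zip"
print(expected_key)  # e.g. db_backup/smartapi_20241003.zip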
35 changes: 35 additions & 0 deletions .github/workflows/check_backup.yml
@@ -0,0 +1,35 @@
name: Check S3 Backup and Notify Slack

on:
  workflow_dispatch:  # Allows manual trigger from the GitHub Actions UI
  schedule:
    - cron: '0 13 * * *'  # 5:00 AM PST (UTC-8)

jobs:
  check-backup:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies (boto3 and requests)
        run: |
          python -m pip install --upgrade pip
          pip install boto3 requests

      - name: Check if backup exists in S3
        run: python .github/scripts/check_backup.py
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
          BACKUP_BUCKET_NAME: "${{ secrets.BACKUP_BUCKET_NAME }}"
          S3_FOLDER: "db_backup/"
          SLACK_CHANNEL: "#ncats-translator"
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
105 changes: 82 additions & 23 deletions src/admin.py
@@ -26,6 +26,8 @@
 import logging
 import random
 import time
+import zipfile
+import io
 from datetime import datetime
 
 import boto3
@@ -37,20 +39,48 @@
 logging.basicConfig(level="INFO")
 
 
-def _default_filename():
-    return "smartapi_" + datetime.today().strftime("%Y%m%d") + ".json"
+def _default_filename(extension=".json"):
+    return "smartapi_" + datetime.today().strftime("%Y%m%d") + extension
 
 
-def save_to_file(mapping, filename=None):
-    filename = filename or _default_filename()
-    with open(filename, "w") as file:
-        json.dump(mapping, file, indent=2)
-
-
-def save_to_s3(mapping, filename=None, bucket="smartapi"):
-    filename = filename or _default_filename()
+def save_to_file(mapping, filename=None, format="zip"):
+    """
+    Save data to a file in either JSON or ZIP format.
+    :param mapping: Data to save
+    :param filename: File name
+    :param format: File format, either 'json' or 'zip'
+    """
+    if format == "zip":
+        filename = filename or _default_filename(".zip")
+        with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zfile:
+            json_data = json.dumps(mapping, indent=2)
+            zfile.writestr(filename.replace(".zip", ".json"), json_data)
+    else:
+        filename = filename or _default_filename(".json")
+        with open(filename, "w") as file:
+            json.dump(mapping, file, indent=2)
+
+
+def save_to_s3(data, filename=None, bucket="smartapi", format="zip"):
+    """
+    Save data to S3 in either JSON or ZIP format.
+    :param data: Data to save
+    :param filename: File name
+    :param bucket: S3 bucket name
+    :param format: File format, either 'json' or 'zip'
+    """
+    filename = filename or _default_filename(f".{format}")
     s3 = boto3.resource("s3")
-    s3.Bucket(bucket).put_object(Key="db_backup/{}".format(filename), Body=json.dumps(mapping, indent=2))
+
+    if format == "zip":
+        with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zfile:
+            json_data = json.dumps(data, indent=2)
+            zfile.writestr(filename.replace(".zip", ".json"), json_data)
+        logging.info(f"Uploading {filename} to AWS S3")
+        s3.Bucket(bucket).upload_file(Filename=filename, Key=f"db_backup/{filename}")
+    else:
+        logging.info(f"Uploading {filename} to AWS S3")
+        s3.Bucket(bucket).put_object(Key=f"db_backup/{filename}", Body=json.dumps(data, indent=2))
 
 
 def _backup():
@@ -69,14 +99,14 @@ def _backup():
     return smartapis
 
 
-def backup_to_file(filename=None):
+def backup_to_file(filename=None, format="zip"):
     smartapis = _backup()
-    save_to_file(smartapis, filename)
+    save_to_file(smartapis, filename, format)
 
 
-def backup_to_s3(filename=None, bucket="smartapi"):
+def backup_to_s3(filename=None, bucket="smartapi", format="zip"):
     smartapis = _backup()
-    save_to_s3(smartapis, filename, bucket)
+    save_to_s3(smartapis, filename, bucket, format)
 
 
 def _restore(smartapis):
@@ -99,7 +129,7 @@ def restore_from_s3(filename=None, bucket="smartapi"):
     s3 = boto3.client("s3")
 
     if not filename:
-        objects = s3.list_objects_v2(Bucket="smartapi", Prefix="db_backup")["Contents"]
+        objects = s3.list_objects_v2(Bucket=bucket, Prefix="db_backup")["Contents"]
         filename = max(objects, key=lambda x: x["LastModified"])["Key"]
 
     if not filename.startswith("db_backup/"):
@@ -108,14 +138,42 @@ def restore_from_s3(filename=None, bucket="smartapi"):
     logging.info("GET s3://%s/%s", bucket, filename)
 
     obj = s3.get_object(Bucket=bucket, Key=filename)
-    smartapis = json.loads(obj["Body"].read())
+
+    filename = filename.replace("db_backup/", "")
+
+    if filename.endswith(".zip"):
+        file_content = obj["Body"].read()
+        with zipfile.ZipFile(io.BytesIO(file_content)) as zfile:
+            # Search for a JSON file inside the ZIP
+            json_file = next((f for f in zfile.namelist() if f.endswith(".json")), None)
+            if not json_file:
+                raise ValueError("No JSON file found inside the ZIP archive.")
+            with zfile.open(json_file) as json_data:
+                smartapis = json.load(json_data)
+    elif filename.endswith(".json"):
+        smartapis = json.loads(obj["Body"].read())
+    else:
+        raise Exception("Unsupported backup file type!")
+
     _restore(smartapis)
 
 
 def restore_from_file(filename):
-    with open(filename) as file:
-        smartapis = json.load(file)
-    _restore(smartapis)
+    if filename.endswith(".zip"):
+        with zipfile.ZipFile(filename, 'r') as zfile:
+            # Search for a JSON file inside the ZIP
+            json_file = next((f for f in zfile.namelist() if f.endswith(".json")), None)
+            if not json_file:
+                raise ValueError("No JSON file found inside the ZIP archive.")
+            with zfile.open(json_file) as json_data:
+                smartapis = json.load(json_data)
+    elif filename.endswith(".json"):
+        with open(filename) as file:
+            smartapis = json.load(file)
+    else:
+        raise Exception("Unsupported backup file type!")
+
+    _restore(smartapis)
 
 
 def refresh_document():
@@ -226,7 +284,7 @@ def refresh_has_metakg():
 _lock = FileLock(".lock", timeout=0)
 
 
-def routine(no_backup=False):
+def routine(no_backup=False, format="zip"):
     logger = logging.getLogger("routine")
 
     # Add jitter: random delay between 100 and 500 milliseconds (adjust range as needed)
@@ -244,8 +302,8 @@ def routine(no_backup=False):
         if lock_acquired:
             logger.info("Schedule lock acquired successfully.")
             if not no_backup:
-                logger.info("backup_to_s3()")
-                backup_to_s3()
+                logger.info(f"backup_to_s3(format={format})")
+                backup_to_s3(format=format)
             logger.info("refresh_document()")
             refresh_document()
             logger.info("check_uptime()")
@@ -262,6 +320,7 @@ def routine(no_backup=False):
             logger.warning("Schedule lock acquired by another process. No need to run it in this process.")
     except Exception as e:
         logger.error(f"An error occurred during the routine: {e}")
+        logger.error("Stack trace:", exc_info=True)
     finally:
         if lock_acquired:
             _lock.release()
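
Taken together, the new format flag gives a simple backup/restore round trip. A minimal sketch, assuming src/admin.py and its Elasticsearch backend are importable and reachable from the current environment (the filename below is a placeholder):

from admin import backup_to_file, restore_from_file

# Back up to a local zip; the archive's single member is the matching .json dump.
backup_to_file("smartapi_20241003.zip", format="zip")

# Restore locates the first .json member inside the archive and loads it.
restore_from_file("smartapi_20241003.zip")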
