Skip to content

Commit

Permalink
fix: Notion Notes Update
Browse files Browse the repository at this point in the history
  • Loading branch information
rainmanjam committed Aug 27, 2024
1 parent 0fe6878 commit e57cc13
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 40 deletions.
6 changes: 3 additions & 3 deletions docker.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ This guide will walk you through setting up and deploying the project using the
docker run -p 8080:8080 \
-e STORAGE_PATH=GCP \
-e API_KEY=<set-to-any-value> \
-e GCP_BUCKET_NAME='<name-of-gcp-storage></name-of-gcp-storage> \
-e GCP_BUCKET_NAME='<name-of-gcp-storage>' \
-e GCP_SA_CREDENTIALS=<contents-of-json-file-downloaded> \
no-code-architects-toolkit
```
Expand All @@ -109,8 +109,8 @@ This guide will walk you through setting up and deploying the project using the
docker run -p 8080:8080 \
-e STORAGE_PATH=DRIVE \
-e GDRIVE_USER='<email address>' \
-e GDRIVE_FOLDER_ID=<Folder-ID>
-e API_KEY=<set-to-any-value> \
-e GDRIVE_FOLDER_ID='<Folder-ID>' \
-e API_KEY='<set-to-any-value>' \
-e GCP_SA_CREDENTIALS=<contents-of-json-file-downloaded> \
no-code-architects-toolkit
```
Expand Down
84 changes: 73 additions & 11 deletions routes/transcribe.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,82 @@
from flask import Blueprint, request, jsonify
from flask import Blueprint, request, jsonify, after_this_request
import uuid
import threading
import logging
from services.transcription import process_transcription
from services.authentication import authenticate
from services.webhook import send_webhook

# Blueprint that the transcription route in this module is registered on.
transcribe_bp = Blueprint('transcribe', __name__)
# Configure logging at INFO level as a side effect of importing this module.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

@transcribe_bp.route('/transcribe', methods=['POST'])
@authenticate
def transcribe():
    """Transcribe the media at ``media_url`` and return or deliver the result.

    The request body must be a JSON object with these keys:
        media_url: required; handed to ``process_transcription``.
        output: optional, defaults to ``'transcript'``; lower-cased before use.
        webhook_url, id: optional, but must be supplied together.

    Returns:
        400 if the body is not a JSON object, ``media_url`` is missing, or only
        one of ``webhook_url`` / ``id`` is given.
        202 with ``{"message": "processing"}`` when both ``webhook_url`` and
        ``id`` are given; the job then runs in a background thread and the
        outcome is passed to ``send_webhook``.
        200 with ``{"response": result}`` when no webhook is requested and the
        synchronous transcription succeeds, or 500 with ``{"message": ...}``
        when it raises.
    """
    data = request.json
    if not isinstance(data, dict):
        # A JSON body such as `null` or a list has no .get(); reject it
        # explicitly instead of failing with an AttributeError (HTTP 500).
        logger.error("Request body is not a JSON object")
        return jsonify({"error": "Request body must be a JSON object"}), 400

    media_url = data.get('media_url')
    # `or` also covers an explicit null, which would otherwise break .lower().
    output = str(data.get('output') or 'transcript').lower()
    webhook_url = data.get('webhook_url')
    request_id = data.get('id')  # local name avoids shadowing the builtin id()

    logger.info(
        "Received transcription request: media_url=%s, output=%s, webhook_url=%s, id=%s",
        media_url, output, webhook_url, request_id,
    )

    if not media_url:
        logger.error("Missing media_url parameter in request")
        return jsonify({"error": "Missing media_url parameter"}), 400

    # webhook_url and id only make sense as a pair: the id is echoed back in
    # the webhook payload so the receiver can correlate it with this request.
    if bool(webhook_url) != bool(request_id):
        logger.warning("Either webhook_url or id is missing in the request")
        return jsonify({"error": "webhook_url and id must be provided together"}), 400

    job_id = str(uuid.uuid4())
    logger.info("Generated job_id: %s", job_id)

    if not webhook_url:
        # No webhook requested: transcribe inline and return the result.
        try:
            logger.info("Job %s: No webhook provided, processing synchronously", job_id)
            result = process_transcription(media_url, output)
            logger.info("Job %s: Returning transcription result", job_id)
            return jsonify({"response": result}), 200
        except Exception as e:
            logger.error("Job %s: Error during synchronous transcription - %s", job_id, e)
            return jsonify({"message": str(e)}), 500

    def process_and_notify(media_url, output, webhook_url, request_id, job_id):
        """Run the transcription and report the outcome to the webhook."""
        try:
            logger.info("Job %s: Starting transcription process for %s", job_id, media_url)
            result = process_transcription(media_url, output)
        except Exception as e:
            logger.exception("Job %s: Error during transcription - %s", job_id, e)
            logger.info("Job %s: Sending failure webhook to %s", job_id, webhook_url)
            payload = {
                "endpoint": "/transcribe",
                "id": request_id,
                "response": None,
                "code": 500,
                "message": str(e),
            }
        else:
            logger.info("Job %s: Transcription process completed successfully", job_id)
            logger.info("Job %s: Sending success webhook to %s", job_id, webhook_url)
            payload = {
                "endpoint": "/transcribe",
                "id": request_id,
                "response": result,
                "code": 200,
                "message": "success",
            }

        # Delivery is kept outside the transcription try/except so a webhook
        # error is never reported to the caller as a transcription failure.
        try:
            send_webhook(webhook_url, payload)
        except Exception:
            logger.exception("Job %s: Failed to deliver webhook to %s", job_id, webhook_url)

    # Register the hook only on the asynchronous path; registering it
    # unconditionally would transcribe the media a second time in the
    # background after a synchronous request.
    @after_this_request
    def start_background_processing(response):
        logger.info("Job %s: Starting background processing thread", job_id)
        thread = threading.Thread(
            target=process_and_notify,
            args=(media_url, output, webhook_url, request_id, job_id),
        )
        thread.start()
        return response

    logger.info("Job %s: Returning 202 Accepted response and processing in background", job_id)
    return jsonify({"message": "processing"}), 202
35 changes: 9 additions & 26 deletions services/whisper_transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,28 @@
from services.webhook import send_webhook
from services.file_management import download_file, STORAGE_PATH

def process_transcription(media_url, output_type):
    """Download the media at ``media_url``, transcribe it and format the result.

    Args:
        media_url: Location of the media, passed to ``download_file``.
        output_type: ``'transcript'`` for the plain text, or ``'srt'`` for
            subtitles composed from the transcription segments.

    Returns:
        The transcript text, or the composed SRT document, as a string.

    Raises:
        ValueError: If ``output_type`` is neither ``'transcript'`` nor ``'srt'``.
        Exception: Any error from the download or the transcription is printed
            and re-raised unchanged.
    """
    # Function-scope import so this block does not rely on a file-level import.
    import uuid

    input_filename = None
    try:
        # Validate before downloading, so a bad request does no expensive work.
        if output_type not in ('transcript', 'srt'):
            raise ValueError("Invalid output type. Must be 'transcript' or 'srt'.")

        # A unique name per call keeps concurrent jobs from overwriting each
        # other's download (callers may run this in several threads at once).
        download_target = os.path.join(STORAGE_PATH, f"{uuid.uuid4()}_input")
        input_filename = download_file(media_url, download_target)

        model = whisper.load_model("base")
        result = model.transcribe(input_filename)

        if output_type == 'transcript':
            return result['text']

        subtitles = [
            srt.Subtitle(
                index,
                timedelta(seconds=segment['start']),
                timedelta(seconds=segment['end']),
                segment['text'].strip(),
            )
            for index, segment in enumerate(result['segments'], start=1)
        ]
        return srt.compose(subtitles)
    except Exception as e:
        print(f"Transcription failed: {str(e)}")
        raise
    finally:
        # Remove the downloaded media on both success and failure.
        if input_filename and os.path.exists(input_filename):
            os.remove(input_filename)

0 comments on commit e57cc13

Please sign in to comment.