-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
🚀 [Update] Web Flask Script and Templates
- Loading branch information
Showing
5 changed files
with
257 additions
and
160 deletions.
There are no files selected for viewing
129 changes: 78 additions & 51 deletions
129
src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/app.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,69 +1,96 @@ | ||
import webbrowser | ||
import os | ||
import sys | ||
import argparse | ||
from os.path import join, dirname | ||
from fastapi import FastAPI, Request | ||
from fastapi.staticfiles import StaticFiles | ||
from fastapi.responses import HTMLResponse | ||
from fastapi.templating import Jinja2Templates | ||
|
||
from flask import Flask, request, jsonify, send_from_directory | ||
from werkzeug.utils import secure_filename | ||
|
||
# ASR engine path | ||
sys.path.append(join(dirname(__file__), 'Automatic_Speech_Recognition')) | ||
from engine import SpeechRecognitionEngine | ||
from neuralnet.dataset import get_featurizer | ||
from decoder import SpeechRecognitionEngine | ||
from engine import Recorder | ||
|
||
app = FastAPI() | ||
app = Flask(__name__) | ||
|
||
# Serve static files | ||
app.mount("/static", StaticFiles(directory="static"), name="static") | ||
# Configuration | ||
UPLOAD_FOLDER = 'uploads' | ||
STATIC_FOLDER = 'static/assets' | ||
|
||
# Setup Jinja2 templates | ||
templates = Jinja2Templates(directory="templates") | ||
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER | ||
|
||
# Global variable for ASR engine | ||
global asr_engine | ||
# Ensure upload folder exists | ||
os.makedirs(UPLOAD_FOLDER, exist_ok=True) | ||
|
||
@app.get("/", response_class=HTMLResponse) | ||
async def index(request: Request): | ||
return templates.TemplateResponse("index.html", {"request": request}) | ||
# Initialize shared objects at startup | ||
recorder = Recorder() # Initialize recorder | ||
featurizer = get_featurizer(16000) # Initialize featurizer | ||
|
||
@app.get("/start_asr") | ||
async def start(): | ||
action = DemoAction() | ||
asr_engine.run(action) | ||
return {"message": "Speech recognition started successfully!"} | ||
asr_engine = None # Initialize to None, to avoid issues during startup | ||
|
||
@app.get("/get_audio") | ||
async def get_audio(): | ||
with open('transcript.txt', 'r') as f: | ||
transcript = f.read() | ||
return {"transcript": transcript} | ||
# Serve static files (index.html) | ||
@app.route("/", methods=["GET"]) | ||
def index(): | ||
return send_from_directory(STATIC_FOLDER, "index.html") | ||
|
||
|
||
@app.route("/transcribe/", methods=["POST"]) | ||
def transcribe_audio(): | ||
""" | ||
Transcribe an uploaded audio file using the preloaded ASR engine. | ||
""" | ||
try: | ||
if asr_engine is None: | ||
return jsonify({"error": "ASR Engine is not initialized."}), 500 | ||
|
||
# Check if file is in request | ||
if "file" not in request.files: | ||
return jsonify({"error": "No file provided"}), 400 | ||
|
||
file = request.files["file"] | ||
if file.filename == "": | ||
return jsonify({"error": "No file selected"}), 400 | ||
|
||
# Secure filename and save file | ||
filename = secure_filename(file.filename) | ||
file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename) | ||
file.save(file_path) | ||
|
||
print(f"File saved: {file_path}") | ||
|
||
recorded_audio = recorder.record() | ||
recorder.save(recorded_audio, "audio_temp.wav") | ||
print("\nAudio recorded") | ||
|
||
# Use the preloaded ASR Engine to transcribe | ||
transcript = asr_engine.transcribe(asr_engine.model, featurizer, "audio_temp.wav") | ||
|
||
print("\nTranscription:") | ||
print(transcript) | ||
|
||
return jsonify({"transcription": transcript}) | ||
|
||
except Exception as e: | ||
return jsonify({"error": f"Internal server error: {e}"}), 500 | ||
|
||
|
||
def main(args): | ||
global asr_engine | ||
print("Loading Speech Recognition Engine into cache...") | ||
try: | ||
asr_engine = SpeechRecognitionEngine(args.model_file, args.token_path) | ||
print("ASR Engine loaded successfully.") | ||
except Exception as e: | ||
print(f"Error loading ASR Engine: {e}") | ||
asr_engine = None | ||
|
||
class DemoAction: | ||
def __init__(self): | ||
self.asr_results = "" | ||
self.current_beam = "" | ||
|
||
def __call__(self, x): | ||
results, current_context_length = x | ||
self.current_beam = results | ||
transcript = " ".join(self.asr_results.split() + results.split()) | ||
self.save_transcript(transcript) | ||
if current_context_length > 10: | ||
self.asr_results = transcript | ||
|
||
def save_transcript(self, transcript): | ||
with open("transcript.txt", 'w+') as f: | ||
print(transcript) | ||
f.write(transcript) | ||
|
||
if __name__ == "__main__": | ||
parser = argparse.ArgumentParser(description="Demo of Automatic Speech Recognition") | ||
parser.add_argument("--model_file", type=str, required=True, help="Path to the model file") | ||
parser.add_argument("--kenlm_file", type=str, default=None, help="Path to the KenLM file") | ||
parser = argparse.ArgumentParser(description="ASR Demo: Record and Transcribe Audio") | ||
parser.add_argument('--model_file', type=str, required=True, help='Path to the optimized ASR model.') | ||
parser.add_argument('--token_path', type=str, default="assets/tokens.txt", help='Path to the tokens file.') | ||
args = parser.parse_args() | ||
|
||
asr_engine = SpeechRecognitionEngine(args.model_file, args.kenlm_file) | ||
webbrowser.open_new('http://127.0.0.1:3000/') | ||
main(args) | ||
|
||
import uvicorn | ||
uvicorn.run(app, host="127.0.0.1", port=3000) | ||
app.run(host="127.0.0.1", port=8080, debug=True) |
20 changes: 20 additions & 0 deletions
20
src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/static/assets/index.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<title>Audio Transcription</title> | ||
<link rel="stylesheet" href="/static/styles.css"> | ||
</head> | ||
<body> | ||
|
||
<button id="recordBtn" onclick="init()">Click</button> | ||
|
||
<main></main> | ||
|
||
<div id="transcript"> | ||
<p id="result"></p> | ||
</div> | ||
<script src="/static/script.js"></script> | ||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.