🚀 [Update] Web Flask Script and Templates
LuluW8071 committed Dec 7, 2024
1 parent cee5f28 commit e3c7cef
Showing 5 changed files with 257 additions and 160 deletions.
129 changes: 78 additions & 51 deletions src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/app.py
@@ -1,69 +1,96 @@
-import webbrowser
+import os
 import sys
 import argparse
 from os.path import join, dirname
-from fastapi import FastAPI, Request
-from fastapi.staticfiles import StaticFiles
-from fastapi.responses import HTMLResponse
-from fastapi.templating import Jinja2Templates

+from flask import Flask, request, jsonify, send_from_directory
+from werkzeug.utils import secure_filename

 # ASR engine path
 sys.path.append(join(dirname(__file__), 'Automatic_Speech_Recognition'))
-from engine import SpeechRecognitionEngine
+from neuralnet.dataset import get_featurizer
+from decoder import SpeechRecognitionEngine
+from engine import Recorder

-app = FastAPI()
+app = Flask(__name__)

-# Serve static files
-app.mount("/static", StaticFiles(directory="static"), name="static")
+# Configuration
+UPLOAD_FOLDER = 'uploads'
+STATIC_FOLDER = 'static/assets'

-# Setup Jinja2 templates
-templates = Jinja2Templates(directory="templates")
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

-# Global variable for ASR engine
-global asr_engine
+# Ensure upload folder exists
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)

@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
return templates.TemplateResponse("index.html", {"request": request})
# Initialize shared objects at startup
recorder = Recorder() # Initialize recorder
featurizer = get_featurizer(16000) # Initialize featurizer

@app.get("/start_asr")
async def start():
action = DemoAction()
asr_engine.run(action)
return {"message": "Speech recognition started successfully!"}
asr_engine = None # Initialize to None, to avoid issues during startup

@app.get("/get_audio")
async def get_audio():
with open('transcript.txt', 'r') as f:
transcript = f.read()
return {"transcript": transcript}
# Serve static files (index.html)
@app.route("/", methods=["GET"])
def index():
return send_from_directory(STATIC_FOLDER, "index.html")


@app.route("/transcribe/", methods=["POST"])
def transcribe_audio():
"""
Transcribe an uploaded audio file using the preloaded ASR engine.
"""
try:
if asr_engine is None:
return jsonify({"error": "ASR Engine is not initialized."}), 500

# Check if file is in request
if "file" not in request.files:
return jsonify({"error": "No file provided"}), 400

file = request.files["file"]
if file.filename == "":
return jsonify({"error": "No file selected"}), 400

# Secure filename and save file
filename = secure_filename(file.filename)
file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
file.save(file_path)

print(f"File saved: {file_path}")

recorded_audio = recorder.record()
recorder.save(recorded_audio, "audio_temp.wav")
print("\nAudio recorded")

# Use the preloaded ASR Engine to transcribe
transcript = asr_engine.transcribe(asr_engine.model, featurizer, "audio_temp.wav")

print("\nTranscription:")
print(transcript)

return jsonify({"transcription": transcript})

except Exception as e:
return jsonify({"error": f"Internal server error: {e}"}), 500


+def main(args):
+    global asr_engine
+    print("Loading Speech Recognition Engine into cache...")
+    try:
+        asr_engine = SpeechRecognitionEngine(args.model_file, args.token_path)
+        print("ASR Engine loaded successfully.")
+    except Exception as e:
+        print(f"Error loading ASR Engine: {e}")
+        asr_engine = None

-class DemoAction:
-    def __init__(self):
-        self.asr_results = ""
-        self.current_beam = ""
-
-    def __call__(self, x):
-        results, current_context_length = x
-        self.current_beam = results
-        transcript = " ".join(self.asr_results.split() + results.split())
-        self.save_transcript(transcript)
-        if current_context_length > 10:
-            self.asr_results = transcript
-
-    def save_transcript(self, transcript):
-        with open("transcript.txt", 'w+') as f:
-            print(transcript)
-            f.write(transcript)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Demo of Automatic Speech Recognition")
parser.add_argument("--model_file", type=str, required=True, help="Path to the model file")
parser.add_argument("--kenlm_file", type=str, default=None, help="Path to the KenLM file")
parser = argparse.ArgumentParser(description="ASR Demo: Record and Transcribe Audio")
parser.add_argument('--model_file', type=str, required=True, help='Path to the optimized ASR model.')
parser.add_argument('--token_path', type=str, default="assets/tokens.txt", help='Path to the tokens file.')
args = parser.parse_args()

asr_engine = SpeechRecognitionEngine(args.model_file, args.kenlm_file)
webbrowser.open_new('http://127.0.0.1:3000/')
main(args)

import uvicorn
uvicorn.run(app, host="127.0.0.1", port=3000)
app.run(host="127.0.0.1", port=8080, debug=True)
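
A quick way to exercise the new route is a minimal smoke-test sketch, assuming the Flask app above is already running locally on 127.0.0.1:8080 (per app.run), that the third-party requests package is installed, and that sample.wav is a placeholder path to any short WAV clip; the multipart field name "file" mirrors request.files["file"] in transcribe_audio().

    import requests

    # Hypothetical client-side check of the /transcribe/ endpoint.
    # Assumes the server above is running on 127.0.0.1:8080.
    with open("sample.wav", "rb") as f:  # placeholder path to a short WAV clip
        resp = requests.post(
            "http://127.0.0.1:8080/transcribe/",
            files={"file": ("sample.wav", f, "audio/wav")},
        )

    print(resp.status_code)
    print(resp.json())  # {"transcription": "..."} on success, {"error": "..."} otherwise
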
20 changes: 20 additions & 0 deletions (new file)
@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Audio Transcription</title>
<link rel="stylesheet" href="/static/styles.css">
</head>
<body>

<button id="recordBtn" onclick="init()">Click</button>

<main></main>

<div id="transcript">
<p id="result"></p>
</div>
<script src="/static/script.js"></script>
</body>
</html>
@@ -1,3 +1,14 @@
+let isRecording = false;
+let audioVisualizer;
+let audioContext = new (window.AudioContext || window.webkitAudioContext)();
+let mediaRecorder;
+let audioData = [];
+let recordBtn = document.getElementById("recordBtn");
+let resultDisplay = document.getElementById("result");
+
+
+recordBtn.addEventListener("click", toggleRecording);
+
 class AudioVisualizer {
     constructor(audioContext, processFrame, processError) {
         this.audioContext = audioContext;
@@ -15,10 +26,9 @@ class AudioVisualizer {
     connectStream(stream) {
         this.analyser = this.audioContext.createAnalyser();
         const source = this.audioContext.createMediaStreamSource(stream);
-        console.log(source)
         source.connect(this.analyser);
         this.analyser.smoothingTimeConstant = 0.5;
-        this.analyser.fftSize = 32;
+        this.analyser.fftSize = 256; // Increased fftSize for better frequency resolution

         this.initRenderLoop(this.analyser);
     }
@@ -40,60 +50,119 @@ class AudioVisualizer {
 const visualMainElement = document.querySelector('main');
 const visualValueCount = 16;
 let visualElements;

+const colors = ['#ff6347', '#4682b4', '#32cd32', '#ff1493', '#ffd700', '#8a2be2', '#ff4500', '#00fa9a', '#a52a2a', '#5f9ea0', '#f0e68c', '#dda0dd', '#0000ff', '#ff00ff', '#adff2f', '#c71585'];
+
 const createDOMElements = () => {
-    let i;
-    for (i = 0; i < visualValueCount; ++i) {
+    for (let i = 0; i < visualValueCount; ++i) {
         const elm = document.createElement('div');
         visualMainElement.appendChild(elm);
-    }
+
+        // Assign a color from the predefined array
+        elm.style.background = colors[i % colors.length];
+    }
     visualElements = document.querySelectorAll('main div');
 };
 createDOMElements();

-const startTranscriptions = () => {
-    fetch("/start_asr").then(res => res.json()).then(data => console.log(data));
-    setInterval(() => {
-        fetch("/get_audio")
-            .then(res => res.json())
-            .then(data => {
-                let doc = document.getElementById("transcript");
-                if (data.transcript && data.transcript !== "") {
-                    doc.innerText = data.transcript;
-                }
-            })
-            .catch(error => console.error('Error:', error));
-    }, 100);
-};

 const init = () => {
     // Creating initial DOM elements
-    const audioContext = new AudioContext();
-    visualMainElement.innerHTML = '';
-    createDOMElements();
+    const initDOM = () => {
+        visualMainElement.innerHTML = '';
+        createDOMElements();
+    };
+    initDOM();

     // Swapping values around for a better visual effect
     const dataMap = { 0: 15, 1: 10, 2: 8, 3: 9, 4: 6, 5: 5, 6: 2, 7: 1, 8: 0, 9: 4, 10: 3, 11: 7, 12: 11, 13: 12, 14: 13, 15: 14 };
     const processFrame = (data) => {
-        const values = Object.values(data);
-        let i;
-        for (i = 0; i < visualValueCount; ++i) {
-            const value = values[dataMap[i]] / 255;
-            const elmStyles = visualElements[i].style;
-            elmStyles.transform = `scaleY( ${value} )`;
-            elmStyles.opacity = Math.max(.25, value);
-        }
+        const values = Object.values(data);
+        let i;
+        for (i = 0; i < visualValueCount; ++i) {
+            const value = values[dataMap[i]] / 255;
+            const elmStyles = visualElements[i].style;
+            elmStyles.transform = `scaleY( ${value} )`;
+            elmStyles.opacity = Math.max(.25, value);
+        }
     };

     const processError = () => {
-        visualMainElement.classList.add('error');
-        visualMainElement.innerText = 'Allow access to your microphone in order to see this demo.';
+        visualMainElement.classList.add('error');
+        visualMainElement.innerText = 'Please allow access to your microphone';
     }

-    const a = new AudioVisualizer(audioContext, processFrame, processError);
+    const audioVisualizer = new AudioVisualizer(audioContext, processFrame, processError);

-    startTranscriptions()
 };

+// Initialize the visualizer and the DOM once on page load
+document.addEventListener('DOMContentLoaded', init);
+
+// Toggle the recording state
+function toggleRecording() {
+    if (isRecording) {
+        stopRecording();
+    } else {
+        startRecording();
+    }
+}
+
+function startRecording() {
+    recordBtn.textContent = "Record"; // Change button text to stop recording
+    isRecording = true;
+    audioData = []; // Clear previous audio data
+
+    // Access microphone and initialize both MediaRecorder and Visualizer
+    navigator.mediaDevices.getUserMedia({ audio: true })
+        .then((stream) => {
+            // Initialize MediaRecorder
+            mediaRecorder = new MediaRecorder(stream);
+            mediaRecorder.ondataavailable = (event) => {
+                audioData.push(event.data); // Collect audio chunks
+            };
+
+            mediaRecorder.onstop = () => {
+                // When recording stops, create the audio blob
+                const audioBlob = new Blob(audioData, { type: 'audio/wav' });
+                transcribeAudio(audioBlob); // Send for transcription
+            };
+
+            mediaRecorder.start();
+            console.log("Recording started...");
+
+        })
+        .catch((err) => {
+            console.error("Error accessing microphone: ", err);
+            alert("Could not access the microphone.");
+            isRecording = false;
+            recordBtn.textContent = "Start Recording"; // Reset button text
+        });
+}
+
+function stopRecording() {
+    if (!isRecording || !mediaRecorder) return;
+
+    isRecording = false;
+    mediaRecorder.stop(); // Stop recording
+    console.log("Recording stopped.");
+
+    recordBtn.textContent = "Clear"; // Reset button text
+}
+
+function transcribeAudio(audioBlob) {
+    const formData = new FormData();
+    formData.append("file", audioBlob, "audio_temp.wav"); // Append the audio blob
+
+    fetch("/transcribe/", {
+        method: "POST",
+        body: formData,
+    })
+        .then(response => response.json())
+        .then(data => {
+            resultDisplay.textContent = data.transcription;
+        })
+        .catch((err) => {
+            console.error("Error during transcription:", err);
+            resultDisplay.textContent = "Error during transcription.";
+        });
+}