🚀 [Update] Web Flask Script and Templates
LuluW8071 committed Dec 7, 2024
1 parent cee5f28 commit e3c7cef
Showing 5 changed files with 257 additions and 160 deletions.
129 changes: 78 additions & 51 deletions src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/app.py
@@ -1,69 +1,96 @@
-import webbrowser
+import os
 import sys
 import argparse
 from os.path import join, dirname
-from fastapi import FastAPI, Request
-from fastapi.staticfiles import StaticFiles
-from fastapi.responses import HTMLResponse
-from fastapi.templating import Jinja2Templates

+from flask import Flask, request, jsonify, send_from_directory
+from werkzeug.utils import secure_filename

 # ASR engine path
 sys.path.append(join(dirname(__file__), 'Automatic_Speech_Recognition'))
-from engine import SpeechRecognitionEngine
+from neuralnet.dataset import get_featurizer
+from decoder import SpeechRecognitionEngine
+from engine import Recorder

-app = FastAPI()
+app = Flask(__name__)

-# Serve static files
-app.mount("/static", StaticFiles(directory="static"), name="static")
+# Configuration
+UPLOAD_FOLDER = 'uploads'
+STATIC_FOLDER = 'static/assets'

-# Setup Jinja2 templates
-templates = Jinja2Templates(directory="templates")
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

-# Global variable for ASR engine
-global asr_engine
+# Ensure upload folder exists
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)

@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
return templates.TemplateResponse("index.html", {"request": request})
# Initialize shared objects at startup
recorder = Recorder() # Initialize recorder
featurizer = get_featurizer(16000) # Initialize featurizer

@app.get("/start_asr")
async def start():
action = DemoAction()
asr_engine.run(action)
return {"message": "Speech recognition started successfully!"}
asr_engine = None # Initialize to None, to avoid issues during startup

@app.get("/get_audio")
async def get_audio():
with open('transcript.txt', 'r') as f:
transcript = f.read()
return {"transcript": transcript}
# Serve static files (index.html)
@app.route("/", methods=["GET"])
def index():
return send_from_directory(STATIC_FOLDER, "index.html")


@app.route("/transcribe/", methods=["POST"])
def transcribe_audio():
"""
Transcribe an uploaded audio file using the preloaded ASR engine.
"""
try:
if asr_engine is None:
return jsonify({"error": "ASR Engine is not initialized."}), 500

# Check if file is in request
if "file" not in request.files:
return jsonify({"error": "No file provided"}), 400

file = request.files["file"]
if file.filename == "":
return jsonify({"error": "No file selected"}), 400

# Secure filename and save file
filename = secure_filename(file.filename)
file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
file.save(file_path)

print(f"File saved: {file_path}")

recorded_audio = recorder.record()
recorder.save(recorded_audio, "audio_temp.wav")
print("\nAudio recorded")

# Use the preloaded ASR Engine to transcribe
transcript = asr_engine.transcribe(asr_engine.model, featurizer, "audio_temp.wav")

print("\nTranscription:")
print(transcript)

return jsonify({"transcription": transcript})

except Exception as e:
return jsonify({"error": f"Internal server error: {e}"}), 500


+def main(args):
+    global asr_engine
+    print("Loading Speech Recognition Engine into cache...")
+    try:
+        asr_engine = SpeechRecognitionEngine(args.model_file, args.token_path)
+        print("ASR Engine loaded successfully.")
+    except Exception as e:
+        print(f"Error loading ASR Engine: {e}")
+        asr_engine = None

-class DemoAction:
-    def __init__(self):
-        self.asr_results = ""
-        self.current_beam = ""
-
-    def __call__(self, x):
-        results, current_context_length = x
-        self.current_beam = results
-        transcript = " ".join(self.asr_results.split() + results.split())
-        self.save_transcript(transcript)
-        if current_context_length > 10:
-            self.asr_results = transcript
-
-    def save_transcript(self, transcript):
-        with open("transcript.txt", 'w+') as f:
-            print(transcript)
-            f.write(transcript)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Demo of Automatic Speech Recognition")
parser.add_argument("--model_file", type=str, required=True, help="Path to the model file")
parser.add_argument("--kenlm_file", type=str, default=None, help="Path to the KenLM file")
parser = argparse.ArgumentParser(description="ASR Demo: Record and Transcribe Audio")
parser.add_argument('--model_file', type=str, required=True, help='Path to the optimized ASR model.')
parser.add_argument('--token_path', type=str, default="assets/tokens.txt", help='Path to the tokens file.')
args = parser.parse_args()

asr_engine = SpeechRecognitionEngine(args.model_file, args.kenlm_file)
webbrowser.open_new('http://127.0.0.1:3000/')
main(args)

import uvicorn
uvicorn.run(app, host="127.0.0.1", port=3000)
app.run(host="127.0.0.1", port=8080, debug=True)
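
A quick way to exercise the new route is a minimal smoke-test sketch, assuming the Flask app above is already running locally on 127.0.0.1:8080 (per app.run), that the third-party requests package is installed, and that sample.wav is a placeholder path to any short WAV clip; the multipart field name "file" mirrors request.files["file"] in transcribe_audio().

    import requests

    # Hypothetical client-side check of the /transcribe/ endpoint.
    # Assumes the server above is running on 127.0.0.1:8080.
    with open("sample.wav", "rb") as f:  # placeholder path to a short WAV clip
        resp = requests.post(
            "http://127.0.0.1:8080/transcribe/",
            files={"file": ("sample.wav", f, "audio/wav")},
        )

    print(resp.status_code)
    print(resp.json())  # {"transcription": "..."} on success, {"error": "..."} otherwise
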
20 changes: 20 additions & 0 deletions (new file)
@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Audio Transcription</title>
<link rel="stylesheet" href="/static/styles.css">
</head>
<body>

<button id="recordBtn" onclick="init()">Click</button>

<main></main>

<div id="transcript">
<p id="result"></p>
</div>
<script src="/static/script.js"></script>
</body>
</html>
@@ -1,3 +1,14 @@
+let isRecording = false;
+let audioVisualizer;
+let audioContext = new (window.AudioContext || window.webkitAudioContext)();
+let mediaRecorder;
+let audioData = [];
+let recordBtn = document.getElementById("recordBtn");
+let resultDisplay = document.getElementById("result");
+
+
+recordBtn.addEventListener("click", toggleRecording);
+
 class AudioVisualizer {
     constructor(audioContext, processFrame, processError) {
         this.audioContext = audioContext;
@@ -15,10 +26,9 @@ class AudioVisualizer {
     connectStream(stream) {
         this.analyser = this.audioContext.createAnalyser();
         const source = this.audioContext.createMediaStreamSource(stream);
-        console.log(source)
         source.connect(this.analyser);
         this.analyser.smoothingTimeConstant = 0.5;
-        this.analyser.fftSize = 32;
+        this.analyser.fftSize = 256; // Increased fftSize for better frequency resolution

         this.initRenderLoop(this.analyser);
     }
@@ -40,60 +50,119 @@ class AudioVisualizer {
 const visualMainElement = document.querySelector('main');
 const visualValueCount = 16;
 let visualElements;

+const colors = ['#ff6347', '#4682b4', '#32cd32', '#ff1493', '#ffd700', '#8a2be2', '#ff4500', '#00fa9a', '#a52a2a', '#5f9ea0', '#f0e68c', '#dda0dd', '#0000ff', '#ff00ff', '#adff2f', '#c71585'];
+
 const createDOMElements = () => {
-    let i;
-    for (i = 0; i < visualValueCount; ++i) {
+    for (let i = 0; i < visualValueCount; ++i) {
         const elm = document.createElement('div');
         visualMainElement.appendChild(elm);
-    }
+
+        // Assign a color from the predefined array
+        elm.style.background = colors[i % colors.length];
+    }
     visualElements = document.querySelectorAll('main div');
 };
 createDOMElements();

-const startTranscriptions = () => {
-    fetch("/start_asr").then(res => res.json()).then(data => console.log(data));
-    setInterval(() => {
-        fetch("/get_audio")
-            .then(res => res.json())
-            .then(data => {
-                let doc = document.getElementById("transcript");
-                if (data.transcript && data.transcript !== "") {
-                    doc.innerText = data.transcript;
-                }
-            })
-            .catch(error => console.error('Error:', error));
-    }, 100);
-};

 const init = () => {
     // Creating initial DOM elements
-    const audioContext = new AudioContext();
-    visualMainElement.innerHTML = '';
-    createDOMElements();
+    const initDOM = () => {
+        visualMainElement.innerHTML = '';
+        createDOMElements();
+    };
+    initDOM();

     // Swapping values around for a better visual effect
     const dataMap = { 0: 15, 1: 10, 2: 8, 3: 9, 4: 6, 5: 5, 6: 2, 7: 1, 8: 0, 9: 4, 10: 3, 11: 7, 12: 11, 13: 12, 14: 13, 15: 14 };
     const processFrame = (data) => {
-        const values = Object.values(data);
-        let i;
-        for (i = 0; i < visualValueCount; ++i) {
-            const value = values[dataMap[i]] / 255;
-            const elmStyles = visualElements[i].style;
-            elmStyles.transform = `scaleY( ${value} )`;
-            elmStyles.opacity = Math.max(.25, value);
-        }
+        const values = Object.values(data);
+        let i;
+        for (i = 0; i < visualValueCount; ++i) {
+            const value = values[dataMap[i]] / 255;
+            const elmStyles = visualElements[i].style;
+            elmStyles.transform = `scaleY( ${value} )`;
+            elmStyles.opacity = Math.max(.25, value);
+        }
     };

     const processError = () => {
-        visualMainElement.classList.add('error');
-        visualMainElement.innerText = 'Allow access to your microphone in order to see this demo.';
+        visualMainElement.classList.add('error');
+        visualMainElement.innerText = 'Please allow access to your microphone';
     }

-    const a = new AudioVisualizer(audioContext, processFrame, processError);
+    const audioVisualizer = new AudioVisualizer(audioContext, processFrame, processError);

-    startTranscriptions()
 };

+// Initialize the visualizer and the DOM once on page load
+document.addEventListener('DOMContentLoaded', init);
+
+// Toggle the recording state
+function toggleRecording() {
+    if (isRecording) {
+        stopRecording();
+    } else {
+        startRecording();
+    }
+}
+
+function startRecording() {
+    recordBtn.textContent = "Record"; // Change button text to stop recording
+    isRecording = true;
+    audioData = []; // Clear previous audio data
+
+    // Access microphone and initialize both MediaRecorder and Visualizer
+    navigator.mediaDevices.getUserMedia({ audio: true })
+        .then((stream) => {
+            // Initialize MediaRecorder
+            mediaRecorder = new MediaRecorder(stream);
+            mediaRecorder.ondataavailable = (event) => {
+                audioData.push(event.data); // Collect audio chunks
+            };
+
+            mediaRecorder.onstop = () => {
+                // When recording stops, create the audio blob
+                const audioBlob = new Blob(audioData, { type: 'audio/wav' });
+                transcribeAudio(audioBlob); // Send for transcription
+            };
+
+            mediaRecorder.start();
+            console.log("Recording started...");
+
+        })
+        .catch((err) => {
+            console.error("Error accessing microphone: ", err);
+            alert("Could not access the microphone.");
+            isRecording = false;
+            recordBtn.textContent = "Start Recording"; // Reset button text
+        });
+}
+
+function stopRecording() {
+    if (!isRecording || !mediaRecorder) return;
+
+    isRecording = false;
+    mediaRecorder.stop(); // Stop recording
+    console.log("Recording stopped.");
+
+    recordBtn.textContent = "Clear"; // Reset button text
+}
+
+function transcribeAudio(audioBlob) {
+    const formData = new FormData();
+    formData.append("file", audioBlob, "audio_temp.wav"); // Append the audio blob
+
+    fetch("/transcribe/", {
+        method: "POST",
+        body: formData,
+    })
+        .then(response => response.json())
+        .then(data => {
+            resultDisplay.textContent = data.transcription;
+        })
+        .catch((err) => {
+            console.error("Error during transcription:", err);
+            resultDisplay.textContent = "Error during transcription.";
+        });
+}