-
Notifications
You must be signed in to change notification settings - Fork 1
/
app.py
93 lines (67 loc) · 2.81 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import argparse
import os
from flask import Flask, request, jsonify, send_from_directory
from werkzeug.utils import secure_filename
from dataset import get_featurizer
from decoder import SpeechRecognitionEngine
from engine import Recorder
app = Flask(__name__)

# --- Configuration ---------------------------------------------------------
# Directory names are relative paths: uploads are written under UPLOAD_FOLDER
# and the landing page is served from STATIC_FOLDER.
UPLOAD_FOLDER = "uploads"
STATIC_FOLDER = "static"
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER

# Create the upload directory up front so request handlers can save into it.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# --- Shared objects, created once at import time ----------------------------
recorder = Recorder()
featurizer = get_featurizer(16000)

# The ASR engine is assigned later by main(); it stays None until then so that
# importing this module does not require a model file.
asr_engine = None
# Serve static files (index.html)
@app.route("/", methods=["GET"])
def index():
    """Respond to GET / with ``index.html`` taken from ``STATIC_FOLDER``."""
    landing_page = "index.html"
    return send_from_directory(STATIC_FOLDER, landing_page)
@app.route("/transcribe/", methods=["POST"])
def transcribe_audio():
    """
    Handle POST /transcribe/: store the uploaded file and return a transcript.

    The request must carry a multipart ``file`` field. The upload is saved
    under ``app.config['UPLOAD_FOLDER']`` using a sanitized file name. Audio is
    then captured through the shared ``recorder``, written to
    ``audio_temp.wav`` and handed to the preloaded ASR engine.

    Returns:
        ``{"transcription": ...}`` on success, or ``({"error": ...}, status)``:
        400 when the file field is missing, has no name, or has a name that
        sanitizes to nothing; 500 when the ASR engine is not loaded or any
        exception is raised while handling the request.
    """
    try:
        if asr_engine is None:
            return jsonify({"error": "ASR Engine is not initialized."}), 500

        # Check if file is in request
        if "file" not in request.files:
            return jsonify({"error": "No file provided"}), 400

        upload = request.files["file"]
        # `not` covers both an empty string and a missing (None) file name.
        if not upload.filename:
            return jsonify({"error": "No file selected"}), 400

        # secure_filename() may strip every character (e.g. a name made only
        # of path separators or dots). Joining an empty name would point at
        # the upload directory itself, so reject it as a client error.
        filename = secure_filename(upload.filename)
        if not filename:
            return jsonify({"error": "Invalid file name"}), 400

        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        upload.save(file_path)
        print(f"File saved: {file_path}")

        # NOTE(review): the transcript below is produced from the clip
        # captured by `recorder`, not from the upload saved at `file_path`.
        # Confirm whether the uploaded file was meant to be transcribed.
        temp_wav = "audio_temp.wav"
        recorded_audio = recorder.record()
        recorder.save(recorded_audio, temp_wav)
        print("\nAudio recorded")

        # Use the preloaded ASR Engine to transcribe
        transcript = asr_engine.transcribe(asr_engine.model, featurizer, temp_wav)
        print("\nTranscription:")
        print(transcript)
        return jsonify({"transcription": transcript})
    except Exception as e:
        # Top-level boundary for this endpoint: report any failure as JSON.
        return jsonify({"error": f"Internal server error: {e}"}), 500
def main(args):
    """
    Load the speech recognition engine into the module-level ``asr_engine``.

    Args:
        args: Object exposing ``model_file`` and ``token_path`` attributes,
            which are passed to ``SpeechRecognitionEngine``.

    Any exception raised while loading is printed rather than propagated, and
    ``asr_engine`` is left as ``None``.
    """
    global asr_engine

    print("Loading Speech Recognition Engine into cache...")
    try:
        loaded_engine = SpeechRecognitionEngine(args.model_file, args.token_path)
        asr_engine = loaded_engine
        print("ASR Engine loaded successfully.")
    except Exception as load_error:
        print(f"Error loading ASR Engine: {load_error}")
        asr_engine = None
if __name__ == "__main__":
    # Script entry point: parse the CLI flags, load the ASR engine, then start
    # the Flask server on localhost port 8080 with debug enabled.
    cli = argparse.ArgumentParser(description="ASR Demo: Record and Transcribe Audio")
    cli.add_argument(
        "--model_file",
        type=str,
        required=True,
        help="Path to the optimized ASR model.",
    )
    cli.add_argument(
        "--token_path",
        type=str,
        default="assets/tokens.txt",
        help="Path to the tokens file.",
    )
    args = cli.parse_args()

    main(args)
    app.run(host="127.0.0.1", port=8080, debug=True)