app.py (forked from bklieger-groq/gradio-groq-basics)
import gradio as gr
import groq
import os
import io
import numpy as np
import soundfile as sf
from PIL import Image
import base64

# Get the GROQ_API_KEY from environment variables
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise ValueError("Please set the GROQ_API_KEY environment variable.")

# Initialize the Groq client
client = groq.Client(api_key=api_key)
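
# Note (added, not part of the original file): the key is read from the
# environment, so a typical run might look like this (the key value is an
# assumption; use your own):
#
#     export GROQ_API_KEY="gsk_..."
#     python app.py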

def transcribe_audio(audio):
    if audio is None:
        return "No audio provided.", ""
    sr, y = audio
    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)
    # Normalize audio (guard against silent clips to avoid division by zero)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    # Write audio to an in-memory WAV buffer
    buffer = io.BytesIO()
    sf.write(buffer, y, sr, format='wav')
    buffer.seek(0)
    try:
        # Use Distil-Whisper English model for transcription
        completion = client.audio.transcriptions.create(
            model="distil-whisper-large-v3-en",
            file=("audio.wav", buffer),
            response_format="text"
        )
        transcription = completion
    except Exception as e:
        transcription = f"Error in transcription: {str(e)}"
    response = generate_response(transcription)
    return transcription, response
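
# Sketch (added, not part of the original app): transcribe_audio expects the
# (sample_rate, samples) tuple produced by gr.Audio(type="numpy"), so it can
# also be exercised without the UI, e.g. with a synthetic one-second 440 Hz
# tone. A pure tone will not yield meaningful text, but it runs the pipeline
# end to end:
#
#     import numpy as np
#     sr = 16000
#     tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr).astype(np.float32)
#     text, reply = transcribe_audio((sr, tone))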

def generate_response(transcription):
    if not transcription or transcription.startswith("Error"):
        return "No valid transcription available. Please try speaking again."
    try:
        # Use Llama 3.1 70B model for text generation
        completion = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": transcription}
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"

def analyze_image(image):
    if image is None:
        return "No image uploaded."
    # Convert numpy array to PIL Image
    image_pil = Image.fromarray(image.astype('uint8'), 'RGB')
    # Convert PIL image to a base64-encoded JPEG
    buffered = io.BytesIO()
    image_pil.save(buffered, format="JPEG")
    base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')
    try:
        # Use Llama 3.2 11B Vision model to analyze the image
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image in detail."},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}",
                            },
                        },
                    ],
                }
            ],
            model="llama-3.2-11b-vision-preview",
        )
        description = chat_completion.choices[0].message.content
    except Exception as e:
        description = f"Error in image analysis: {str(e)}"
    return description
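
# Sketch (added, not part of the original app): analyze_image takes an
# H x W x 3 uint8 array, matching what gr.Image(type="numpy") delivers, so it
# can also be driven from a script, e.g.:
#
#     from PIL import Image
#     import numpy as np
#     img = np.array(Image.open("example.jpg").convert("RGB"))  # hypothetical local file
#     print(analyze_image(img))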

def respond(message, chat_history):
    if chat_history is None:
        chat_history = []
    # Prepare the message history for the API
    messages = []
    for user_msg, assistant_msg in chat_history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    try:
        # Use Llama 3.1 70B model for generating the assistant response
        completion = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=messages,
        )
        assistant_message = completion.choices[0].message.content
        chat_history.append((message, assistant_message))
    except Exception as e:
        assistant_message = f"Error: {str(e)}"
        chat_history.append((message, assistant_message))
    return "", chat_history, chat_history  # Clear the textbox; return history and state

# Custom CSS for the Groq badge and color scheme
custom_css = """
.gradio-container {
    background-color: #f5f5f5;
}
.gr-button-primary {
    background-color: #f55036 !important;
    border-color: #f55036 !important;
}
#groq-badge {
    position: fixed;
    bottom: 20px;
    right: 20px;
    z-index: 1000;
}
"""

with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# 🎙️ Groq x Gradio Multi-Modal Llama-3.2 and Whisper")

    with gr.Tab("Audio"):
        gr.Markdown("## Speak to the AI")
        with gr.Row():
            audio_input = gr.Audio(type="numpy", label="Speak or Upload Audio")
        with gr.Row():
            transcription_output = gr.Textbox(label="Transcription")
            response_output = gr.Textbox(label="AI Assistant Response")
        process_button = gr.Button("Process", variant="primary")
        process_button.click(
            transcribe_audio,
            inputs=audio_input,
            outputs=[transcription_output, response_output]
        )

    with gr.Tab("Image"):
        gr.Markdown("## Upload an Image for Analysis")
        with gr.Row():
            image_input = gr.Image(type="numpy", label="Upload Image")
        with gr.Row():
            image_description_output = gr.Textbox(label="Image Description")
        analyze_button = gr.Button("Analyze Image", variant="primary")
        analyze_button.click(
            analyze_image,
            inputs=image_input,
            outputs=[image_description_output]
        )

    with gr.Tab("Chat"):
        gr.Markdown("## Chat with the AI Assistant")
        chatbot = gr.Chatbot()
        state = gr.State([])  # Initialize the chat state
        with gr.Row():
            user_input = gr.Textbox(show_label=False, placeholder="Type your message here...", container=False)
            send_button = gr.Button("Send", variant="primary")
        send_button.click(
            respond,
            inputs=[user_input, state],
            outputs=[user_input, chatbot, state],
        )

    # Add the Groq badge
    gr.HTML("""
    <div id="groq-badge">
        <div style="color: #f55036; font-weight: bold;">POWERED BY GROQ</div>
    </div>
    """)
gr.Markdown("""
## How to use this app:
### Audio Tab
1. Click on the microphone icon and speak your message or upload an audio file.
2. Click the "Process" button to transcribe your speech and generate a response from the AI assistant.
3. The transcription and AI assistant response will appear in the respective text boxes.
### Image Tab
1. Upload an image by clicking on the image upload area.
2. Click the "Analyze Image" button to get a detailed description of the image.
3. The uploaded image and its description will appear below.
### Chat Tab
1. Type your message in the text box at the bottom.
2. Click the "Send" button to interact with the AI assistant.
3. The conversation will appear in the chat interface.
""")
demo.launch()
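
# Note (added, not part of the original file): launch() accepts the usual Gradio
# options if the defaults don't fit, e.g. demo.launch(share=True) for a temporary
# public link, or demo.launch(server_name="0.0.0.0") to listen on all interfaces.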