use-whisper web (#26)
* use-whisper web

* data for kannada asr

* init-indic-tts

* init-parler tts

* update libraries
sachinsshetty authored Dec 4, 2024
1 parent db20c84 commit 3d7a43e
Showing 8 changed files with 178 additions and 0 deletions.
19 changes: 19 additions & 0 deletions tutorials/indic/README.md
@@ -0,0 +1,19 @@
Indic - LLM

- Solutions for Indian languages

- TTS
  - https://huggingface.co/ai4bharat/indic-parler-tts

- Text LLM
  - sarvam-1

- Datasets
  -

- ASR for Kannada (transcription sketch below)
  - https://huggingface.co/ai4bharat/indicconformer_stt_kn_hybrid_ctc_rnnt_large
  - Install NeMo: git clone https://github.com/AI4Bharat/NeMo.git && cd NeMo && git checkout nemo-v2 && bash reinstall.sh
  - https://github.com/AI4Bharat/NeMo
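A minimal transcription sketch for the Kannada model, assuming the NeMo install above has completed and the .nemo checkpoint has already been downloaded; the local file name and the decoder switch are assumptions, so check the model card for the exact snippet:

```python
# Sketch: offline Kannada ASR with the AI4Bharat IndicConformer hybrid checkpoint.
# Assumes the .nemo file was fetched, e.g. via
#   huggingface-cli download ai4bharat/indicconformer_stt_kn_hybrid_ctc_rnnt_large
# and that audio.wav is 16 kHz mono.
import nemo.collections.asr as nemo_asr

CHECKPOINT = "indicconformer_stt_kn_hybrid_ctc_rnnt_large.nemo"  # assumed local path

model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(CHECKPOINT)
model.eval()
model.cur_decoder = "ctc"  # hybrid checkpoint: pick the CTC head ("rnnt" also works);
                           # the exact switch may differ across NeMo versions

transcripts = model.transcribe(["audio.wav"], batch_size=1)
print(transcripts[0])  # output format varies slightly between NeMo releases
```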
15 changes: 15 additions & 0 deletions tutorials/indic/tts/Dockerfile
@@ -0,0 +1,15 @@
FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime

WORKDIR /app

# ffmpeg for audio encoding/decoding
RUN apt-get update && apt-get install -y \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir parler-tts transformers torch

COPY . .

EXPOSE 8000

CMD ["python", "main.py"]
41 changes: 41 additions & 0 deletions tutorials/indic/tts/README.md
@@ -0,0 +1,41 @@
TTS

- Download the model from Hugging Face
  - huggingface-cli download ai4bharat/indic-parler-tts

- Run with Docker Compose
  - docker compose -f indic-tts-compose.yml up --detach parler-tts-server

- Test output (a Python equivalent follows this list)
  - Kannada
    - curl -s -H "content-type: application/json" localhost:8000/v1/audio/speech -d '{"input": "ಉದ್ಯಾನದಲ್ಲಿ ಮಕ್ಕಳ ಆಟವಾಡುತ್ತಿದ್ದಾರೆ ಮತ್ತು ಪಕ್ಷಿಗಳು ಚಿಲಿಪಿಲಿ ಮಾಡುತ್ತಿವೆ."}' -o audio.mp3
  - Hindi
    - curl -s -H "content-type: application/json" localhost:8000/v1/audio/speech -d '{"input": "अरे, तुम आज कैसे हो?"}' -o audio.mp3
  - English, with a voice description
    - curl -s -H "content-type: application/json" localhost:8000/v1/audio/speech -d '{"input": "Hey, how are you?", "voice": "Feminine, speedy, and cheerful"}' -o audio_2.mp3

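The same requests can be sent from Python; a small sketch equivalent to the curl calls above, assuming the compose stack is already up on port 8000:

```python
# Python equivalent of the curl examples above.
import requests

payload = {
    "input": "ಉದ್ಯಾನದಲ್ಲಿ ಮಕ್ಕಳ ಆಟವಾಡುತ್ತಿದ್ದಾರೆ ಮತ್ತು ಪಕ್ಷಿಗಳು ಚಿಲಿಪಿಲಿ ಮಾಡುತ್ತಿವೆ.",
    "voice": "Feminine, speedy, and cheerful",  # optional voice description
}
resp = requests.post("http://localhost:8000/v1/audio/speech", json=payload, timeout=300)
resp.raise_for_status()

with open("audio.mp3", "wb") as f:
    f.write(resp.content)
```
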
---
TODO

- Create the Docker image
  - docker build -t indic-parler-tts .

- Run the container
  - docker run -d -p 8000:8000 -v ~/.cache/huggingface:/root/.cache/huggingface indic-parler-tts

- huggingface-cli download parler-tts/parler-tts-mini-expresso

- With slabstech/parler-tts-server
  - ai4bharat/indic-parler-tts
    - huggingface-cli download ai4bharat/indic-parler-tts
    - docker run --detach --volume ~/.cache/huggingface:/root/.cache/huggingface --publish 8000:8000 --env MODEL="ai4bharat/indic-parler-tts" slabstech/parler-tts-server
  - parler-tts/parler-tts-mini-expresso
    - huggingface-cli download parler-tts/parler-tts-mini-expresso
    - docker run --detach --volume ~/.cache/huggingface:/root/.cache/huggingface --publish 8000:8000 --env MODEL="parler-tts/parler-tts-mini-expresso" slabstech/parler-tts-server

- curl -s -H "content-type: application/json" localhost:8000/v1/audio/speech -d '{"input": "Hey, how are you?"}' -o audio.mp3
34 changes: 34 additions & 0 deletions tutorials/indic/tts/indic-tts-compose.yml
@@ -0,0 +1,34 @@
services:
  parler-tts-server:
    image: slabstech/parler-tts-server
    build:
      dockerfile: Dockerfile
      context: .
      platforms:
        - linux/amd64
      tags:
        - slabstech/parler-tts-server
    develop:
      watch:
        - path: ./parler_tts_server
          action: rebuild
    volumes:
      - ~/.cache/huggingface:/root/.cache/huggingface
    restart: unless-stopped
    ports:
      - 8000:8000
    environment:
      - MODEL=ai4bharat/indic-parler-tts
    healthcheck:
      test: curl --fail http://0.0.0.0:8000/health || exit 1
      interval: 10s
      timeout: 10s
      retries: 3
      start_period: 15s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
28 changes: 28 additions & 0 deletions tutorials/indic/tts/test-compose.yml
@@ -0,0 +1,28 @@
services:
  parler-tts:
    build: .
    image: pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
    ports:
      - "8000:8000"
    volumes:
      - ~/.cache/huggingface:/root/.cache/huggingface
    environment:
      - TZ=UTC
      - MODEL_NAME=ai4bharat/indic-parler-tts
      - PYTHONUNBUFFERED=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "nvidia-smi"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  huggingface_cache:
12 changes: 12 additions & 0 deletions tutorials/whisper/web/README.md
@@ -1,5 +1,17 @@
Whisper Web

- Whisper Web UI
  - https://gitlab.com/aadnk/whisper-webui
  - https://gitlab.com/aadnk/whisper-diarization
  - https://gitlab.com/users/aadnk/projects

- Hugging Face Spaces
  - https://huggingface.co/spaces/openai/whisper
  - https://huggingface.co/spaces/hf-audio/whisper-large-v3-turbo (local pipeline sketch below)

- xenova/whisper-web
  - Web GPU - https://github.com/xenova/whisper-web/tree/experimental-webgpu
  - Spaces - https://huggingface.co/spaces/Xenova/whisper-web
  - https://github.com/xenova/whisper-web
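For running whisper-large-v3-turbo locally instead of through the Spaces above, a minimal sketch with the transformers ASR pipeline (the weights are a sizeable download; GPU optional):

```python
# Local transcription with Whisper large-v3-turbo via the transformers ASR pipeline.
import torch
from transformers import pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"

asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    torch_dtype=torch.float16 if device != "cpu" else torch.float32,
    device=device,
)

# return_timestamps=True also yields segment timestamps, useful for longer audio
result = asr("audio_sample.wav", return_timestamps=True)
print(result["text"])
```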
9 changes: 9 additions & 0 deletions tutorials/whisper/web/gradio_api.py
@@ -0,0 +1,9 @@
from gradio_client import Client

client = Client("https://openai-whisper.hf.space/")
result = client.predict(
"https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav", # str (filepath or URL to file) in 'inputs' Audio component
"transcribe", # str in 'Task' Radio component
api_name="/predict"
)
print(result)
20 changes: 20 additions & 0 deletions tutorials/whisper/web/requirements.txt
@@ -0,0 +1,20 @@
anyio==4.6.2.post1
certifi==2024.8.30
charset-normalizer==3.4.0
exceptiongroup==1.2.2
filelock==3.16.1
fsspec==2024.10.0
gradio_client==1.5.0
h11==0.14.0
httpcore==1.0.7
httpx==0.28.0
huggingface-hub==0.26.3
idna==3.10
packaging==24.2
PyYAML==6.0.2
requests==2.32.3
sniffio==1.3.1
tqdm==4.67.1
typing_extensions==4.12.2
urllib3==2.2.3
websockets==12.0
