From 3d7a43e434f10d6fbb436c3f0b186c762a350dfd Mon Sep 17 00:00:00 2001 From: Sachin Shetty <26170834+sachinsshetty@users.noreply.github.com> Date: Wed, 4 Dec 2024 17:23:34 +0100 Subject: [PATCH] use-whisper web (#26) * use-whisper web * data for kannada asr * init-indic-tts * init-parler tts * update libraries --- tutorials/indic/README.md | 19 +++++++++++ tutorials/indic/tts/Dockerfile | 15 +++++++++ tutorials/indic/tts/README.md | 41 +++++++++++++++++++++++ tutorials/indic/tts/indic-tts-compose.yml | 34 +++++++++++++++++++ tutorials/indic/tts/test-compose.yml | 28 ++++++++++++++++ tutorials/whisper/web/README.md | 12 +++++++ tutorials/whisper/web/gradio_api.py | 9 +++++ tutorials/whisper/web/requirements.txt | 20 +++++++++++ 8 files changed, 178 insertions(+) create mode 100644 tutorials/indic/README.md create mode 100644 tutorials/indic/tts/Dockerfile create mode 100644 tutorials/indic/tts/README.md create mode 100644 tutorials/indic/tts/indic-tts-compose.yml create mode 100644 tutorials/indic/tts/test-compose.yml create mode 100644 tutorials/whisper/web/gradio_api.py create mode 100644 tutorials/whisper/web/requirements.txt diff --git a/tutorials/indic/README.md b/tutorials/indic/README.md new file mode 100644 index 0000000..3d02218 --- /dev/null +++ b/tutorials/indic/README.md @@ -0,0 +1,19 @@ +Indic - LLM + +- Solutions for Indian languages + +- tts + - https://huggingface.co/ai4bharat/indic-parler-tts + +- text - llm + - sarvam-1 + +- datasets + - + +- ASR for kannada + - https://huggingface.co/ai4bharat/indicconformer_stt_kn_hybrid_ctc_rnnt_large + + - git clone https://github.com/AI4Bharat/NeMo.git && cd NeMo && git checkout nemo-v2 && bash reinstall.sh + + - https://github.com/AI4Bharat/NeMo \ No newline at end of file diff --git a/tutorials/indic/tts/Dockerfile b/tutorials/indic/tts/Dockerfile new file mode 100644 index 0000000..3ff60f9 --- /dev/null +++ b/tutorials/indic/tts/Dockerfile @@ -0,0 +1,15 @@ +FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime 
+ +WORKDIR /app + +RUN apt-get update && apt-get install -y \ + ffmpeg \ + && rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir parler-tts transformers torch + +COPY . . + +EXPOSE 8000 + +CMD ["python", "main.py"] diff --git a/tutorials/indic/tts/README.md b/tutorials/indic/tts/README.md new file mode 100644 index 0000000..5255c98 --- /dev/null +++ b/tutorials/indic/tts/README.md @@ -0,0 +1,41 @@ +TTS - + +- Download model from Hugging Face + - huggingface-cli download ai4bharat/indic-parler-tts + +- Run with Docker compose + - docker compose -f indic-tts-compose.yml up --detach parler-tts-server + +- Test output + - Kannada + - curl -s -H "content-type: application/json" localhost:8000/v1/audio/speech -d '{"input": "ಉದ್ಯಾನದಲ್ಲಿ ಮಕ್ಕಳ ಆಟವಾಡುತ್ತಿದ್ದಾರೆ ಮತ್ತು ಪಕ್ಷಿಗಳು ಚಿಲಿಪಿಲಿ ಮಾಡುತ್ತಿವೆ."}' -o audio.mp3 + + - Hindi + - curl -s -H "content-type: application/json" localhost:8000/v1/audio/speech -d '{"input": "अरे, तुम आज कैसे हो?"}' -o audio.mp3 + + - curl -s -H "content-type: application/json" localhost:8000/v1/audio/speech -d '{"input": "Hey, how are you?", "voice": "Feminine, speedy, and cheerful"}' -o audio_2.mp3 + + - + +--- +TODO + +- Create docker image + - docker build -t indic-parler-tts . 
+ +- Run the container + - docker run -d -p 8000:8000 -v ~/.cache/huggingface:/root/.cache/huggingface indic-parler-tts + + + +- huggingface-cli download parler-tts/parler-tts-mini-expresso + +- with slabstech/parler-tts + - ai4bharat/indic-parler-tts + - huggingface-cli download ai4bharat/indic-parler-tts + - docker run --detach --volume ~/.cache/huggingface:/root/.cache/huggingface --publish 8000:8000 --env MODEL="ai4bharat/indic-parler-tts" slabstech/parler-tts-server + - parler-tts/parler-tts-mini-expresso + - huggingface-cli download parler-tts/parler-tts-mini-expresso + - docker run --detach --volume ~/.cache/huggingface:/root/.cache/huggingface --publish 8000:8000 --env MODEL="parler-tts/parler-tts-mini-expresso" slabstech/parler-tts-server + +- curl -s -H "content-type: application/json" localhost:8000/v1/audio/speech -d '{"input": "Hey, how are you?"}' -o audio.mp3 diff --git a/tutorials/indic/tts/indic-tts-compose.yml b/tutorials/indic/tts/indic-tts-compose.yml new file mode 100644 index 0000000..b3eb5de --- /dev/null +++ b/tutorials/indic/tts/indic-tts-compose.yml @@ -0,0 +1,34 @@ +services: + parler-tts-server: + image: slabstech/parler-tts-server + build: + dockerfile: Dockerfile + context: . 
+ platforms: + - linux/amd64 + tags: + - slabstech/parler-tts-server + develop: + watch: + - path: ./parler_tts_server + action: rebuild + volumes: + - ~/.cache/huggingface:/root/.cache/huggingface + restart: unless-stopped + ports: + - 8000:8000 + environment: + - MODEL=ai4bharat/indic-parler-tts + healthcheck: + test: curl --fail http://0.0.0.0:8000/health || exit 1 + interval: 10s + timeout: 10s + retries: 3 + start_period: 15s + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] diff --git a/tutorials/indic/tts/test-compose.yml b/tutorials/indic/tts/test-compose.yml new file mode 100644 index 0000000..229cfda --- /dev/null +++ b/tutorials/indic/tts/test-compose.yml @@ -0,0 +1,28 @@ +services: + parler-tts: + build: . + image: pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime + ports: + - "8000:8000" + volumes: + - ~/.cache/huggingface:/root/.cache/huggingface + environment: + - TZ=UTC + - MODEL_NAME=ai4bharat/indic-parler-tts + - PYTHONUNBUFFERED=1 + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] + restart: unless-stopped + healthcheck: + test: ["CMD", "nvidia-smi"] + interval: 30s + timeout: 10s + retries: 3 + +volumes: + huggingface_cache: diff --git a/tutorials/whisper/web/README.md b/tutorials/whisper/web/README.md index 1811f64..e529beb 100644 --- a/tutorials/whisper/web/README.md +++ b/tutorials/whisper/web/README.md @@ -1,5 +1,17 @@ Whisper Web +- Whisper Web UI + - https://gitlab.com/aadnk/whisper-webui + + - https://gitlab.com/aadnk/whisper-diarization + + - https://gitlab.com/users/aadnk/projects +- https://huggingface.co/spaces/openai/whisper + +- https://huggingface.co/spaces/hf-audio/whisper-large-v3-turbo +- + + - Web GPU - https://github.com/xenova/whisper-web/tree/experimental-webgpu - Spaces - https://huggingface.co/spaces/Xenova/whisper-web - https://github.com/xenova/whisper-web diff --git a/tutorials/whisper/web/gradio_api.py 
b/tutorials/whisper/web/gradio_api.py new file mode 100644 index 0000000..e3e1c25 --- /dev/null +++ b/tutorials/whisper/web/gradio_api.py @@ -0,0 +1,9 @@ +from gradio_client import Client + +client = Client("https://openai-whisper.hf.space/") +result = client.predict( + "https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav", # str (filepath or URL to file) in 'inputs' Audio component + "transcribe", # str in 'Task' Radio component + api_name="/predict" +) +print(result) \ No newline at end of file diff --git a/tutorials/whisper/web/requirements.txt b/tutorials/whisper/web/requirements.txt new file mode 100644 index 0000000..680aeba --- /dev/null +++ b/tutorials/whisper/web/requirements.txt @@ -0,0 +1,20 @@ +anyio==4.6.2.post1 +certifi==2024.8.30 +charset-normalizer==3.4.0 +exceptiongroup==1.2.2 +filelock==3.16.1 +fsspec==2024.10.0 +gradio_client==1.5.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.0 +huggingface-hub==0.26.3 +idna==3.10 +packaging==24.2 +PyYAML==6.0.2 +requests==2.32.3 +sniffio==1.3.1 +tqdm==4.67.1 +typing_extensions==4.12.2 +urllib3==2.2.3 +websockets==12.0