diff --git a/pyproject.toml b/pyproject.toml index 5a7bcd4..c40ed81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ "requests", "tqdm", "uvicorn", - "yaspin", + "yaspin==3.0.2", ] [project.scripts] blackfish = "app.cli.__main__:main" diff --git a/src/app/cli/services/speech_recognition.py b/src/app/cli/services/speech_recognition.py index 0494d7a..e18c023 100644 --- a/src/app/cli/services/speech_recognition.py +++ b/src/app/cli/services/speech_recognition.py @@ -1,3 +1,4 @@ +import os import click import requests from random import randint @@ -18,7 +19,10 @@ # blackfish run [OPTIONS] speech-recognition [OPTIONS] @click.command() @click.option( - "--model", required=False, default="openai/whisper-large-v3", help="Model to serve." + "--model_id", + required=False, + default="openai/whisper-large-v3", + help="Model to serve.", ) @click.option( "--input_dir", @@ -42,7 +46,7 @@ @click.pass_context def run_speech_recognition( ctx, - model, + model_id, input_dir, name, revision, @@ -52,22 +56,22 @@ def run_speech_recognition( profile = config.BLACKFISH_PROFILES[ctx.obj.get("profile", "default")] - if model in get_models(profile): + if model_id in get_models(profile): if revision is None: - revision = get_latest_commit(model, get_revisions(model, profile)) - model_dir = get_model_dir(model, revision, profile) + revision = get_latest_commit(model_id, get_revisions(model_id, profile)) + model_dir = get_model_dir(model_id, revision, profile) click.echo( f"{LogSymbols.WARNING.value} No revision provided. Using latest" f" available commit {revision}." ) else: - model_dir = get_model_dir(model, revision, profile) + model_dir = get_model_dir(model_id, revision, profile) if model_dir is None: return else: click.echo( - f"{LogSymbols.ERROR.value} Unable to find {model} for profile" + f"{LogSymbols.ERROR.value} Unable to find {model_id} for profile" f" '{profile.name}'." 
) return @@ -86,6 +90,10 @@ def run_speech_recognition( if revision is not None: container_options["revision"] = revision + container_options["model_dir"] = os.path.dirname(model_dir) + + container_options["model_id"] = model_id + job_options = {k: v for k, v in ctx.obj.items() if v is not None} del job_options["profile"] @@ -98,14 +106,14 @@ def run_speech_recognition( if dry_run: service = SpeechRecognition( name=name, - model=model, + model=model_id, job_type="slurm", host=profile.host, user=profile.user, ) click.echo("-" * 80) click.echo("Service: speech-recognition") - click.echo(f"Model: {model}") + click.echo(f"Model: {model_id}") click.echo(f"Name: {name}") click.echo("Type: slurm") click.echo(f"Host: {profile.host}") @@ -119,7 +127,7 @@ def run_speech_recognition( json={ "name": name, "image": "speech_recognition", - "model": model, + "model": model_id, "job_type": "slurm", "host": profile.host, "user": profile.user, @@ -149,14 +157,14 @@ def run_speech_recognition( if dry_run: service = SpeechRecognition( name=name, - model=model, + model=model_id, job_type="local", host="localhost", user=profile.user, ) click.echo("-" * 80) click.echo("Service: speech-recognition") - click.echo(f"Model: {model}") + click.echo(f"Model: {model_id}") click.echo(f"Name: {name}") click.echo("Type: local") click.echo("Host: localhost") @@ -173,7 +181,7 @@ def run_speech_recognition( json={ "name": name, "image": "speech_recognition", - "model": model, + "model": model_id, "job_type": "local", "host": "localhost", "user": profile.user, diff --git a/src/app/services/speech_recognition.py b/src/app/services/speech_recognition.py index 64d448c..9e22905 100644 --- a/src/app/services/speech_recognition.py +++ b/src/app/services/speech_recognition.py @@ -13,9 +13,11 @@ # service API. 
These options are not in job.py @dataclass class SpeechRecognitionConfig(ContainerConfig): - input_dir: str = None + model_id: Optional[str] = None + model_dir: Optional[str] = None + input_dir: Optional[str] = None + port: Optional[int] = None revision: Optional[str] = None - port: Optional[int] = None class SpeechRecognition(Service): @@ -51,14 +53,14 @@ def launch_script( # Call Blackfish API async def call( self, - file_name: str, + audio_path: str, language: Union[str, None] = None, response_format: Literal["json", "text"] = "json", ) -> requests.Response: logger.info(f"calling service {self.service_id}") try: body = { - "file_name": file_name, + "audio_path": audio_path, "language": language, "response_format": response_format, } diff --git a/src/app/templates/speech_recognition_local.sh b/src/app/templates/speech_recognition_local.sh index f75cf51..b4c0eb5 100644 --- a/src/app/templates/speech_recognition_local.sh +++ b/src/app/templates/speech_recognition_local.sh @@ -6,23 +6,26 @@ docker run -d \ {{ ' --gpus all' if job_config.gres else '' }} \ -p {{container_config["port"]}}:{{container_config["port"]}} \ + -v "{{container_config["input_dir"]}}":"/data/audio" \ + -v "{{container_config["model_dir"]}}":"/data/model" \ + --name speech_recognition \ + fjying/audiototextapi:arm64_hf\ + --model_dir "/data/model" \ + --model_id {{container_config['model_id']}}\ {%- if 'revision' in container_config %} - -e REVISION={{container_config['revision']}}\ + --revision {{container_config['revision']}}\ {%- endif %} - -e SPEECH_RECOGNITION_PORT={{container_config["port"]}}\ - -v "{{container_config["input_dir"]}}":/app/files \ - -e MODEL_DIR="/app/files/models/Whisper_hf/models--openai--whisper-tiny"\ - -e INPUT_DIR="/app/files/data"\ - --name speech_recognition \ - fjying/audiototextapi:arm64_hf + --port {{container_config["port"]}} {%- elif container_config.provider == 'apptainer' %} apptainer run {{ ' --nv' if job_config.gres > 0 else '' }} \ + --bind 
"{{container_config["input_dir"]}}":"/data/audio" \ + --bind "{{container_config["model_dir"]}}":"/data/model" \ + {{ job_config.cache_dir }}/images/audiototextapi_amd64_hf.sif \ + --model_dir "/data/model" \ + --model_id {{container_config['model_id']}}\ {%- if 'revision' in container_config %} - --env="REVISION={{container_config['revision']}}" \ + --revision {{container_config['revision']}}\ {%- endif %} - --env="SPEECH_RECOGNITION_PORT={{container_config["port"]}}" \ - --env="MODEL_DIR="{{job_config.model_dir}}"" \ - --env="INPUT_DIR="{{container_config['input_dir']}}"" \ - "{{ job_config.cache_dir }}/images/audiototextapi_amd64_hf.sif" + --port {{container_config["port"]}} {%- endif %} {%- endblock %} diff --git a/src/app/templates/speech_recognition_slurm.sh b/src/app/templates/speech_recognition_slurm.sh index 94504c9..9c92db1 100644 --- a/src/app/templates/speech_recognition_slurm.sh +++ b/src/app/templates/speech_recognition_slurm.sh @@ -12,11 +12,13 @@ Command to create ssh tunnel: ssh -N -f -L ${port}:${node}:${port} ${user}@${cluster}.princeton.edu" apptainer run {{ ' --nv' if job_config.gres > 0 else '' }} \ + --bind "{{container_config["input_dir"]}}":"/data/audio" \ + --bind "{{container_config["model_dir"]}}":"/data/model" \ + {{ job_config.cache_dir }}/images/audiototextapi_amd64_hf.sif \ + --model_dir "/data/model" \ + --model_id {{container_config['model_id']}}\ {%- if 'revision' in container_config %} - --env="REVISION={{container_config['revision']}}" \ + --revision {{container_config['revision']}}\ {%- endif %} - --env="SPEECH_RECOGNITION_PORT=$port" \ - --env="MODEL_DIR={{job_config.model_dir}}" \ - --env="INPUT_DIR={{container_config['input_dir']}}" \ - {{ job_config.cache_dir }}/images/audiototextapi_amd64_hf.sif + --port $port {%- endblock %}