iterative · amritghimire · Jul 29, 2025 · Jul 29, 2025 · Jul 29, 2025 · Jul 29, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -194,13 +194,37 @@ jobs:
       - name: Install nox
         run: uv pip install nox --system
 
+      - name: Install FFmpeg on Windows
+        if: runner.os == 'Windows'
+        run: choco install ffmpeg -y --no-progress  # never prompt; keep the CI log short
+
+      - name: Install FFmpeg on macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install ffmpeg  # prefix is /opt/homebrew on arm64, /usr/local on Intel
+          echo "DYLD_FALLBACK_LIBRARY_PATH=$(brew --prefix)/lib" >> "$GITHUB_ENV"
+
+      - name: Install FFmpeg on Ubuntu
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update  # apt-get is the script-stable CLI; plain apt warns here
+          sudo apt-get install -y ffmpeg
+
       - name: Set hf token
         if: matrix.group == 'llm_and_nlp'
         run: echo 'HF_TOKEN=${{ secrets.HF_TOKEN }}' >> "$GITHUB_ENV"
 
       - name: Run examples
         run: nox -s examples -p ${{ matrix.pyv }} -- -m "${{ matrix.group }}"
 
+      - name: Upload audio-to-text output artifact
+        if: ${{ matrix.group == 'multimodal' && matrix.pyv == '3.13' }}  # NOTE(review): no always(), so this is skipped when an earlier step fails — confirm that is intended
+        uses: actions/upload-artifact@v4
+        with:
+          name: audio-to-text-output-${{ runner.os }}  # OS suffix keeps artifact names distinct per runner OS
+          path: audio-to-text.out  # default output file of examples/multimodal/audio-to-text.py; assumes the job's working directory — TODO confirm
+          if-no-files-found: error  # fail the step if the example did not produce the file
+
   check:
     if: always()
     needs: [lint, datachain, examples]

diff --git a/examples/multimodal/audio-to-text.py b/examples/multimodal/audio-to-text.py
@@ -8,6 +8,11 @@
 fragments, pass them to a model to get text.
 """
 
+import atexit
+import contextlib
+import io
+import os
+import sys
 from collections.abc import Iterator
 
 import torch
@@ -16,6 +21,37 @@
 import datachain as dc
 from datachain import Audio, AudioFile, AudioFragment, C
 
+# Redirect everything this example prints (stdout and stderr) into a file.
+# Path precedence: $DC_OUTPUT_FILE, then $OUTPUT_FILE, then "audio-to-text.out";
+# unset and empty values both fall through because of the `or` chain.
+_DC_OUTPUT_FILE: str = (
+    os.environ.get("DC_OUTPUT_FILE")
+    or os.environ.get("OUTPUT_FILE")
+    or "audio-to-text.out"
+)
+# NOTE(review): mode "w" truncates the file every time this module body runs.
+try:
+    _dc_exit_stack = contextlib.ExitStack()  # owns the output file handle
+    atexit.register(_dc_exit_stack.close)  # close the file at interpreter exit
+    _dc_out_fh = _dc_exit_stack.enter_context(
+        open(_DC_OUTPUT_FILE, "w", encoding="utf-8")  # noqa: SIM115
+    )
+except OSError as _e:  # Could not open the file: keep normal stdio and say why.
+    _std_err = getattr(sys, "__stderr__", None)  # interpreter's original stderr
+    if isinstance(_std_err, io.TextIOBase):
+        _std_err.write(
+            f"[audio-to-text.py] Failed to open '{_DC_OUTPUT_FILE}' for writing: {_e}\n"
+        )
+else:
+    # Announce the redirect once on the original stderr, then rebind both streams.
+    _std_err = getattr(sys, "__stderr__", None)
+    if isinstance(_std_err, io.TextIOBase):
+        _std_err.write(
+            f"[audio-to-text.py] Redirecting stdout/stderr to '{_DC_OUTPUT_FILE}'\n"
+        )
+    sys.stdout = _dc_out_fh
+    sys.stderr = _dc_out_fh  # same handle, so both streams interleave in one file
+
 
 def info(file: AudioFile) -> Audio:
     return file.get_info()

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -186,7 +186,7 @@ plugins:
             - https://numpy.org/doc/stable/objects.inv
             - https://pandas.pydata.org/docs/objects.inv
             - https://arrow.apache.org/docs/objects.inv
-            # - https://docs.sqlalchemy.org/objects.inv  # SSL certificate issue
+            - https://docs.sqlalchemy.org/objects.inv  # re-enabled; was disabled over an SSL certificate issue
             - https://docs.pydantic.dev/latest/objects.inv
 
 watch:

diff --git a/pyproject.toml b/pyproject.toml
@@ -49,7 +49,7 @@ dependencies = [
   "Pillow>=10.0.0,<12",
   "msgpack>=1.0.4,<2",
   "psutil",
-  "huggingface_hub<0.34.0",
+  "huggingface_hub",
   "iterative-telemetry>=0.0.10",
   "platformdirs",
   "dvc-studio-client>=0.21,<1",