-
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add auto transcription script
transcribe
- Loading branch information
1 parent
6caeab7
commit 6cb6aeb
Showing
2 changed files
with
52 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,6 +50,14 @@ | |
cp -r ./internal/assets/* ./build/assets | ||
mv ./build $out | ||
''; | ||
|
||
meta = { | ||
description = "Takes a Whisper AI JSON file as its first argument & outputs markdown to stdout appropriate to append to a Ray Peat Rodeo markdown file."; | ||
homepage = "https://github.com/marcuswhybrow/ray-peat-rodeo"; | ||
maintainers = [ | ||
"Marcus Whybrow <[email protected]>" | ||
]; | ||
}; | ||
}; | ||
|
||
whisper-json2md = pkgs.buildGoApplication { | ||
|
@@ -65,6 +73,42 @@ | |
''; | ||
}; | ||
|
||
# Bash script, usage: transcribe <asset-path> [author] | ||
# Reads `source.url` from the asset, downloads its audio with yt-dlp, | ||
# transcribes it with Whisper, stamps the asset's front matter and appends | ||
# the transcript (converted by whisper-json2md) to the asset file. | ||
# Built with writeShellScriptBin so the generated executable has a shebang. | ||
transcribe = pkgs.writeShellScriptBin "transcribe" '' | ||
  # Abort on the first failing step so a failed download or transcription | ||
  # never rewrites the asset file. | ||
  set -o errexit | ||
  set -o xtrace | ||
  asset_path="$1" | ||
  author="$2" | ||
  if [ -z "$asset_path" ]; then | ||
    echo "usage: transcribe <asset-path> [author]" >&2 | ||
    exit 1 | ||
  fi | ||
  asset_name=$(basename "$asset_path") | ||
  source_url=$(${pkgs.yq-go}/bin/yq ".source.url | select(.)" "$asset_path") | ||
  if [ -z "$source_url" ]; then | ||
    echo "transcribe: no source.url found in $asset_path" >&2 | ||
    exit 1 | ||
  fi | ||
  # Scratch directories for the downloaded audio and Whisper's JSON output; | ||
  # both are removed when the script exits, whether it succeeded or not. | ||
  tmp_dir_audio=$(mktemp --directory) | ||
  tmp_dir_json=$(mktemp --directory) | ||
  trap 'rm -rf "$tmp_dir_audio" "$tmp_dir_json"' EXIT | ||
  audio_path="$tmp_dir_audio/$asset_name" | ||
  ${pkgs.yt-dlp}/bin/yt-dlp -x "$source_url" -o "$audio_path" | ||
  # The file yt-dlp leaves behind may not be named exactly "$audio_path", | ||
  # so take whatever file ended up in the scratch directory. | ||
  audio_name_actual=$(ls -AU "$tmp_dir_audio" | head -1) | ||
  audio_path_actual="$tmp_dir_audio/$audio_name_actual" | ||
  ls "$tmp_dir_audio" | ||
  ${pkgs.openai-whisper}/bin/whisper --language English --output_format json --output_dir "$tmp_dir_json" "$audio_path_actual" | ||
  json_name=$(ls -AU "$tmp_dir_json" | head -1) | ||
  json_path="$tmp_dir_json/$json_name" | ||
  today=$(date +"%Y-%m-%d") | ||
  yq="${pkgs.yq-go}/bin/yq --front-matter process --inplace" | ||
  $yq ".transcription.date = \"$today\"" "$asset_path" | ||
  $yq ".transcription.author = \"Whisper AI\"" "$asset_path" | ||
  $yq ".transcription.kind = \"auto-generated\"" "$asset_path" | ||
  # Pass the author through the environment instead of splicing it into the | ||
  # yq expression, so quotes in the name cannot break or alter the expression. | ||
  export author | ||
  $yq '.added.author = strenv(author)' "$asset_path" | ||
  $yq ".added.date = \"$today\"" "$asset_path" | ||
  ${inputs.self.packages.x86_64-linux.whisper-json2md}/bin/whisper-json2md "$json_path" >> "$asset_path" | ||
''; | ||
|
||
default = build; | ||
}; | ||
|
||
|
@@ -131,6 +175,10 @@ | |
|
||
# Custom tool to convert Whisper JSON output to our markdown format | ||
inputs.self.packages.x86_64-linux.whisper-json2md | ||
|
||
# Convenience bash script using yt-dlp, whisper & whisper-json2md to | ||
# transcribe and update assets with a `source.url` in the frontmatter. | ||
inputs.self.packages.x86_64-linux.transcribe | ||
]; | ||
}; | ||
}); | ||
|