-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Generalise slicer #72
Changes from all commits
12959a1
75e1c82
716e48d
b86110c
dc31b95
091ad47
bdb0da4
8ba9ed8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,102 +18,90 @@ | |
data/sampled-70m40s-80m40s-MartineBarrat_FINAL.mp3 | ||
data/sampled-2m-4m-MartineBarrat_FINAL.mp3 | ||
|
||
Note: | ||
The script currently doesn't support input in hours format, so hours will need to be converted to mins:secs format. | ||
Example: If we want audio from 1hr 20mins 30secs to 1hr 30mins 40secs, it can be done as shown below: | ||
1hr 20mins 30secs = 80mins 30secs | ||
1hr 30mins 40secs = 90mins 40secs | ||
|
||
python interviewkit/slicer.py data/Martine+Barrat_FINAL.mp3 80:30 90:40 | ||
|
||
""" | ||
import sys | ||
from pathlib import Path | ||
import shutil | ||
from datetime import timedelta, datetime | ||
from dataclasses import dataclass | ||
import pydub | ||
|
||
try: | ||
import pydub | ||
except ImportError: | ||
print("Please install pydub: pip install pydub") | ||
exit(1) | ||
|
||
if shutil.which("ffmpeg") is None: | ||
print("Please install ffmpeg: https://ffmpeg.org/download.html") | ||
print(" On mac you can: brew install ffmpeg") | ||
exit(1) | ||
|
||
def convert_audio_time_to_msec(audio_time_split_list):
    """Convert a split ``mins`` or ``mins:secs`` time into milliseconds.

    Args:
        audio_time_split_list: Fields of a time string already split on
            ":" -- either ``[mins]`` or ``[mins, secs]``.

    Returns:
        The time in milliseconds as an int, for pydub computation.

    Prints an error and exits the process with status 1 when the list is
    empty, has more than two fields, or holds a field that is not an
    integer.
    """
    format_error = (
        "Error! Audio slice input params invalid. Audio slice supports "
        "start/end time in mins or mins:secs format. Please try again "
        "with correct input times."
    )

    if not audio_time_split_list:
        print("Error! Audio slice input params invalid. Please try again with correct parameters.")
    elif len(audio_time_split_list) > 2:
        print(format_error)
    else:
        try:
            fields = [int(field) for field in audio_time_split_list]
        except (TypeError, ValueError):
            # A non-numeric field is a user input error like any other, so
            # report it through the same path instead of a raw traceback.
            print(format_error)
        else:
            minutes = fields[0]
            seconds = fields[1] if len(fields) == 2 else 0
            return minutes * 60 * 1000 + seconds * 1000

    print("Error inside convert_audio_time_to_msec(audio_time_split_list) function.")
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. under ``python -S``).
    sys.exit(1)
|
||
def export_filename(audio_time_list):
    """Build the time label used in the exported file's name.

    Args:
        audio_time_list: Fields of a time string already split on ":" --
            either ``[mins]`` or ``[mins, secs]``.

    Returns:
        ``"<mins>m<secs>s"`` for two fields, ``"<mins>m"`` for one.

    Prints an error and exits the process with status 1 for an empty list
    or any other number of fields.
    """
    if audio_time_list:
        if len(audio_time_list) == 2:
            return f"{audio_time_list[0]}m{audio_time_list[1]}s"
        if len(audio_time_list) == 1:
            return f"{audio_time_list[0]}m"

    print("Error! Audio slice input params invalid. Please try again with correct parameters.")
    print("Error inside export_filename(audio_time_list) function.")
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. under ``python -S``).
    sys.exit(1)
|
||
|
||
def audio_slicing(path, audio_slice_start_time, audio_slice_end_time): | ||
""" It reads the original audio and uses start and end input time params to generate sliced audio. """ | ||
|
||
print("Sampling {} from {} to {}".format(path, audio_slice_start_time, audio_slice_end_time)) | ||
|
||
# Reading original audio file | ||
audio = pydub.AudioSegment.from_file(path) | ||
original_audio_size_ms = audio.duration_seconds * 1000 | ||
EXPECTED_TIME_FORMAT = "%H:%M:%S" | ||
|
||
# Fetching mins and secs from audio input | ||
audio_start_time_list = audio_slice_start_time.split(":") | ||
audio_end_time_list = audio_slice_end_time.split(":") | ||
@dataclass
class SlicerInput:
    """Parsed command-line arguments for one slicing run.

    Attributes:
        path: Location of the audio file to slice.
        start_time: Offset into the audio at which the slice begins.
        end_time: Offset into the audio at which the slice ends.
    """

    path: Path
    start_time: timedelta
    end_time: timedelta
|
||
# Converting audio start and end times in msecs | ||
audio_start_time = convert_audio_time_to_msec(audio_start_time_list) | ||
audio_end_time = convert_audio_time_to_msec(audio_end_time_list) | ||
# Review thread on this function (PR #72), kept from the diff page:
#   Reviewer: I think your instinct to reuse something like timedelta here
#     was great. I empathize with the challenges you ran into: parsing time
#     strings is not easy! I can see how you came to the conclusion. You
#     might need a bespoke parser. I feel like someone must have written a
#     moment.js equivalent for Python by now. Maybe there is something like
#     that.
#   Author: Thanks Audrey. Yes it started out quite clean, and then slowly
#     started getting less and less haha. Unfortunate as it could have been
#     a fairly simple solution. It's quite surprising a time concept on its
#     own doesn't really exist in the std library, only when associated with
#     date time. Ah nice, I'll see if there's a library about, I feel like
#     there must be. Thanks for your review, massively appreciate it
def convert_time_input_to_time_delta(time_input: str) -> timedelta:
    """Parse ``[[hours:]minutes:]seconds`` text into a ``timedelta``.

    Fields are right-aligned: one field is read as seconds, two as
    ``minutes:seconds`` and three as ``hours:minutes:seconds``. The leading
    field may be any non-negative integer, so ``"80:30"`` means 80 minutes
    30 seconds; every later field must be below 60.

    Args:
        time_input: The time text, e.g. ``"45"``, ``"80:30"`` or
            ``"1:20:30"``.

    Returns:
        The offset described by ``time_input``.

    Raises:
        ValueError: If there are more than three fields, a field is not a
            plain non-negative integer, or a non-leading field is 60 or
            more.
    """
    # Smallest unit first, to pair with the reversed (right-aligned) fields.
    unit_names = ("seconds", "minutes", "hours")

    fields = time_input.split(":")
    if len(fields) > len(unit_names):
        # Previously the extra leading fields were silently dropped.
        raise ValueError(
            f"time {time_input!r} has too many fields; expected [[hours:]minutes:]seconds"
        )
    if not all(field.isascii() and field.isdecimal() for field in fields):
        raise ValueError(
            f"time {time_input!r} must contain only non-negative integers separated by ':'"
        )

    values = [int(field) for field in fields]
    # Only the leading field may overflow its unit; strptime's %M/%H caps
    # used to reject the documented "80:30" style of input outright.
    if any(value >= 60 for value in values[1:]):
        raise ValueError(
            f"time {time_input!r} is invalid; only the first field may be 60 or more"
        )

    return timedelta(**dict(zip(unit_names, reversed(values))))
|
||
|
||
def parse_input(argv: list[str]): | ||
if len(argv) != 4: | ||
raise ValueError("Usage: python3 slicer.py <filepath> <audio start time in minutes> <audio end time in minutes>") | ||
|
||
# Check if audio start and end times are within original audio size limits | ||
if(audio_start_time > original_audio_size_ms or audio_end_time > original_audio_size_ms): | ||
print("Error! Audio slice input params cannot be greater than original audio size. Please try again with correct parameters.") | ||
exit(1) | ||
path = Path(argv[1]) | ||
|
||
# Converting audio start and end times in msecs | ||
audio_start_time = convert_time_input_to_time_delta(argv[2]) | ||
audio_end_time = convert_time_input_to_time_delta(argv[3]) | ||
|
||
return SlicerInput(path, audio_start_time, audio_end_time) | ||
|
||
def create_output_path(input: "SlicerInput") -> Path:
    """Build the path of the sliced file, next to the source audio file.

    The name has the form ``sampled-<start>-<end>-<original name>``, where
    each time is written as ``<minutes>m<seconds>s`` (hours are folded into
    the minutes, e.g. 1h20m30s becomes ``80m30s``).

    Args:
        input: The parsed slicer arguments; ``path``, ``start_time`` and
            ``end_time`` are read. (The parameter name shadows the builtin
            but is kept so existing keyword callers still work.)

    Returns:
        The output path in the same directory as ``input.path``.
    """

    def _label(offset: timedelta) -> str:
        # total_seconds() rather than .seconds: .seconds drops the days
        # component, which mislabels offsets of 24 hours or more.
        minutes, seconds = divmod(int(offset.total_seconds()), 60)
        return f"{minutes}m{seconds}s"

    source = input.path
    return source.parent / (
        f"sampled-{_label(input.start_time)}-{_label(input.end_time)}-{source.name}"
    )
|
||
# Audio slicing process | ||
audio = audio[audio_start_time:audio_end_time] | ||
def slice_audio(audio: "pydub.AudioSegment", start_time: timedelta, end_time: timedelta):
    """Return the part of ``audio`` between ``start_time`` and ``end_time``.

    Args:
        audio: The audio to slice; its ``duration_seconds`` is read and it
            is sliced by millisecond offsets.
        start_time: Offset at which the slice begins.
        end_time: Offset at which the slice ends.

    Returns:
        The result of slicing ``audio`` from the start offset to the end
        offset, both in milliseconds.

    Raises:
        ValueError: If either offset lies beyond the end of the audio.
    """
    audio_length_s = audio.duration_seconds
    # total_seconds() rather than .seconds: .seconds ignores the days and
    # microseconds components, letting out-of-range offsets slip through.
    start_s = start_time.total_seconds()
    end_s = end_time.total_seconds()

    if start_s > audio_length_s or end_s > audio_length_s:
        raise ValueError("slice not within audio length")

    return audio[int(start_s * 1000):int(end_s * 1000)]
|
||
# Filename for exported file | ||
audio_start_time_name = export_filename(audio_start_time_list) | ||
audio_end_time_name = export_filename(audio_end_time_list) | ||
new_filename = f"{path.parent}/sampled-{audio_start_time_name}-{audio_end_time_name}-{path.name}" | ||
audio.export(new_filename, format="mp3") | ||
print("Created new file: ", new_filename) | ||
|
||
def main(): | ||
if len(sys.argv) != 4: | ||
print("Usage: python3 slicer.py <filepath> <audio start time in minutes> <audio end time in minutes>") | ||
return | ||
|
||
path = Path(sys.argv[1]) | ||
audio_slice_start_time = (sys.argv[2]) | ||
audio_slice_end_time = (sys.argv[3]) | ||
argv = parse_input(sys.argv) | ||
|
||
audio = pydub.AudioSegment.from_file(argv.path) | ||
|
||
try: | ||
sliced_audio = slice_audio(audio, argv.start_time, argv.end_time) | ||
except ValueError: | ||
raise ValueError("Error! Audio slice input params cannot be greater than original audio size. Please try again with correct parameters.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool, I like what you did here. Extending the value error message is good. In the future we may want something like an exceptions.py with custom exceptions. |
||
|
||
output_file_path = create_output_path(argv) | ||
sliced_audio.export(output_file_path, "mp3") | ||
|
||
print(f'Created new file: {output_file_path}') | ||
|
||
audio_slicing(path, audio_slice_start_time, audio_slice_end_time) | ||
|
||
if __name__ == '__main__': | ||
main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like the dataclass! Adding a docstring to help Sphinx autodoc: