-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdata.py
122 lines (110 loc) · 5.49 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
This script will convert the audio to spectrogram, or convert spectrogram to audio
As in former direction, the five folder will be created.
We follow the format of MuseDB18 which contains 5 different tracks.
root --+-- mixture
|
+-- drum
|
+-- bass
|
+-- rest
|
+-- vocal
For the latter direction, you should assign both the magnitude and phase folder
The site of MuseDB18 is here: https://sigsep.github.io/datasets/musdb.html
@Author: SunnerLi
"""
from utils import num2str
from tqdm import tqdm
import sounddevice as sd
import numpy as np
import argparse
import librosa
import stempeg
import os
# =========================================================================================
# 1. Parse the direction and related parameters
# =========================================================================================
"""
Parameter Explain
--------------------------------------------------------------------------------------------
--src The source folder where the song you want to split are locate in
(Or the magnitude you want to reconstruct)
--tar The target folder where the spectrogram will store in
(Or the folder where you want to store the reconstruct wave)
--phase The folder of phase. You only need to assign in to_wave direction
--win_size The window size of STFT. The original paper use 1024
--hop_size The hop size of window. The original paper use 768
--sr The sampling rate. Default is 44100
--direction The direction you want to deal with. 'to_spec' or 'to_wave'
--------------------------------------------------------------------------------------------
"""
parser = argparse.ArgumentParser()
parser.add_argument('--src' , type = str)
parser.add_argument('--tar' , type = str)
parser.add_argument('--phase' , type = str, default = '-1')
parser.add_argument('--win_size' , default = 1024)
parser.add_argument('--hop_size' , default = 768)
parser.add_argument('--sr' , default = 44100)
parser.add_argument('--direction' , default = "to_spec")
args = parser.parse_args()
if args.direction == 'to_wave' and args.phase == '-1':
raise Exception("You need to assign the phase parameter in to_wave direction!")
# =========================================================================================
# 2. Convert
# =========================================================================================
if args.direction == 'to_spec':
# create the folder
if not os.path.exists(args.tar):
os.mkdir(args.tar)
os.mkdir(os.path.join(args.tar, 'mixture'))
os.mkdir(os.path.join(args.tar, 'drum'))
os.mkdir(os.path.join(args.tar, 'bass'))
os.mkdir(os.path.join(args.tar, 'rest'))
os.mkdir(os.path.join(args.tar, 'vocal'))
# load the wave, transform to spectrogram and save
tar_folder_list = ['mixture', 'drum', 'bass', 'rest', 'vocal']
loader = tqdm(sorted(os.listdir(args.src)))
for audio_idx, name in enumerate(loader):
norm = None
for i in range(5):
try:
audio, rate = stempeg.read_stems(filename=os.path.join(args.src, name), stem_id=i)
audio = audio[:, 0] + audio[:, 1]
# sd.play(audio, rate, blocking=True)
stft = librosa.stft(audio, n_fft=args.win_size, hop_length=args.hop_size)
spectrum, phase = librosa.magphase(stft)
spectrogram = np.abs(spectrum).astype(np.float32)
norm = spectrogram.max() if norm is None else norm
spectrogram /= norm
np.save(os.path.join(args.tar, tar_folder_list[i], num2str(audio_idx) + '_' + name[:-10] + '_spec'), spectrogram)
np.save(os.path.join(args.tar, tar_folder_list[i], num2str(audio_idx) + '_' + name[:-10] + '_phase'), phase)
except IndexError:
print("[Warning] The song {} cannot capture the {} track".format(name, tar_folder_list[i]))
elif args.direction == 'to_wave':
# create the folder
if not os.path.exists(args.tar):
os.mkdir(args.tar)
# Load the spectrogram and merge
loader = tqdm(sorted(os.listdir(args.src)))
for audio_idx, spec_name in enumerate(loader):
if 'spec' in spec_name:
# load data
phase_name = spec_name[:-8] + 'phase.npy'
mag = np.load(os.path.join(args.src, spec_name))
phase = np.load(os.path.join(args.phase, phase_name))
# resize as the same size
length = min(phase.shape[-1], mag.shape[-1])
mag = mag[:, :length]
phase = phase[:, :length]
# De-normalization
spectrogram = mag*phase
mix_spec = np.load(os.path.join(args.phase, spec_name))
spectrogram *= mix_spec.max()
# reconstruct the audio
y = librosa.istft(spectrogram, win_length=args.win_size, hop_length=args.hop_size)
file_path = os.path.join(args.tar, str(audio_idx) + '.mp3')
librosa.output.write_wav(file_path, y, int(args.sr), norm=True)
else:
raise Exception("Unknown direction {}. Please assign one of them [to_spec, to_wave]".format(args.direction))