diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..41d963af --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,3 @@ +# format files with black +2eab92b73570d336cab356392bfcf1e08ef602e6 + diff --git a/.github/workflows/format_code.yaml b/.github/workflows/format_code.yaml new file mode 100644 index 00000000..e909cf70 --- /dev/null +++ b/.github/workflows/format_code.yaml @@ -0,0 +1,10 @@ +name: Check code formatting + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: psf/black@stable diff --git a/.github/workflows/isort.yaml b/.github/workflows/isort.yaml new file mode 100644 index 00000000..ea44105d --- /dev/null +++ b/.github/workflows/isort.yaml @@ -0,0 +1,10 @@ +name: Run isort + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: isort/isort-action@v1 diff --git a/setup.py b/setup.py index fb230882..fb09fded 100644 --- a/setup.py +++ b/setup.py @@ -1,23 +1,24 @@ -from setuptools import Extension, setup -from Cython.Build import cythonize import sys + import numpy +from Cython.Build import cythonize +from setuptools import Extension, setup sourcefiles = [ "smstools/models/utilFunctions_C/utilFunctions.c", - "smstools/models/utilFunctions_C/cutilFunctions.pyx" + "smstools/models/utilFunctions_C/cutilFunctions.pyx", ] -if sys.platform == 'win32': - library = 'msvcrt' +if sys.platform == "win32": + library = "msvcrt" else: - library = 'm' + library = "m" extensions = [ Extension( "smstools.models.utilFunctions_C.utilFunctions_C", sourcefiles, include_dirs=[numpy.get_include()], - libraries=[library] + libraries=[library], ), ] setup( diff --git a/smstools/models/dftModel.py b/smstools/models/dftModel.py index 138934f4..9ef4a231 100644 --- a/smstools/models/dftModel.py +++ b/smstools/models/dftModel.py @@ -1,9 +1,11 @@ # functions that implement analysis and synthesis of sounds using the Discrete Fourier Transform # (for example usage check dftModel_function.py in the interface directory) -import numpy as np import math + +import numpy as np from scipy.fft import fft, ifft + from smstools.models import utilFunctions as UF tol = 1e-14 # threshold used to compute phase @@ -11,15 +13,15 @@ def dftModel(x, w, N): """ - Analysis/synthesis of a signal using the discrete Fourier transform - x: input signal, w: analysis window, N: FFT size - returns y: output signal - """ + Analysis/synthesis of a signal using the discrete Fourier transform + x: input signal, w: analysis window, N: FFT size + returns y: output signal + """ if not (UF.isPower2(N)): # raise error if N not a power of two raise ValueError("FFT size (N) is not a power of 2") - if (w.size > N): # raise error if window size bigger than fft size + if w.size > N: # raise error if window size bigger than fft size raise ValueError("Window size (M) is bigger than FFT size") if all(x == 0): # if input array is zeros return empty output @@ -35,13 +37,17 @@ def dftModel(x, w, N): fftbuffer[-hM2:] = xw[:hM2] X = fft(fftbuffer) # compute FFT absX = abs(X[:hN]) # compute ansolute value of positive side - absX[absX < np.finfo(float).eps] = np.finfo(float).eps # if zeros add epsilon to handle log + absX[absX < np.finfo(float).eps] = np.finfo( + float + ).eps # if zeros add epsilon to handle log mX = 20 * np.log10(absX) # magnitude spectrum of positive frequencies in dB pX = np.unwrap(np.angle(X[:hN])) # unwrapped phase spectrum of positive frequencies # -----synthesis----- Y = np.zeros(N, dtype=complex) # clean output spectrum Y[:hN] = 10 ** (mX / 20) * np.exp(1j * pX) # generate positive frequencies - Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp(-1j * pX[-2:0:-1]) # generate negative frequencies + Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp( + -1j * pX[-2:0:-1] + ) # generate negative frequencies fftbuffer = np.real(ifft(Y)) # compute inverse FFT y[:hM2] = fftbuffer[-hM2:] # undo zero-phase window y[hM2:] = fftbuffer[:hM1] @@ -50,10 +56,10 @@ def dftModel(x, w, N): def dftAnal(x, w, N): """ - Analysis of a signal using the discrete Fourier transform - x: input signal, w: analysis window, N: FFT size - returns mX, pX: magnitude and phase spectrum - """ + Analysis of a signal using the discrete Fourier transform + x: input signal, w: analysis window, N: FFT size + returns mX, pX: magnitude and phase spectrum + """ if not (UF.isPower2(N)): # raise error if N not a power of two raise ValueError("FFT size (N) is not a power of 2") @@ -71,20 +77,26 @@ def dftAnal(x, w, N): fftbuffer[-hM2:] = xw[:hM2] X = fft(fftbuffer) # compute FFT absX = abs(X[:hN]) # compute ansolute value of positive side - absX[absX < np.finfo(float).eps] = np.finfo(float).eps # if zeros add epsilon to handle log + absX[absX < np.finfo(float).eps] = np.finfo( + float + ).eps # if zeros add epsilon to handle log mX = 20 * np.log10(absX) # magnitude spectrum of positive frequencies in dB - X[:hN].real[np.abs(X[:hN].real) < tol] = 0.0 # for phase calculation set to 0 the small values - X[:hN].imag[np.abs(X[:hN].imag) < tol] = 0.0 # for phase calculation set to 0 the small values + X[:hN].real[ + np.abs(X[:hN].real) < tol + ] = 0.0 # for phase calculation set to 0 the small values + X[:hN].imag[ + np.abs(X[:hN].imag) < tol + ] = 0.0 # for phase calculation set to 0 the small values pX = np.unwrap(np.angle(X[:hN])) # unwrapped phase spectrum of positive frequencies return mX, pX def dftSynth(mX, pX, M): """ - Synthesis of a signal using the discrete Fourier transform - mX: magnitude spectrum, pX: phase spectrum, M: window size - returns y: output signal - """ + Synthesis of a signal using the discrete Fourier transform + mX: magnitude spectrum, pX: phase spectrum, M: window size + returns y: output signal + """ hN = mX.size # size of positive spectrum, it includes sample 0 N = (hN - 1) * 2 # FFT size @@ -96,7 +108,9 @@ def dftSynth(mX, pX, M): y = np.zeros(M) # initialize output array Y = np.zeros(N, dtype=complex) # clean output spectrum Y[:hN] = 10 ** (mX / 20) * np.exp(1j * pX) # generate positive frequencies - Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp(-1j * pX[-2:0:-1]) # generate negative frequencies + Y[hN:] = 10 ** (mX[-2:0:-1] / 20) * np.exp( + -1j * pX[-2:0:-1] + ) # generate negative frequencies fftbuffer = np.real(ifft(Y)) # compute inverse FFT y[:hM2] = fftbuffer[-hM2:] # undo zero-phase window y[hM2:] = fftbuffer[:hM1] diff --git a/smstools/models/harmonicModel.py b/smstools/models/harmonicModel.py index 52bc06dd..a1bcd877 100644 --- a/smstools/models/harmonicModel.py +++ b/smstools/models/harmonicModel.py @@ -1,37 +1,41 @@ # functions that implement analysis and synthesis of sounds using the Harmonic Model # (for example usage check the interface directory) +import math + import numpy as np -from scipy.signal.windows import blackmanharris, triang from scipy.fft import ifft -import math +from scipy.signal.windows import blackmanharris, triang + from smstools.models import dftModel as DFT -from smstools.models import utilFunctions as UF from smstools.models import sineModel as SM +from smstools.models import utilFunctions as UF def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et): """ - Fundamental frequency detection of a sound using twm algorithm - x: input sound; fs: sampling rate; w: analysis window; - N: FFT size; t: threshold in negative dB, - minf0: minimum f0 frequency in Hz, maxf0: maximim f0 frequency in Hz, - f0et: error threshold in the f0 detection (ex: 5), - returns f0: fundamental frequency - """ - if (minf0 < 0): # raise exception if minf0 is smaller than 0 + Fundamental frequency detection of a sound using twm algorithm + x: input sound; fs: sampling rate; w: analysis window; + N: FFT size; t: threshold in negative dB, + minf0: minimum f0 frequency in Hz, maxf0: maximim f0 frequency in Hz, + f0et: error threshold in the f0 detection (ex: 5), + returns f0: fundamental frequency + """ + if minf0 < 0: # raise exception if minf0 is smaller than 0 raise ValueError("Minumum fundamental frequency (minf0) smaller than 0") - if (maxf0 >= 10000): # raise exception if maxf0 is bigger than fs/2 + if maxf0 >= 10000: # raise exception if maxf0 is bigger than fs/2 raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz") - if (H <= 0): # raise error if hop size 0 or negative + if H <= 0: # raise error if hop size 0 or negative raise ValueError("Hop size (H) smaller or equal to 0") hN = N // 2 # size of positive spectrum hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor - x = np.append(np.zeros(hM2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hM1)) # add zeros at the end to analyze last sample pin = hM1 # init sound pointer in middle of anal window pend = x.size - hM1 # last sample to start a frame @@ -41,14 +45,15 @@ def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et): f0t = 0 # initialize f0 track f0stable = 0 # initialize f0 stable while pin < pend: - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect peak locations iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N # convert locations to Hez f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 - if ((f0stable == 0) & (f0t > 0)) \ - or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)): + if ((f0stable == 0) & (f0t > 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 @@ -59,30 +64,36 @@ def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et): def harmonicDetection(pfreq, pmag, pphase, f0, nH, hfreqp, fs, harmDevSlope=0.01): """ - Detection of the harmonics of a frame from a set of spectral peaks using f0 - to the ideal harmonic series built on top of a fundamental frequency - pfreq, pmag, pphase: peak frequencies, magnitudes and phases - f0: fundamental frequency, nH: number of harmonics, - hfreqp: harmonic frequencies of previous frame, - fs: sampling rate; harmDevSlope: slope of change of the deviation allowed to perfect harmonic - returns hfreq, hmag, hphase: harmonic frequencies, magnitudes, phases - """ - - if (f0 <= 0): # if no f0 return no harmonics + Detection of the harmonics of a frame from a set of spectral peaks using f0 + to the ideal harmonic series built on top of a fundamental frequency + pfreq, pmag, pphase: peak frequencies, magnitudes and phases + f0: fundamental frequency, nH: number of harmonics, + hfreqp: harmonic frequencies of previous frame, + fs: sampling rate; harmDevSlope: slope of change of the deviation allowed to perfect harmonic + returns hfreq, hmag, hphase: harmonic frequencies, magnitudes, phases + """ + + if f0 <= 0: # if no f0 return no harmonics return np.zeros(nH), np.zeros(nH), np.zeros(nH) hfreq = np.zeros(nH) # initialize harmonic frequencies hmag = np.zeros(nH) - 100 # initialize harmonic magnitudes hphase = np.zeros(nH) # initialize harmonic phases hf = f0 * np.arange(1, nH + 1) # initialize harmonic frequencies hi = 0 # initialize harmonic index - if len(hfreqp) == 0: # if no incomming harmonic tracks initialize to harmonic series + if ( + len(hfreqp) == 0 + ): # if no incomming harmonic tracks initialize to harmonic series hfreqp = hf while (f0 > 0) and (hi < nH) and (hf[hi] < fs / 2): # find harmonic peaks pei = np.argmin(abs(pfreq - hf[hi])) # closest peak dev1 = abs(pfreq[pei] - hf[hi]) # deviation from perfect harmonic - dev2 = (abs(pfreq[pei] - hfreqp[hi]) if hfreqp[hi] > 0 else fs) # deviation from previous frame + dev2 = ( + abs(pfreq[pei] - hfreqp[hi]) if hfreqp[hi] > 0 else fs + ) # deviation from previous frame threshold = f0 / 3 + harmDevSlope * pfreq[pei] - if ((dev1 < threshold) or (dev2 < threshold)): # accept peak if deviation is small + if (dev1 < threshold) or ( + dev2 < threshold + ): # accept peak if deviation is small hfreq[hi] = pfreq[pei] # harmonic frequencies hmag[hi] = pmag[pei] # harmonic magnitudes hphase[hi] = pphase[pei] # harmonic phases @@ -92,19 +103,21 @@ def harmonicDetection(pfreq, pmag, pphase, f0, nH, hfreqp, fs, harmDevSlope=0.01 def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et): """ - Analysis/synthesis of a sound using the sinusoidal harmonic model - x: input sound, fs: sampling rate, w: analysis window, - N: FFT size (minimum 512), t: threshold in negative dB, - nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, - maxf0: maximim f0 frequency in Hz, - f0et: error threshold in the f0 detection (ex: 5), - returns y: output array sound - """ + Analysis/synthesis of a sound using the sinusoidal harmonic model + x: input sound, fs: sampling rate, w: analysis window, + N: FFT size (minimum 512), t: threshold in negative dB, + nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, + maxf0: maximim f0 frequency in Hz, + f0et: error threshold in the f0 detection (ex: 5), + returns y: output array sound + """ hN = N // 2 # size of positive spectrum hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor - x = np.append(np.zeros(hM2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hM1)) # add zeros at the end to analyze last sample Ns = 512 # FFT size for synthesis (even) H = Ns // 4 # Hop size used for analysis and synthesis @@ -117,57 +130,70 @@ def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et): w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) # initialize synthesis window ow = triang(2 * H) # overlapping window - sw[hNs - H:hNs + H] = ow + sw[hNs - H : hNs + H] = ow bh = blackmanharris(Ns) # synthesis window bh = bh / sum(bh) # normalize synthesis window - sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H] # window for overlap-add + sw[hNs - H : hNs + H] = ( + sw[hNs - H : hNs + H] / bh[hNs - H : hNs + H] + ) # window for overlap-add hfreqp = [] f0t = 0 f0stable = 0 while pin < pend: # -----analysis----- - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect peak locations iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 - if ((f0stable == 0) & (f0t > 0)) \ - or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)): + if ((f0stable == 0) & (f0t > 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 - hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) # find harmonics + hfreq, hmag, hphase = harmonicDetection( + ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs + ) # find harmonics hfreqp = hfreq # -----synthesis----- Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate spec sines fftbuffer = np.real(ifft(Yh)) # inverse FFT - yh[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window - yh[hNs - 1:] = fftbuffer[:hNs + 1] - y[pin - hNs:pin + hNs] += sw * yh # overlap-add + yh[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + yh[hNs - 1 :] = fftbuffer[: hNs + 1] + y[pin - hNs : pin + hNs] += sw * yh # overlap-add pin += H # advance sound pointer - y = np.delete(y, range(hM2)) # delete half of first window which was added in stftAnal - y = np.delete(y, range(y.size - hM1, y.size)) # add zeros at the end to analyze last sample + y = np.delete( + y, range(hM2) + ) # delete half of first window which was added in stftAnal + y = np.delete( + y, range(y.size - hM1, y.size) + ) # add zeros at the end to analyze last sample return y -def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=.02): +def harmonicModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=0.02 +): + """ + Analysis of a sound using the sinusoidal harmonic model + x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512); t: threshold in negative dB, + nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz, + maxf0: maximim f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5), + harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics + returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases """ - Analysis of a sound using the sinusoidal harmonic model - x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512); t: threshold in negative dB, - nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz, - maxf0: maximim f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5), - harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics - returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases - """ - - if (minSineDur < 0): # raise exception if minSineDur is smaller than 0 + + if minSineDur < 0: # raise exception if minSineDur is smaller than 0 raise ValueError("Minimum duration of sine tracks smaller than 0") hN = N // 2 # size of positive spectrum hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor - x = np.append(np.zeros(hM2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hM2)) # add zeros at the end to analyze last sample pin = hM1 # init sound pointer in middle of anal window pend = x.size - hM1 # last sample to start a frame @@ -177,19 +203,21 @@ def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0. f0t = 0 # initialize f0 track f0stable = 0 # initialize f0 stable while pin <= pend: - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect peak locations iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N # convert locations to Hz f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 - if ((f0stable == 0) & (f0t > 0)) \ - or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)): + if ((f0stable == 0) & (f0t > 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 - hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, - harmDevSlope) # find harmonics + hfreq, hmag, hphase = harmonicDetection( + ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, harmDevSlope + ) # find harmonics hfreqp = hfreq if pin == hM1: # first frame xhfreq = np.array([hfreq]) @@ -200,5 +228,7 @@ def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0. xhmag = np.vstack((xhmag, np.array([hmag]))) xhphase = np.vstack((xhphase, np.array([hphase]))) pin += H # advance sound pointer - xhfreq = SM.cleaningSineTracks(xhfreq, round(fs * minSineDur / H)) # delete tracks shorter than minSineDur + xhfreq = SM.cleaningSineTracks( + xhfreq, round(fs * minSineDur / H) + ) # delete tracks shorter than minSineDur return xhfreq, xhmag, xhphase diff --git a/smstools/models/hprModel.py b/smstools/models/hprModel.py index 2c8e2920..17393608 100644 --- a/smstools/models/hprModel.py +++ b/smstools/models/hprModel.py @@ -1,111 +1,124 @@ # functions that implement analysis and synthesis of sounds using the Harmonic plus Residual Model # (for example usage check the interface directory) -import numpy as np import math -from scipy.signal.windows import blackmanharris, triang + +import numpy as np from scipy.fft import fft, ifft -from smstools.models import harmonicModel as HM +from scipy.signal.windows import blackmanharris, triang + from smstools.models import dftModel as DFT -from smstools.models import utilFunctions as UF +from smstools.models import harmonicModel as HM from smstools.models import sineModel as SM +from smstools.models import utilFunctions as UF + def hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope): - """Analysis of a sound using the harmonic plus residual model - x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, - minSineDur: minimum duration of sinusoidal tracks - nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound - maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm - harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation - returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; xr: residual signal - """ + """Analysis of a sound using the harmonic plus residual model + x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, + minSineDur: minimum duration of sinusoidal tracks + nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound + maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm + harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation + returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; xr: residual signal + """ + + # perform harmonic analysis + hfreq, hmag, hphase = HM.harmonicModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur + ) + Ns = 512 + xr = UF.sineSubtraction( + x, Ns, H, hfreq, hmag, hphase, fs + ) # subtract sinusoids from original sound + return hfreq, hmag, hphase, xr - # perform harmonic analysis - hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur) - Ns = 512 - xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs) # subtract sinusoids from original sound - return hfreq, hmag, hphase, xr def hprModelSynth(hfreq, hmag, hphase, xr, N, H, fs): - """ - Synthesis of a sound using the sinusoidal plus residual model - tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; stocEnv: stochastic envelope - N: synthesis FFT size; H: hop size, fs: sampling rate - returns y: output sound, yh: harmonic component - """ + """ + Synthesis of a sound using the sinusoidal plus residual model + tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; stocEnv: stochastic envelope + N: synthesis FFT size; H: hop size, fs: sampling rate + returns y: output sound, yh: harmonic component + """ - yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs) # synthesize sinusoids - y = yh[:min(yh.size, xr.size)]+xr[:min(yh.size, xr.size)] # sum sinusoids and residual components - return y, yh + yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs) # synthesize sinusoids + y = ( + yh[: min(yh.size, xr.size)] + xr[: min(yh.size, xr.size)] + ) # sum sinusoids and residual components + return y, yh -def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et): - """ - Analysis/synthesis of a sound using the harmonic plus residual model - x: input sound, fs: sampling rate, w: analysis window, - N: FFT size (minimum 512), t: threshold in negative dB, - nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, - maxf0: maximim f0 frequency in Hz, - f0et: error threshold in the f0 detection (ex: 5), - maxhd: max. relative deviation in harmonic detection (ex: .2) - returns y: output sound, yh: harmonic component, xr: residual component - """ - hN = N//2 # size of positive spectrum - hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding - hM2 = int(math.floor(w.size/2)) # half analysis window size by floor - Ns = 512 # FFT size for synthesis (even) - H = Ns//4 # Hop size used for analysis and synthesis - hNs = Ns//2 - pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window - pend = x.size - max(hNs, hM1) # last sample to start a frame - yhw = np.zeros(Ns) # initialize output sound frame - xrw = np.zeros(Ns) # initialize output sound frame - yh = np.zeros(x.size) # initialize output array - xr = np.zeros(x.size) # initialize output array - w = w / sum(w) # normalize analysis window - sw = np.zeros(Ns) - ow = triang(2*H) # overlapping window - sw[hNs-H:hNs+H] = ow - bh = blackmanharris(Ns) # synthesis window - bh = bh / sum(bh) # normalize synthesis window - wr = bh # window for residual - sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] - hfreqp = [] - f0t = 0 - f0stable = 0 - while pin0)) \ - or ((f0stable>0)&(np.abs(f0stable-f0t) 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): + f0stable = f0t # consider a stable f0 if it is close to the previous one + else: + f0stable = 0 + hfreq, hmag, hphase = HM.harmonicDetection( + ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs + ) # find harmonics + hfreqp = hfreq + ri = pin - hNs - 1 # input sound pointer for residual analysis + xw2 = x[ri : ri + Ns] * wr # window the input sound + fftbuffer = np.zeros(Ns) # reset buffer + fftbuffer[:hNs] = xw2[hNs:] # zero-phase window in fftbuffer + fftbuffer[hNs:] = xw2[:hNs] + X2 = fft(fftbuffer) # compute FFT of input signal for residual analysis + # -----synthesis----- + Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate sines + Xr = X2 - Yh # get the residual complex spectrum + fftbuffer = np.real(ifft(Yh)) # inverse FFT of harmonic spectrum + yhw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + yhw[hNs - 1 :] = fftbuffer[: hNs + 1] + fftbuffer = np.real(ifft(Xr)) # inverse FFT of residual spectrum + xrw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + xrw[hNs - 1 :] = fftbuffer[: hNs + 1] + yh[ri : ri + Ns] += sw * yhw # overlap-add for sines + xr[ri : ri + Ns] += sw * xrw # overlap-add for residual + pin += H # advance sound pointer + y = yh + xr # sum of harmonic and residual components + return y, yh, xr diff --git a/smstools/models/hpsModel.py b/smstools/models/hpsModel.py index 5ae5e6c9..99738098 100644 --- a/smstools/models/hpsModel.py +++ b/smstools/models/hpsModel.py @@ -1,30 +1,36 @@ # functions that implement analysis and synthesis of sounds using the Harmonic plus Stochastic Model # (for example usage check the examples interface) +import math + import numpy as np -from scipy.signal import resample -from scipy.signal.windows import blackmanharris, triang, hann from scipy.fft import fft, ifft -import math +from scipy.signal import resample +from scipy.signal.windows import blackmanharris, hann, triang + +from smstools.models import dftModel as DFT from smstools.models import harmonicModel as HM from smstools.models import sineModel as SM -from smstools.models import dftModel as DFT from smstools.models import stochasticModel as STM from smstools.models import utilFunctions as UF -def hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf): +def hpsModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf +): + """ + Analysis of a sound using the harmonic plus stochastic model + x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, + nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, + maxf0: maximim f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5), + harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics + returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; stocEnv: stochastic residual """ - Analysis of a sound using the harmonic plus stochastic model - x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, - nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, - maxf0: maximim f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5), - harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics - returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; stocEnv: stochastic residual - """ # perform harmonic analysis - hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur) + hfreq, hmag, hphase = HM.harmonicModelAnal( + x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur + ) # subtract sinusoids from original sound xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs) # perform stochastic analysis of residual @@ -34,26 +40,28 @@ def hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSin def hpsModelSynth(hfreq, hmag, hphase, stocEnv, N, H, fs): """ - Synthesis of a sound using the harmonic plus stochastic model - hfreq, hmag: harmonic frequencies and amplitudes; stocEnv: stochastic envelope - Ns: synthesis FFT size; H: hop size, fs: sampling rate - returns y: output sound, yh: harmonic component, yst: stochastic component - """ + Synthesis of a sound using the harmonic plus stochastic model + hfreq, hmag: harmonic frequencies and amplitudes; stocEnv: stochastic envelope + Ns: synthesis FFT size; H: hop size, fs: sampling rate + returns y: output sound, yh: harmonic component, yst: stochastic component + """ yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs) # synthesize harmonics yst = STM.stochasticModelSynth(stocEnv, H, H * 2) # synthesize stochastic residual - y = yh[:min(yh.size, yst.size)] + yst[:min(yh.size, yst.size)] # sum harmonic and stochastic components + y = ( + yh[: min(yh.size, yst.size)] + yst[: min(yh.size, yst.size)] + ) # sum harmonic and stochastic components return y, yh, yst def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf): """ - Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking - x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB, - nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximim f0 frequency in Hz, - f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis - returns y: output sound, yh: harmonic component, yst: stochastic component - """ + Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking + x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB, + nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximim f0 frequency in Hz, + f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis + returns y: output sound, yh: harmonic component, yst: stochastic component + """ hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor @@ -69,58 +77,68 @@ def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf): w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) ow = triang(2 * H) # overlapping window - sw[hNs - H:hNs + H] = ow + sw[hNs - H : hNs + H] = ow bh = blackmanharris(Ns) # synthesis window bh = bh / sum(bh) # normalize synthesis window wr = bh # window for residual - sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H] # synthesis window for harmonic component + sw[hNs - H : hNs + H] = ( + sw[hNs - H : hNs + H] / bh[hNs - H : hNs + H] + ) # synthesis window for harmonic component sws = H * hann(Ns) / 2 # synthesis window for stochastic hfreqp = [] f0t = 0 f0stable = 0 while pin < pend: # -----analysis----- - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # find peaks iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N # convert peak locations to Hz f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 - if ((f0stable == 0) & (f0t > 0)) \ - or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)): + if ((f0stable == 0) & (f0t > 0)) or ( + (f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0) + ): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 - hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) # find harmonics + hfreq, hmag, hphase = HM.harmonicDetection( + ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs + ) # find harmonics hfreqp = hfreq ri = pin - hNs - 1 # input sound pointer for residual analysis - xw2 = x[ri:ri + Ns] * wr # window the input sound + xw2 = x[ri : ri + Ns] * wr # window the input sound fftbuffer = np.zeros(Ns) # reset buffer fftbuffer[:hNs] = xw2[hNs:] # zero-phase window in fftbuffer fftbuffer[hNs:] = xw2[:hNs] X2 = fft(fftbuffer) # compute FFT for residual analysis # -----synthesis----- - Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate spec sines of harmonic component + Yh = UF.genSpecSines( + hfreq, hmag, hphase, Ns, fs + ) # generate spec sines of harmonic component Xr = X2 - Yh # get the residual complex spectrum mXr = 20 * np.log10(abs(Xr[:hNs])) # magnitude spectrum of residual - mXrenv = resample(np.maximum(-200, mXr), - mXr.size * stocf) # decimate the magnitude spectrum and avoid -Inf + mXrenv = resample( + np.maximum(-200, mXr), mXr.size * stocf + ) # decimate the magnitude spectrum and avoid -Inf stocEnv = resample(mXrenv, hNs) # interpolate to original size pYst = 2 * np.pi * np.random.rand(hNs) # generate phase random values Yst = np.zeros(Ns, dtype=complex) Yst[:hNs] = 10 ** (stocEnv / 20) * np.exp(1j * pYst) # generate positive freq. - Yst[hNs + 1:] = 10 ** (stocEnv[:0:-1] / 20) * np.exp(-1j * pYst[:0:-1]) # generate negative freq. + Yst[hNs + 1 :] = 10 ** (stocEnv[:0:-1] / 20) * np.exp( + -1j * pYst[:0:-1] + ) # generate negative freq. fftbuffer = np.real(ifft(Yh)) # inverse FFT of harmonic spectrum - yhw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window - yhw[hNs - 1:] = fftbuffer[:hNs + 1] + yhw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + yhw[hNs - 1 :] = fftbuffer[: hNs + 1] fftbuffer = np.real(ifft(Yst)) # inverse FFT of stochastic spectrum - ystw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window - ystw[hNs - 1:] = fftbuffer[:hNs + 1] + ystw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + ystw[hNs - 1 :] = fftbuffer[: hNs + 1] - yh[ri:ri + Ns] += sw * yhw # overlap-add for sines - yst[ri:ri + Ns] += sws * ystw # overlap-add for stochastic + yh[ri : ri + Ns] += sw * yhw # overlap-add for sines + yst[ri : ri + Ns] += sws * ystw # overlap-add for stochastic pin += H # advance sound pointer y = yh + yst # sum of harmonic and stochastic components diff --git a/smstools/models/sineModel.py b/smstools/models/sineModel.py index 0028f7a7..1ad95411 100644 --- a/smstools/models/sineModel.py +++ b/smstools/models/sineModel.py @@ -1,29 +1,33 @@ # functions that implement analysis and synthesis of sounds using the Sinusoidal Model # (for example usage check the examples in interface) +import math + import numpy as np +from scipy.fft import fftshift, ifft from scipy.signal.windows import blackmanharris, triang -from scipy.fft import ifft, fftshift -import math + from smstools.models import dftModel as DFT from smstools.models import utilFunctions as UF def sineTracking(pfreq, pmag, pphase, tfreq, freqDevOffset=20, freqDevSlope=0.01): """ - Tracking sinusoids from one frame to the next - pfreq, pmag, pphase: frequencies and magnitude of current frame - tfreq: frequencies of incoming tracks from previous frame - freqDevOffset: maximum frequency deviation at 0Hz - freqDevSlope: slope increase of maximum frequency deviation - returns tfreqn, tmagn, tphasen: frequency, magnitude and phase of tracks - """ + Tracking sinusoids from one frame to the next + pfreq, pmag, pphase: frequencies and magnitude of current frame + tfreq: frequencies of incoming tracks from previous frame + freqDevOffset: maximum frequency deviation at 0Hz + freqDevSlope: slope increase of maximum frequency deviation + returns tfreqn, tmagn, tphasen: frequency, magnitude and phase of tracks + """ tfreqn = np.zeros(tfreq.size) # initialize array for output frequencies tmagn = np.zeros(tfreq.size) # initialize array for output magnitudes tphasen = np.zeros(tfreq.size) # initialize array for output phases pindexes = np.array(np.nonzero(pfreq), dtype=int)[0] # indexes of current peaks - incomingTracks = np.array(np.nonzero(tfreq), dtype=int)[0] # indexes of incoming tracks + incomingTracks = np.array(np.nonzero(tfreq), dtype=int)[ + 0 + ] # indexes of incoming tracks newTracks = np.zeros(tfreq.size, dtype=int) - 1 # initialize to -1 new tracks magOrder = np.argsort(-pmag[pindexes]) # order current peaks by magnitude pfreqt = np.copy(pfreq) # copy current peaks to temporary array @@ -35,12 +39,22 @@ def sineTracking(pfreq, pmag, pphase, tfreq, freqDevOffset=20, freqDevSlope=0.01 for i in magOrder: # iterate over current peaks if incomingTracks.size == 0: # break when no more incoming tracks break - track = np.argmin(abs(pfreqt[i] - tfreq[incomingTracks])) # closest incoming track to peak - freqDistance = abs(pfreq[i] - tfreq[incomingTracks[track]]) # measure freq distance - if freqDistance < (freqDevOffset + freqDevSlope * pfreq[i]): # choose track if distance is small + track = np.argmin( + abs(pfreqt[i] - tfreq[incomingTracks]) + ) # closest incoming track to peak + freqDistance = abs( + pfreq[i] - tfreq[incomingTracks[track]] + ) # measure freq distance + if freqDistance < ( + freqDevOffset + freqDevSlope * pfreq[i] + ): # choose track if distance is small newTracks[incomingTracks[track]] = i # assign peak index to track index - incomingTracks = np.delete(incomingTracks, track) # delete index of track in incomming tracks - indext = np.array(np.nonzero(newTracks != -1), dtype=int)[0] # indexes of assigned tracks + incomingTracks = np.delete( + incomingTracks, track + ) # delete index of track in incomming tracks + indext = np.array(np.nonzero(newTracks != -1), dtype=int)[ + 0 + ] # indexes of assigned tracks if indext.size > 0: indexp = newTracks[indext] # indexes of assigned peaks tfreqn[indext] = pfreqt[indexp] # output freq tracks @@ -51,29 +65,33 @@ def sineTracking(pfreq, pmag, pphase, tfreq, freqDevOffset=20, freqDevSlope=0.01 pphaset = np.delete(pphaset, indexp) # delete used peaks # create new tracks from non used peaks - emptyt = np.array(np.nonzero(tfreq == 0), dtype=int)[0] # indexes of empty incoming tracks + emptyt = np.array(np.nonzero(tfreq == 0), dtype=int)[ + 0 + ] # indexes of empty incoming tracks peaksleft = np.argsort(-pmagt) # sort left peaks by magnitude - if ((peaksleft.size > 0) & (emptyt.size >= peaksleft.size)): # fill empty tracks - tfreqn[emptyt[:peaksleft.size]] = pfreqt[peaksleft] - tmagn[emptyt[:peaksleft.size]] = pmagt[peaksleft] - tphasen[emptyt[:peaksleft.size]] = pphaset[peaksleft] - elif ((peaksleft.size > 0) & (emptyt.size < peaksleft.size)): # add more tracks if necessary - tfreqn[emptyt] = pfreqt[peaksleft[:emptyt.size]] - tmagn[emptyt] = pmagt[peaksleft[:emptyt.size]] - tphasen[emptyt] = pphaset[peaksleft[:emptyt.size]] - tfreqn = np.append(tfreqn, pfreqt[peaksleft[emptyt.size:]]) - tmagn = np.append(tmagn, pmagt[peaksleft[emptyt.size:]]) - tphasen = np.append(tphasen, pphaset[peaksleft[emptyt.size:]]) + if (peaksleft.size > 0) & (emptyt.size >= peaksleft.size): # fill empty tracks + tfreqn[emptyt[: peaksleft.size]] = pfreqt[peaksleft] + tmagn[emptyt[: peaksleft.size]] = pmagt[peaksleft] + tphasen[emptyt[: peaksleft.size]] = pphaset[peaksleft] + elif (peaksleft.size > 0) & ( + emptyt.size < peaksleft.size + ): # add more tracks if necessary + tfreqn[emptyt] = pfreqt[peaksleft[: emptyt.size]] + tmagn[emptyt] = pmagt[peaksleft[: emptyt.size]] + tphasen[emptyt] = pphaset[peaksleft[: emptyt.size]] + tfreqn = np.append(tfreqn, pfreqt[peaksleft[emptyt.size :]]) + tmagn = np.append(tmagn, pmagt[peaksleft[emptyt.size :]]) + tphasen = np.append(tphasen, pphaset[peaksleft[emptyt.size :]]) return tfreqn, tmagn, tphasen def cleaningSineTracks(tfreq, minTrackLength=3): """ - Delete short fragments of a collection of sinusoidal tracks - tfreq: frequency of tracks - minTrackLength: minimum duration of tracks in number of frames - returns tfreqn: output frequency of tracks - """ + Delete short fragments of a collection of sinusoidal tracks + tfreq: frequency of tracks + minTrackLength: minimum duration of tracks in number of frames + returns tfreqn: output frequency of tracks + """ if tfreq.shape[1] == 0: # if no tracks return input return tfreq @@ -81,27 +99,37 @@ def cleaningSineTracks(tfreq, minTrackLength=3): nTracks = tfreq[0, :].size # number of tracks in a frame for t in range(nTracks): # iterate over all tracks trackFreqs = tfreq[:, t] # frequencies of one track - trackBegs = np.nonzero((trackFreqs[:nFrames - 1] <= 0) # begining of track contours - & (trackFreqs[1:] > 0))[0] + 1 + trackBegs = ( + np.nonzero( + (trackFreqs[: nFrames - 1] <= 0) # begining of track contours + & (trackFreqs[1:] > 0) + )[0] + + 1 + ) if trackFreqs[0] > 0: trackBegs = np.insert(trackBegs, 0, 0) - trackEnds = np.nonzero((trackFreqs[:nFrames - 1] > 0) # end of track contours - & (trackFreqs[1:] <= 0))[0] + 1 + trackEnds = ( + np.nonzero( + (trackFreqs[: nFrames - 1] > 0) # end of track contours + & (trackFreqs[1:] <= 0) + )[0] + + 1 + ) if trackFreqs[nFrames - 1] > 0: trackEnds = np.append(trackEnds, nFrames - 1) trackLengths = 1 + trackEnds - trackBegs # lengths of trach contours for i, j in zip(trackBegs, trackLengths): # delete short track contours if j <= minTrackLength: - trackFreqs[i:i + j] = 0 + trackFreqs[i : i + j] = 0 return tfreq def sineModel(x, fs, w, N, t): """ - Analysis/synthesis of a sound using the sinusoidal model, without sine tracking - x: input array sound, w: analysis window, N: size of complex spectrum, t: threshold in negative dB - returns y: output array sound - """ + Analysis/synthesis of a sound using the sinusoidal model, without sine tracking + x: input array sound, w: analysis window, N: size of complex spectrum, t: threshold in negative dB + returns y: output array sound + """ hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor @@ -115,64 +143,93 @@ def sineModel(x, fs, w, N, t): w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) # initialize synthesis window ow = triang(2 * H) # triangular window - sw[hNs - H:hNs + H] = ow # add triangular window + sw[hNs - H : hNs + H] = ow # add triangular window bh = blackmanharris(Ns) # blackmanharris window bh = bh / sum(bh) # normalized blackmanharris window - sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H] # normalized synthesis window + sw[hNs - H : hNs + H] = ( + sw[hNs - H : hNs + H] / bh[hNs - H : hNs + H] + ) # normalized synthesis window while pin < pend: # while input sound pointer is within sound # -----analysis----- - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect locations of peaks - iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation + iploc, ipmag, ipphase = UF.peakInterp( + mX, pX, ploc + ) # refine peak values by interpolation ipfreq = fs * iploc / float(N) # convert peak locations to Hertz # -----synthesis----- - Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs) # generate sines in the spectrum + Y = UF.genSpecSines( + ipfreq, ipmag, ipphase, Ns, fs + ) # generate sines in the spectrum fftbuffer = np.real(ifft(Y)) # compute inverse FFT - yw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window - yw[hNs - 1:] = fftbuffer[:hNs + 1] - y[pin - hNs:pin + hNs] += sw * yw # overlap-add and apply a synthesis window + yw[: hNs - 1] = fftbuffer[hNs + 1 :] # undo zero-phase window + yw[hNs - 1 :] = fftbuffer[: hNs + 1] + y[pin - hNs : pin + hNs] += sw * yw # overlap-add and apply a synthesis window pin += H # advance sound pointer return y -def sineModelAnal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01): +def sineModelAnal( + x, + fs, + w, + N, + H, + t, + maxnSines=100, + minSineDur=0.01, + freqDevOffset=20, + freqDevSlope=0.01, +): + """ + Analysis of a sound using the sinusoidal model with sine tracking + x: input array sound, w: analysis window, N: size of complex spectrum, H: hop-size, t: threshold in negative dB + maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds + freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation + returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks """ - Analysis of a sound using the sinusoidal model with sine tracking - x: input array sound, w: analysis window, N: size of complex spectrum, H: hop-size, t: threshold in negative dB - maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds - freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation - returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks - """ - - if (minSineDur < 0): # raise error if minSineDur is smaller than 0 + + if minSineDur < 0: # raise error if minSineDur is smaller than 0 raise ValueError("Minimum duration of sine tracks smaller than 0") hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor - x = np.append(np.zeros(hM2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hM2)) # add zeros at the end to analyze last sample pin = hM1 # initialize sound pointer in middle of analysis window pend = x.size - hM1 # last sample to start a frame w = w / sum(w) # normalize analysis window tfreq = np.array([]) while pin < pend: # while input sound pointer is within sound - x1 = x[pin - hM1:pin + hM2] # select frame + x1 = x[pin - hM1 : pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect locations of peaks - iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values by interpolation + iploc, ipmag, ipphase = UF.peakInterp( + mX, pX, ploc + ) # refine peak values by interpolation ipfreq = fs * iploc / float(N) # convert peak locations to Hertz # perform sinusoidal tracking by adding peaks to trajectories - tfreq, tmag, tphase = sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope) - tfreq = np.resize(tfreq, min(maxnSines, tfreq.size)) # limit number of tracks to maxnSines - tmag = np.resize(tmag, min(maxnSines, tmag.size)) # limit number of tracks to maxnSines - tphase = np.resize(tphase, min(maxnSines, tphase.size)) # limit number of tracks to maxnSines + tfreq, tmag, tphase = sineTracking( + ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope + ) + tfreq = np.resize( + tfreq, min(maxnSines, tfreq.size) + ) # limit number of tracks to maxnSines + tmag = np.resize( + tmag, min(maxnSines, tmag.size) + ) # limit number of tracks to maxnSines + tphase = np.resize( + tphase, min(maxnSines, tphase.size) + ) # limit number of tracks to maxnSines jtfreq = np.zeros(maxnSines) # temporary output array jtmag = np.zeros(maxnSines) # temporary output array jtphase = np.zeros(maxnSines) # temporary output array - jtfreq[:tfreq.size] = tfreq # save track frequencies to temporary array - jtmag[:tmag.size] = tmag # save track magnitudes to temporary array - jtphase[:tphase.size] = tphase # save track magnitudes to temporary array + jtfreq[: tfreq.size] = tfreq # save track frequencies to temporary array + jtmag[: tmag.size] = tmag # save track magnitudes to temporary array + jtphase[: tphase.size] = tphase # save track magnitudes to temporary array if pin == hM1: # if first frame initialize output sine tracks xtfreq = jtfreq xtmag = jtmag @@ -189,11 +246,11 @@ def sineModelAnal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffse def sineModelSynth(tfreq, tmag, tphase, N, H, fs): """ - Synthesis of a sound using the sinusoidal model - tfreq,tmag,tphase: frequencies, magnitudes and phases of sinusoids - N: synthesis FFT size, H: hop size, fs: sampling rate - returns y: output array sound - """ + Synthesis of a sound using the sinusoidal model + tfreq,tmag,tphase: frequencies, magnitudes and phases of sinusoids + N: synthesis FFT size, H: hop size, fs: sampling rate + returns y: output array sound + """ hN = N // 2 # half of FFT size for synthesis L = tfreq.shape[0] # number of frames @@ -202,23 +259,29 @@ def sineModelSynth(tfreq, tmag, tphase, N, H, fs): y = np.zeros(ysize) # initialize output array sw = np.zeros(N) # initialize synthesis window ow = triang(2 * H) # triangular window - sw[hN - H:hN + H] = ow # add triangular window + sw[hN - H : hN + H] = ow # add triangular window bh = blackmanharris(N) # blackmanharris window bh = bh / sum(bh) # normalized blackmanharris window - sw[hN - H:hN + H] = sw[hN - H:hN + H] / bh[hN - H:hN + H] # normalized synthesis window + sw[hN - H : hN + H] = ( + sw[hN - H : hN + H] / bh[hN - H : hN + H] + ) # normalized synthesis window lastytfreq = tfreq[0, :] # initialize synthesis frequencies - ytphase = 2 * np.pi * np.random.rand(tfreq[0, :].size) # initialize synthesis phases + ytphase = ( + 2 * np.pi * np.random.rand(tfreq[0, :].size) + ) # initialize synthesis phases for l in range(L): # iterate over all frames - if (tphase.size > 0): # if no phases generate them + if tphase.size > 0: # if no phases generate them ytphase = tphase[l, :] else: ytphase += (np.pi * (lastytfreq + tfreq[l, :]) / fs) * H # propagate phases - # Y = UF.genSpecSines_p(tfreq[l,:], tmag[l,:], ytphase, N, fs) # generate sines in the spectrum (python version) - Y = UF.genSpecSines(tfreq[l, :], tmag[l, :], ytphase, N, fs) # generate sines in the spectrum + # Y = UF.genSpecSines_p(tfreq[l,:], tmag[l,:], ytphase, N, fs) # generate sines in the spectrum (python version) + Y = UF.genSpecSines( + tfreq[l, :], tmag[l, :], ytphase, N, fs + ) # generate sines in the spectrum lastytfreq = tfreq[l, :] # save frequency for phase propagation ytphase = ytphase % (2 * np.pi) # make phase inside 2*pi yw = np.real(fftshift(ifft(Y))) # compute inverse FFT - y[pout:pout + N] += sw * yw # overlap-add and apply a synthesis window + y[pout : pout + N] += sw * yw # overlap-add and apply a synthesis window pout += H # advance sound pointer y = np.delete(y, range(hN)) # delete half of first window y = np.delete(y, range(y.size - hN, y.size)) # delete half of the last window diff --git a/smstools/models/sprModel.py b/smstools/models/sprModel.py index 40450b08..fec54651 100644 --- a/smstools/models/sprModel.py +++ b/smstools/models/sprModel.py @@ -1,14 +1,17 @@ # functions that implement analysis and synthesis of sounds using the Sinusoidal plus Residual Model # (for example usage check the examples interface) +import math + import numpy as np -from scipy.signal.windows import blackmanharris, triang from scipy.fft import fft, ifft -import math +from scipy.signal.windows import blackmanharris, triang + from smstools.models import dftModel as DFT from smstools.models import sineModel as SM from smstools.models import utilFunctions as UF + def sprModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope): """ Analysis of a sound using the sinusoidal plus residual model @@ -21,11 +24,16 @@ def sprModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDe """ # perform sinusoidal analysis - tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope) + tfreq, tmag, tphase = SM.sineModelAnal( + x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope + ) Ns = 512 - xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs) # subtract sinusoids from original sound + xr = UF.sineSubtraction( + x, Ns, H, tfreq, tmag, tphase, fs + ) # subtract sinusoids from original sound return tfreq, tmag, tphase, xr + def sprModelSynth(tfreq, tmag, tphase, xr, N, H, fs): """ Synthesis of a sound using the sinusoidal plus residual model @@ -34,10 +42,13 @@ def sprModelSynth(tfreq, tmag, tphase, xr, N, H, fs): returns y: output sound, y: sinusoidal component """ - ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs) # synthesize sinusoids - y = ys[:min(ys.size, xr.size)]+xr[:min(ys.size, xr.size)] # sum sinusoids and residual components + ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs) # synthesize sinusoids + y = ( + ys[: min(ys.size, xr.size)] + xr[: min(ys.size, xr.size)] + ) # sum sinusoids and residual components return y, ys + def sprModel(x, fs, w, N, t): """ Analysis/synthesis of a sound using the sinusoidal plus residual model, one frame at a time @@ -46,49 +57,53 @@ def sprModel(x, fs, w, N, t): returns y: output sound, ys: sinusoidal component, xr: residual component """ - hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding - hM2 = int(math.floor(w.size/2)) # half analysis window size by floor - Ns = 512 # FFT size for synthesis (even) - H = Ns//4 # Hop size used for analysis and synthesis - hNs = Ns//2 - pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window - pend = x.size - max(hNs, hM1) # last sample to start a frame - ysw = np.zeros(Ns) # initialize output sound frame - xrw = np.zeros(Ns) # initialize output sound frame - ys = np.zeros(x.size) # initialize output array - xr = np.zeros(x.size) # initialize output array - w = w / sum(w) # normalize analysis window + hM1 = int(math.floor((w.size + 1) / 2)) # half analysis window size by rounding + hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor + Ns = 512 # FFT size for synthesis (even) + H = Ns // 4 # Hop size used for analysis and synthesis + hNs = Ns // 2 + pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window + pend = x.size - max(hNs, hM1) # last sample to start a frame + ysw = np.zeros(Ns) # initialize output sound frame + xrw = np.zeros(Ns) # initialize output sound frame + ys = np.zeros(x.size) # initialize output array + xr = np.zeros(x.size) # initialize output array + w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) - ow = triang(2*H) # overlapping window - sw[hNs-H:hNs+H] = ow - bh = blackmanharris(Ns) # synthesis window - bh = bh / sum(bh) # normalize synthesis window - wr = bh # window for residual - sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] - while pin 1): # raise exception if decimation factor too big + if stocf > 1: # raise exception if decimation factor too big raise ValueError("Stochastic decimation factor above 1") - if (H <= 0): # raise error if hop size 0 or negative + if H <= 0: # raise error if hop size 0 or negative raise ValueError("Hop size (H) smaller or equal to 0") if not (UF.isPower2(N)): # raise error if N not a power of two raise ValueError("FFT size (N) is not a power of 2") w = hann(N) # analysis window - x = np.append(np.zeros(No2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(No2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(No2)) # add zeros at the end to analyze last sample pin = No2 # initialize sound pointer in middle of analysis window pend = x.size - No2 # last sample to start a frame if melScale == 1: - binFreqsMel = hertz_to_mel(np.arange(hN)*fs/float(N)) + binFreqsMel = hertz_to_mel(np.arange(hN) * fs / float(N)) uniformMelFreq = np.linspace(binFreqsMel[0], binFreqsMel[-1], hN) while pin <= pend: - xw = x[pin - No2:pin + No2] * w # window the input sound + xw = x[pin - No2 : pin + No2] * w # window the input sound X = fft(xw) # compute FFT mX = 20 * np.log10(abs(X[:hN])) # magnitude spectrum of positive frequencies if melScale == 1: spl = splrep(binFreqsMel, np.maximum(-200, mX)) - mY = resample(splev(uniformMelFreq, spl), int(stocf * hN)) # decimate the mag spectrum + mY = resample( + splev(uniformMelFreq, spl), int(stocf * hN) + ) # decimate the mag spectrum else: - mY = resample(np.maximum(-200, mX), int(stocf * hN)) # decimate the mag spectrum + mY = resample( + np.maximum(-200, mX), int(stocf * hN) + ) # decimate the mag spectrum if pin == No2: # first frame stocEnv = np.array([mY]) else: # rest of frames @@ -71,12 +81,12 @@ def stochasticModelAnal(x, H, N, stocf, fs=44100, melScale=1): def stochasticModelSynth(stocEnv, H, N, fs=44100, melScale=1): """ - Stochastic synthesis of a sound - stocEnv: stochastic envelope; H: hop size; N: fft size - fs: sampling rate - melScale: choose between linear scale, 0, or mel scale, 1 (should match the analysis) - returns y: output sound - """ + Stochastic synthesis of a sound + stocEnv: stochastic envelope; H: hop size; N: fft size + fs: sampling rate + melScale: choose between linear scale, 0, or mel scale, 1 (should match the analysis) + returns y: output sound + """ if not (UF.isPower2(N)): # raise error if N not a power of two raise ValueError("N is not a power of two") @@ -89,7 +99,7 @@ def stochasticModelSynth(stocEnv, H, N, fs=44100, melScale=1): ws = 2 * hann(N) # synthesis window pout = 0 # output sound pointer if melScale == 1: - binFreqsMel = hertz_to_mel(np.arange(hN)*fs/float(N)) + binFreqsMel = hertz_to_mel(np.arange(hN) * fs / float(N)) uniformMelFreq = np.linspace(binFreqsMel[0], binFreqsMel[-1], hN) for l in range(L): mY = resample(stocEnv[l, :], hN) # interpolate to original size @@ -99,9 +109,11 @@ def stochasticModelSynth(stocEnv, H, N, fs=44100, melScale=1): pY = 2 * np.pi * np.random.rand(hN) # generate phase random values Y = np.zeros(N, dtype=complex) # initialize synthesis spectrum Y[:hN] = 10 ** (mY / 20) * np.exp(1j * pY) # generate positive freq. - Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp(-1j * pY[-2:0:-1]) # generate negative freq. + Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp( + -1j * pY[-2:0:-1] + ) # generate negative freq. fftbuffer = np.real(ifft(Y)) # inverse FFT - y[pout:pout + N] += ws * fftbuffer # overlap-add + y[pout : pout + N] += ws * fftbuffer # overlap-add pout += H y = np.delete(y, range(No2)) # delete half of first window y = np.delete(y, range(y.size - No2, y.size)) # delete half of the last window @@ -110,46 +122,52 @@ def stochasticModelSynth(stocEnv, H, N, fs=44100, melScale=1): def stochasticModel(x, H, N, stocf, fs=44100, melScale=1): """ - Stochastic analysis/synthesis of a sound, one frame at a time - x: input array sound, H: hop size, N: fft size - stocf: decimation factor of mag spectrum for stochastic analysis, bigger than 0, maximum of 1 - fs: sampling rate - melScale: choose between linear scale, 0, or mel scale, 1 (should match the analysis) - returns y: output sound - """ + Stochastic analysis/synthesis of a sound, one frame at a time + x: input array sound, H: hop size, N: fft size + stocf: decimation factor of mag spectrum for stochastic analysis, bigger than 0, maximum of 1 + fs: sampling rate + melScale: choose between linear scale, 0, or mel scale, 1 (should match the analysis) + returns y: output sound + """ hN = N // 2 + 1 # positive size of fft No2 = N // 2 # half of N - if (hN * stocf < 3): # raise exception if decimation factor too small + if hN * stocf < 3: # raise exception if decimation factor too small raise ValueError("Stochastic decimation factor too small") - if (stocf > 1): # raise exception if decimation factor too big + if stocf > 1: # raise exception if decimation factor too big raise ValueError("Stochastic decimation factor above 1") - if (H <= 0): # raise error if hop size 0 or negative + if H <= 0: # raise error if hop size 0 or negative raise ValueError("Hop size (H) smaller or equal to 0") if not (UF.isPower2(N)): # raise error if N not a power of twou raise ValueError("FFT size (N) is not a power of 2") w = hann(N) # analysis/synthesis window - x = np.append(np.zeros(No2), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(No2), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(No2)) # add zeros at the end to analyze last sample pin = No2 # initialize sound pointer in middle of analysis window pend = x.size - No2 # last sample to start a frame y = np.zeros(x.size) # initialize output array if melScale == 1: - binFreqsMel = hertz_to_mel(np.arange(hN)*fs/float(N)) + binFreqsMel = hertz_to_mel(np.arange(hN) * fs / float(N)) uniformMelFreq = np.linspace(binFreqsMel[0], binFreqsMel[-1], hN) while pin <= pend: # -----analysis----- - xw = x[pin - No2:pin + No2] * w # window the input sound + xw = x[pin - No2 : pin + No2] * w # window the input sound X = fft(xw) # compute FFT mX = 20 * np.log10(abs(X[:hN])) # magnitude spectrum of positive frequencies if melScale == 1: spl = splrep(binFreqsMel, np.maximum(-200, mX)) - stocEnv = resample(splev(uniformMelFreq, spl), int(stocf * hN)) # decimate the mag spectrum + stocEnv = resample( + splev(uniformMelFreq, spl), int(stocf * hN) + ) # decimate the mag spectrum else: - stocEnv = resample(np.maximum(-200, mX), int(stocf * hN)) # decimate the mag spectrum + stocEnv = resample( + np.maximum(-200, mX), int(stocf * hN) + ) # decimate the mag spectrum # -----synthesis----- mY = resample(stocEnv, hN) # interpolate to original size if melScale == 1: @@ -158,10 +176,14 @@ def stochasticModel(x, H, N, stocf, fs=44100, melScale=1): pY = 2 * np.pi * np.random.rand(hN) # generate phase random values Y = np.zeros(N, dtype=complex) Y[:hN] = 10 ** (mY / 20) * np.exp(1j * pY) # generate positive freq. - Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp(-1j * pY[-2:0:-1]) # generate negative freq. + Y[hN:] = 10 ** (mY[-2:0:-1] / 20) * np.exp( + -1j * pY[-2:0:-1] + ) # generate negative freq. fftbuffer = np.real(ifft(Y)) # inverse FFT - y[pin - No2:pin + No2] += w * fftbuffer # overlap-add + y[pin - No2 : pin + No2] += w * fftbuffer # overlap-add pin += H # advance sound pointer y = np.delete(y, range(No2)) # delete half of first window which was added - y = np.delete(y, range(y.size - No2, y.size)) # delete half of last window which was added + y = np.delete( + y, range(y.size - No2, y.size) + ) # delete half of last window which was added return y diff --git a/smstools/models/utilFunctions.py b/smstools/models/utilFunctions.py index de7e3d51..c7e7aa7e 100644 --- a/smstools/models/utilFunctions.py +++ b/smstools/models/utilFunctions.py @@ -4,8 +4,8 @@ import sys import numpy as np -from scipy.fft import fft, ifft, fftshift -from scipy.io.wavfile import write, read +from scipy.fft import fft, fftshift, ifft +from scipy.io.wavfile import read, write from scipy.signal import resample from scipy.signal.windows import blackmanharris, triang @@ -13,13 +13,17 @@ from smstools.models.utilFunctions_C import utilFunctions_C as UF_C except ImportError: print("\n") - print("-------------------------------------------------------------------------------") + print( + "-------------------------------------------------------------------------------" + ) print("Warning:") print("Cython modules for some of the core functions were not imported.") print("Please refer to the README.md file in the 'sms-tools' directory,") print("for the instructions to compile the cython modules.") print("Exiting the code!!") - print("-------------------------------------------------------------------------------") + print( + "-------------------------------------------------------------------------------" + ) print("\n") sys.exit(0) @@ -35,33 +39,39 @@ def isPower2(num): """ - Check if num is power of two - """ + Check if num is power of two + """ return ((num & (num - 1)) == 0) and num > 0 -INT16_FAC = (2 ** 15) -INT32_FAC = (2 ** 31) -INT64_FAC = (2 ** 63) -norm_fact = {'int16': INT16_FAC, 'int32': INT32_FAC, 'int64': INT64_FAC, 'float32': 1.0, 'float64': 1.0} +INT16_FAC = 2**15 +INT32_FAC = 2**31 +INT64_FAC = 2**63 +norm_fact = { + "int16": INT16_FAC, + "int32": INT32_FAC, + "int64": INT64_FAC, + "float32": 1.0, + "float64": 1.0, +} def wavread(filename): """ - Read a sound file and convert it to a normalized floating point array - filename: name of file to read - returns fs: sampling rate of file, x: floating point array - """ + Read a sound file and convert it to a normalized floating point array + filename: name of file to read + returns fs: sampling rate of file, x: floating point array + """ - if (os.path.isfile(filename) == False): # raise error if wrong input file + if os.path.isfile(filename) == False: # raise error if wrong input file raise ValueError("Input file is wrong") fs, x = read(filename) - if (len(x.shape) != 1): # raise error if more than one channel + if len(x.shape) != 1: # raise error if more than one channel raise ValueError("Audio file should be mono") - if (fs != 44100): # raise error if more than one channel + if fs != 44100: # raise error if more than one channel raise ValueError("Sampling rate of input sound should be 44100") # scale down and convert audio into floating point number in range of -1 to 1 @@ -71,11 +81,13 @@ def wavread(filename): def wavplay(filename): """ - Play a wav audio file from system using OS calls - filename: name of file to read - """ - if (os.path.isfile(filename) == False): # raise error if wrong input file - print("Input file does not exist. Make sure you computed the analysis/synthesis") + Play a wav audio file from system using OS calls + filename: name of file to read + """ + if os.path.isfile(filename) == False: # raise error if wrong input file + print( + "Input file does not exist. Make sure you computed the analysis/synthesis" + ) else: if sys.platform == "linux" or sys.platform == "linux2": # linux @@ -95,10 +107,10 @@ def wavplay(filename): def wavwrite(y, fs, filename): """ - Write a sound file from an array with the sound and the sampling rate - y: floating point array of one dimension, fs: sampling rate - filename: name of file to create - """ + Write a sound file from an array with the sound and the sampling rate + y: floating point array of one dimension, fs: sampling rate + filename: name of file to create + """ x = copy.deepcopy(y) # copy array x *= INT16_FAC # scaling floating point -1 to 1 range signal to int16 range @@ -108,14 +120,18 @@ def wavwrite(y, fs, filename): def peakDetection(mX, t): """ - Detect spectral peak locations - mX: magnitude spectrum, t: threshold - returns ploc: peak locations - """ + Detect spectral peak locations + mX: magnitude spectrum, t: threshold + returns ploc: peak locations + """ thresh = np.where(np.greater(mX[1:-1], t), mX[1:-1], 0) # locations above threshold - next_minor = np.where(mX[1:-1] > mX[2:], mX[1:-1], 0) # locations higher than the next one - prev_minor = np.where(mX[1:-1] > mX[:-2], mX[1:-1], 0) # locations higher than the previous one + next_minor = np.where( + mX[1:-1] > mX[2:], mX[1:-1], 0 + ) # locations higher than the next one + prev_minor = np.where( + mX[1:-1] > mX[:-2], mX[1:-1], 0 + ) # locations higher than the previous one ploc = thresh * next_minor * prev_minor # locations fulfilling the three criteria ploc = ploc.nonzero()[0] + 1 # add 1 to compensate for previous steps return ploc @@ -123,26 +139,28 @@ def peakDetection(mX, t): def peakInterp(mX, pX, ploc): """ - Interpolate peak values using parabolic interpolation - mX, pX: magnitude and phase spectrum, ploc: locations of peaks - returns iploc, ipmag, ipphase: interpolated peak location, magnitude and phase values - """ + Interpolate peak values using parabolic interpolation + mX, pX: magnitude and phase spectrum, ploc: locations of peaks + returns iploc, ipmag, ipphase: interpolated peak location, magnitude and phase values + """ val = mX[ploc] # magnitude of peak bin lval = mX[ploc - 1] # magnitude of bin at left rval = mX[ploc + 1] # magnitude of bin at right iploc = ploc + 0.5 * (lval - rval) / (lval - 2 * val + rval) # center of parabola ipmag = val - 0.25 * (lval - rval) * (iploc - ploc) # magnitude of peaks - ipphase = np.interp(iploc, np.arange(0, pX.size), pX) # phase of peaks by linear interpolation + ipphase = np.interp( + iploc, np.arange(0, pX.size), pX + ) # phase of peaks by linear interpolation return iploc, ipmag, ipphase def sinc(x, N): """ - Generate the main lobe of a sinc function (Dirichlet kernel) - x: array of indexes to compute; N: size of FFT to simulate - returns y: samples of the main lobe of a sinc function - """ + Generate the main lobe of a sinc function (Dirichlet kernel) + x: array of indexes to compute; N: size of FFT to simulate + returns y: samples of the main lobe of a sinc function + """ y = np.sin(N * x / 2) / np.sin(x / 2) # compute the sinc function y[np.isnan(y)] = N # avoid NaN if x == 0 @@ -151,10 +169,10 @@ def sinc(x, N): def genBhLobe(x): """ - Generate the main lobe of a Blackman-Harris window - x: bin positions to compute (real values) - returns y: main lobe os spectrum of a Blackman-Harris window - """ + Generate the main lobe of a Blackman-Harris window + x: bin positions to compute (real values) + returns y: main lobe os spectrum of a Blackman-Harris window + """ N = 512 # size of fft to use f = x * np.pi * 2 / N # frequency sampling @@ -162,18 +180,20 @@ def genBhLobe(x): y = np.zeros(x.size) # initialize window consts = [0.35875, 0.48829, 0.14128, 0.01168] # window constants for m in range(0, 4): # iterate over the four sincs to sum - y += consts[m] / 2 * (sinc(f - df * m, N) + sinc(f + df * m, N)) # sum of scaled sinc functions + y += ( + consts[m] / 2 * (sinc(f - df * m, N) + sinc(f + df * m, N)) + ) # sum of scaled sinc functions y = y / N / consts[0] # normalize return y def genSpecSines(ipfreq, ipmag, ipphase, N, fs): """ - Generate a spectrum from a series of sine values, calling a C function - ipfreq, ipmag, ipphase: sine peaks frequencies, magnitudes and phases - N: size of the complex spectrum to generate; fs: sampling frequency - returns Y: generated complex spectrum of sines - """ + Generate a spectrum from a series of sine values, calling a C function + ipfreq, ipmag, ipphase: sine peaks frequencies, magnitudes and phases + N: size of the complex spectrum to generate; fs: sampling frequency + returns Y: generated complex spectrum of sines + """ Y = UF_C.genSpecSines(N * ipfreq / float(fs), ipmag, ipphase, N) return Y @@ -181,41 +201,50 @@ def genSpecSines(ipfreq, ipmag, ipphase, N, fs): def genSpecSines_p(ipfreq, ipmag, ipphase, N, fs): """ - Generate a spectrum from a series of sine values - iploc, ipmag, ipphase: sine peaks locations, magnitudes and phases - N: size of the complex spectrum to generate; fs: sampling rate - returns Y: generated complex spectrum of sines - """ + Generate a spectrum from a series of sine values + iploc, ipmag, ipphase: sine peaks locations, magnitudes and phases + N: size of the complex spectrum to generate; fs: sampling rate + returns Y: generated complex spectrum of sines + """ Y = np.zeros(N, dtype=complex) # initialize output complex spectrum hN = N // 2 # size of positive freq. spectrum for i in range(0, ipfreq.size): # generate all sine spectral lobes loc = N * ipfreq[i] / fs # it should be in range ]0,hN-1[ - if loc == 0 or loc > hN - 1: continue + if loc == 0 or loc > hN - 1: + continue binremainder = round(loc) - loc - lb = np.arange(binremainder - 4, binremainder + 5) # main lobe (real value) bins to read - lmag = genBhLobe(lb) * 10 ** (ipmag[i] / 20) # lobe magnitudes of the complex exponential - b = np.arange(round(loc) - 4, round(loc) + 5, dtype='int') + lb = np.arange( + binremainder - 4, binremainder + 5 + ) # main lobe (real value) bins to read + lmag = genBhLobe(lb) * 10 ** ( + ipmag[i] / 20 + ) # lobe magnitudes of the complex exponential + b = np.arange(round(loc) - 4, round(loc) + 5, dtype="int") for m in range(0, 9): if b[m] < 0: # peak lobe crosses DC bin Y[-b[m]] += lmag[m] * np.exp(-1j * ipphase[i]) elif b[m] > hN: # peak lobe croses Nyquist bin Y[b[m]] += lmag[m] * np.exp(-1j * ipphase[i]) elif b[m] == 0 or b[m] == hN: # peak lobe in the limits of the spectrum - Y[b[m]] += lmag[m] * np.exp(1j * ipphase[i]) + lmag[m] * np.exp(-1j * ipphase[i]) + Y[b[m]] += lmag[m] * np.exp(1j * ipphase[i]) + lmag[m] * np.exp( + -1j * ipphase[i] + ) else: # peak lobe in positive freq. range Y[b[m]] += lmag[m] * np.exp(1j * ipphase[i]) - Y[hN + 1:] = Y[hN - 1:0:-1].conjugate() # fill the negative part of the spectrum + Y[hN + 1 :] = Y[ + hN - 1 : 0 : -1 + ].conjugate() # fill the negative part of the spectrum return Y def sinewaveSynth(freqs, amp, H, fs): """ - Synthesis of one sinusoid with time-varying frequency - freqs, amps: array of frequencies and amplitudes of sinusoids - H: hop size, fs: sampling rate - returns y: output array sound - """ + Synthesis of one sinusoid with time-varying frequency + freqs, amps: array of frequencies and amplitudes of sinusoids + H: hop size, fs: sampling rate + returns y: output array sound + """ t = np.arange(H) / float(fs) # time array lastphase = 0 # initialize synthesis phase @@ -230,7 +259,7 @@ def sinewaveSynth(freqs, amp, H, fs): freq = np.ones(H) * freqs[l] elif (lastfreq > 0) & (freqs[l] > 0): # if freqs in boundaries use both A = np.ones(H) * amp - if (lastfreq == freqs[l]): + if lastfreq == freqs[l]: freq = np.ones(H) * lastfreq else: freq = np.arange(lastfreq, freqs[l], (freqs[l] - lastfreq) / H) @@ -240,75 +269,93 @@ def sinewaveSynth(freqs, amp, H, fs): phase = 2 * np.pi * freq * t + lastphase # generate phase values yh = A * np.cos(phase) # compute sine for one frame lastfreq = freqs[l] # save frequency for phase propagation - lastphase = np.remainder(phase[H - 1], 2 * np.pi) # save phase to be use for next frame + lastphase = np.remainder( + phase[H - 1], 2 * np.pi + ) # save phase to be use for next frame y = np.append(y, yh) # append frame to previous one return y def cleaningTrack(track, minTrackLength=3): """ - Delete fragments of one single track smaller than minTrackLength - track: array of values; minTrackLength: minimum duration of tracks in number of frames - returns cleanTrack: array of clean values - """ + Delete fragments of one single track smaller than minTrackLength + track: array of values; minTrackLength: minimum duration of tracks in number of frames + returns cleanTrack: array of clean values + """ nFrames = track.size # number of frames cleanTrack = np.copy(track) # copy array - trackBegs = np.nonzero((track[:nFrames - 1] <= 0) # beginning of track contours - & (track[1:] > 0))[0] + 1 + trackBegs = ( + np.nonzero( + (track[: nFrames - 1] <= 0) & (track[1:] > 0) # beginning of track contours + )[0] + + 1 + ) if track[0] > 0: trackBegs = np.insert(trackBegs, 0, 0) - trackEnds = np.nonzero((track[:nFrames - 1] > 0) & (track[1:] <= 0))[0] + 1 + trackEnds = np.nonzero((track[: nFrames - 1] > 0) & (track[1:] <= 0))[0] + 1 if track[nFrames - 1] > 0: trackEnds = np.append(trackEnds, nFrames - 1) trackLengths = 1 + trackEnds - trackBegs # lengths of trach contours for i, j in zip(trackBegs, trackLengths): # delete short track contours if j <= minTrackLength: - cleanTrack[i:i + j] = 0 + cleanTrack[i : i + j] = 0 return cleanTrack def f0Twm(pfreq, pmag, ef0max, minf0, maxf0, f0t=0): """ - Function that wraps the f0 detection function TWM, selecting the possible f0 candidates - and calling the function TWM with them - pfreq, pmag: peak frequencies and magnitudes, - ef0max: maximum error allowed, minf0, maxf0: minimum and maximum f0 - f0t: f0 of previous frame if stable - returns f0: fundamental frequency in Hz - """ - if (minf0 < 0): # raise exception if minf0 is smaller than 0 + Function that wraps the f0 detection function TWM, selecting the possible f0 candidates + and calling the function TWM with them + pfreq, pmag: peak frequencies and magnitudes, + ef0max: maximum error allowed, minf0, maxf0: minimum and maximum f0 + f0t: f0 of previous frame if stable + returns f0: fundamental frequency in Hz + """ + if minf0 < 0: # raise exception if minf0 is smaller than 0 raise ValueError("Minimum fundamental frequency (minf0) smaller than 0") - if (maxf0 >= 10000): # raise exception if maxf0 is bigger than 10000Hz + if maxf0 >= 10000: # raise exception if maxf0 is bigger than 10000Hz raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz") - if (pfreq.size < 3) & (f0t == 0): # return 0 if less than 3 peaks and not previous f0 + if (pfreq.size < 3) & ( + f0t == 0 + ): # return 0 if less than 3 peaks and not previous f0 return 0 - f0c = np.argwhere((pfreq > minf0) & (pfreq < maxf0))[:, 0] # use only peaks within given range - if (f0c.size == 0): # return 0 if no peaks within range + f0c = np.argwhere((pfreq > minf0) & (pfreq < maxf0))[ + :, 0 + ] # use only peaks within given range + if f0c.size == 0: # return 0 if no peaks within range return 0 f0cf = pfreq[f0c] # frequencies of peak candidates f0cm = pmag[f0c] # magnitude of peak candidates if f0t > 0: # if stable f0 in previous frame - shortlist = np.argwhere(np.abs(f0cf - f0t) < f0t / 2.0)[:, 0] # use only peaks close to it + shortlist = np.argwhere(np.abs(f0cf - f0t) < f0t / 2.0)[ + :, 0 + ] # use only peaks close to it maxc = np.argmax(f0cm) maxcfd = f0cf[maxc] % f0t if maxcfd > f0t / 2: maxcfd = f0t - maxcfd - if (maxc not in shortlist) and (maxcfd > (f0t / 4)): # or the maximum magnitude peak is not a harmonic + if (maxc not in shortlist) and ( + maxcfd > (f0t / 4) + ): # or the maximum magnitude peak is not a harmonic shortlist = np.append(maxc, shortlist) f0cf = f0cf[shortlist] # frequencies of candidates - if (f0cf.size == 0): # return 0 if no peak candidates + if f0cf.size == 0: # return 0 if no peak candidates return 0 - f0, f0error = UF_C.twm(pfreq, pmag, f0cf) # call the TWM function with peak candidates, cython version - # f0, f0error = TWM_p(pfreq, pmag, f0cf) # call the TWM function with peak candidates, python version + f0, f0error = UF_C.twm( + pfreq, pmag, f0cf + ) # call the TWM function with peak candidates, cython version + # f0, f0error = TWM_p(pfreq, pmag, f0cf) # call the TWM function with peak candidates, python version - if (f0 > 0) and (f0error < ef0max): # accept and return f0 if below max error allowed + if (f0 > 0) and ( + f0error < ef0max + ): # accept and return f0 if below max error allowed return f0 else: return 0 @@ -316,12 +363,12 @@ def f0Twm(pfreq, pmag, ef0max, minf0, maxf0, f0t=0): def TWM_p(pfreq, pmag, f0c): """ - Two-way mismatch algorithm for f0 detection (by Beauchamp&Maher) - [better to use the C version of this function: UF_C.twm] - pfreq, pmag: peak frequencies in Hz and magnitudes, - f0c: frequencies of f0 candidates - returns f0, f0Error: fundamental frequency detected and its error - """ + Two-way mismatch algorithm for f0 detection (by Beauchamp&Maher) + [better to use the C version of this function: UF_C.twm] + pfreq, pmag: peak frequencies in Hz and magnitudes, + f0c: frequencies of f0 candidates + returns f0, f0Error: fundamental frequency detected and its error + """ p = 0.5 # weighting by frequency value q = 1.4 # weighting related to magnitude of peaks @@ -363,60 +410,74 @@ def TWM_p(pfreq, pmag, f0c): def sineSubtraction(x, N, H, sfreq, smag, sphase, fs): """ - Subtract sinusoids from a sound - x: input sound, N: fft-size, H: hop-size - sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases - returns xr: residual sound - """ + Subtract sinusoids from a sound + x: input sound, N: fft-size, H: hop-size + sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases + returns xr: residual sound + """ hN = N // 2 # half of fft size - x = np.append(np.zeros(hN), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hN), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hN)) # add zeros at the end to analyze last sample bh = blackmanharris(N) # blackman harris window w = bh / sum(bh) # normalize window sw = np.zeros(N) # initialize synthesis window - sw[hN - H:hN + H] = triang(2 * H) / w[hN - H:hN + H] # synthesis window + sw[hN - H : hN + H] = triang(2 * H) / w[hN - H : hN + H] # synthesis window L = sfreq.shape[0] # number of frames, this works if no sines xr = np.zeros(x.size) # initialize output array pin = 0 for l in range(L): - xw = x[pin:pin + N] * w # window the input sound + xw = x[pin : pin + N] * w # window the input sound X = fft(fftshift(xw)) # compute FFT - Yh = UF_C.genSpecSines(N * sfreq[l, :] / fs, smag[l, :], sphase[l, :], N) # generate spec sines, cython version - # Yh = genSpecSines_p(N*sfreq[l,:]/fs, smag[l,:], sphase[l,:], N, fs) # generate spec sines, python version + Yh = UF_C.genSpecSines( + N * sfreq[l, :] / fs, smag[l, :], sphase[l, :], N + ) # generate spec sines, cython version + # Yh = genSpecSines_p(N*sfreq[l,:]/fs, smag[l,:], sphase[l,:], N, fs) # generate spec sines, python version Xr = X - Yh # subtract sines from original spectrum xrw = np.real(fftshift(ifft(Xr))) # inverse FFT - xr[pin:pin + N] += xrw * sw # overlap-add + xr[pin : pin + N] += xrw * sw # overlap-add pin += H # advance sound pointer - xr = np.delete(xr, range(hN)) # delete half of first window which was added in stftAnal - xr = np.delete(xr, range(xr.size - hN, xr.size)) # delete half of last window which was added in stftAnal + xr = np.delete( + xr, range(hN) + ) # delete half of first window which was added in stftAnal + xr = np.delete( + xr, range(xr.size - hN, xr.size) + ) # delete half of last window which was added in stftAnal return xr def stochasticResidualAnal(x, N, H, sfreq, smag, sphase, fs, stocf): """ - Subtract sinusoids from a sound and approximate the residual with an envelope - x: input sound, N: fft size, H: hop-size - sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases - fs: sampling rate; stocf: stochastic factor, used in the approximation - returns stocEnv: stochastic approximation of residual - """ + Subtract sinusoids from a sound and approximate the residual with an envelope + x: input sound, N: fft size, H: hop-size + sfreq, smag, sphase: sinusoidal frequencies, magnitudes and phases + fs: sampling rate; stocf: stochastic factor, used in the approximation + returns stocEnv: stochastic approximation of residual + """ hN = N // 2 # half of fft size - x = np.append(np.zeros(hN), x) # add zeros at beginning to center first window at sample 0 + x = np.append( + np.zeros(hN), x + ) # add zeros at beginning to center first window at sample 0 x = np.append(x, np.zeros(hN)) # add zeros at the end to analyze last sample bh = blackmanharris(N) # synthesis window w = bh / sum(bh) # normalize synthesis window L = sfreq.shape[0] # number of frames, this works if no sines pin = 0 for l in range(L): - xw = x[pin:pin + N] * w # window the input sound + xw = x[pin : pin + N] * w # window the input sound X = fft(fftshift(xw)) # compute FFT - Yh = UF_C.genSpecSines(N * sfreq[l, :] / fs, smag[l, :], sphase[l, :], N) # generate spec sines, cython version - # Yh = genSpecSines_p(N*sfreq[l,:]/fs, smag[l,:], sphase[l,:], N, fs) # generate spec sines, python version + Yh = UF_C.genSpecSines( + N * sfreq[l, :] / fs, smag[l, :], sphase[l, :], N + ) # generate spec sines, cython version + # Yh = genSpecSines_p(N*sfreq[l,:]/fs, smag[l,:], sphase[l,:], N, fs) # generate spec sines, python version Xr = X - Yh # subtract sines from original spectrum mXr = 20 * np.log10(abs(Xr[:hN])) # magnitude spectrum of residual - mXrenv = resample(np.maximum(-200, mXr), mXr.size * stocf) # decimate the mag spectrum + mXrenv = resample( + np.maximum(-200, mXr), mXr.size * stocf + ) # decimate the mag spectrum if l == 0: # if first frame stocEnv = np.array([mXrenv]) else: # rest of frames diff --git a/smstools/models/utilFunctions_C/cutilFunctions.pyx b/smstools/models/utilFunctions_C/cutilFunctions.pyx index 85e3686e..06e3f5b8 100644 --- a/smstools/models/utilFunctions_C/cutilFunctions.pyx +++ b/smstools/models/utilFunctions_C/cutilFunctions.pyx @@ -1,9 +1,10 @@ #this is a cython wrapper on C functions to call them in python import numpy as np + cimport numpy as np -from libc.stdlib cimport * from cutilFunctions cimport * +from libc.stdlib cimport * np.import_array() diff --git a/smstools/transformations/harmonicTransformations.py b/smstools/transformations/harmonicTransformations.py index 2872adcf..862bf4c1 100644 --- a/smstools/transformations/harmonicTransformations.py +++ b/smstools/transformations/harmonicTransformations.py @@ -4,27 +4,33 @@ from scipy.interpolate import interp1d -def harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs): +def harmonicFreqScaling( + hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs +): """ - Frequency scaling of the harmonics of a sound - hfreq, hmag: frequencies and magnitudes of input harmonics - freqScaling: scaling factors, in time-value pairs (value of 1 no scaling) - freqStretching: stretching factors, in time-value pairs (value of 1 no stretching) - timbrePreservation: 0 no timbre preservation, 1 timbre preservation - fs: sampling rate of input sound - returns yhfreq, yhmag: frequencies and magnitudes of output harmonics - """ - if (freqScaling.size % 2 != 0): # raise exception if array not even length + Frequency scaling of the harmonics of a sound + hfreq, hmag: frequencies and magnitudes of input harmonics + freqScaling: scaling factors, in time-value pairs (value of 1 no scaling) + freqStretching: stretching factors, in time-value pairs (value of 1 no stretching) + timbrePreservation: 0 no timbre preservation, 1 timbre preservation + fs: sampling rate of input sound + returns yhfreq, yhmag: frequencies and magnitudes of output harmonics + """ + if freqScaling.size % 2 != 0: # raise exception if array not even length raise ValueError("Frequency scaling array does not have an even size") - if (freqStretching.size % 2 != 0): # raise exception if array not even length + if freqStretching.size % 2 != 0: # raise exception if array not even length raise ValueError("Frequency stretching array does not have an even size") L = hfreq.shape[0] # number of frames # create interpolation object with the scaling values - freqScalingEnv = np.interp(np.arange(L), L * freqScaling[::2] / freqScaling[-2], freqScaling[1::2]) + freqScalingEnv = np.interp( + np.arange(L), L * freqScaling[::2] / freqScaling[-2], freqScaling[1::2] + ) # create interpolation object with the stretching values - freqStretchingEnv = np.interp(np.arange(L), L * freqStretching[::2] / freqStretching[-2], freqStretching[1::2]) + freqStretchingEnv = np.interp( + np.arange(L), L * freqStretching[::2] / freqStretching[-2], freqStretching[1::2] + ) yhfreq = np.zeros_like(hfreq) # create empty output matrix yhmag = np.zeros_like(hmag) # create empty output matrix for l in range(L): # go through all frames @@ -36,11 +42,19 @@ def harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreserva x_vals = np.append(np.append(0, hfreq[l, ind_valid]), fs / 2) # values of harmonic magnitudes to be considered for interpolation y_vals = np.append(np.append(hmag[l, 0], hmag[l, ind_valid]), hmag[l, -1]) - specEnvelope = interp1d(x_vals, y_vals, kind='linear', bounds_error=False, fill_value=-100) - yhfreq[l, ind_valid] = hfreq[l, ind_valid] * freqScalingEnv[l] # scale frequencies - yhfreq[l, ind_valid] = yhfreq[l, ind_valid] * (freqStretchingEnv[l] ** ind_valid) # stretch frequencies + specEnvelope = interp1d( + x_vals, y_vals, kind="linear", bounds_error=False, fill_value=-100 + ) + yhfreq[l, ind_valid] = ( + hfreq[l, ind_valid] * freqScalingEnv[l] + ) # scale frequencies + yhfreq[l, ind_valid] = yhfreq[l, ind_valid] * ( + freqStretchingEnv[l] ** ind_valid + ) # stretch frequencies if (timbrePreservation == 1) & (ind_valid.size > 1): # if timbre preservation - yhmag[l, ind_valid] = specEnvelope(yhfreq[l, ind_valid]) # change amplitudes to maintain timbre + yhmag[l, ind_valid] = specEnvelope( + yhfreq[l, ind_valid] + ) # change amplitudes to maintain timbre else: yhmag[l, ind_valid] = hmag[l, ind_valid] # use same amplitudes as input return yhfreq, yhmag diff --git a/smstools/transformations/hpsTransformations.py b/smstools/transformations/hpsTransformations.py index e3bfa98c..a1009736 100644 --- a/smstools/transformations/hpsTransformations.py +++ b/smstools/transformations/hpsTransformations.py @@ -3,82 +3,104 @@ import numpy as np from scipy.interpolate import interp1d + def hpsTimeScale(hfreq, hmag, stocEnv, timeScaling): - """ - Time scaling of the harmonic plus stochastic representation - hfreq, hmag: harmonic frequencies and magnitudes, stocEnv: residual envelope - timeScaling: scaling factors, in time-value pairs - returns yhfreq, yhmag, ystocEnv: hps output representation - """ + """ + Time scaling of the harmonic plus stochastic representation + hfreq, hmag: harmonic frequencies and magnitudes, stocEnv: residual envelope + timeScaling: scaling factors, in time-value pairs + returns yhfreq, yhmag, ystocEnv: hps output representation + """ + + if timeScaling.size % 2 != 0: # raise exception if array not even length + raise ValueError("Time scaling array does not have an even size") + + L = hfreq[:, 0].size # number of input frames + maxInTime = max(timeScaling[::2]) # maximum value used as input times + maxOutTime = max(timeScaling[1::2]) # maximum value used in output times + outL = int(L * maxOutTime / maxInTime) # number of output frames + inFrames = (L - 1) * timeScaling[::2] / maxInTime # input time values in frames + outFrames = outL * timeScaling[1::2] / maxOutTime # output time values in frames + timeScalingEnv = interp1d( + outFrames, inFrames, fill_value=0 + ) # interpolation function + indexes = timeScalingEnv(np.arange(outL)) # generate frame indexes for the output + yhfreq = np.zeros((indexes.shape[0], hfreq.shape[1])) # allocate space for yhfreq + yhmag = np.zeros((indexes.shape[0], hmag.shape[1])) # allocate space for yhmag + ystocEnv = np.zeros( + (indexes.shape[0], stocEnv.shape[1]) + ) # allocate space for ystocEnv + frameIdx = 0 + for l in indexes[1:]: # iterate over all output frame indexes + yhfreq[frameIdx, :] = hfreq[int(round(l)), :] # get the closest input frame + yhmag[frameIdx, :] = hmag[int(round(l)), :] # get the closest input frame + ystocEnv[frameIdx, :] = stocEnv[int(round(l)), :] # get the closest input frame + frameIdx += 1 + return yhfreq, yhmag, ystocEnv + + +def hpsMorph( + hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp +): + """ + Morph between two sounds using the harmonic plus stochastic model + hfreq1, hmag1, stocEnv1: hps representation of sound 1 + hfreq2, hmag2, stocEnv2: hps representation of sound 2 + hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) + hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) + stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) + returns yhfreq, yhmag, ystocEnv: hps output representation + """ + + if hfreqIntp.size % 2 != 0: # raise exception if array not even length + raise ValueError( + "Harmonic frequencies interpolation array does not have an even size" + ) + + if hmagIntp.size % 2 != 0: # raise exception if array not even length + raise ValueError("Harmonic magnitudes interpolation does not have an even size") - if (timeScaling.size % 2 != 0): # raise exception if array not even length - raise ValueError("Time scaling array does not have an even size") - - L = hfreq[:,0].size # number of input frames - maxInTime = max(timeScaling[::2]) # maximum value used as input times - maxOutTime = max(timeScaling[1::2]) # maximum value used in output times - outL = int(L*maxOutTime/maxInTime) # number of output frames - inFrames = (L-1)*timeScaling[::2]/maxInTime # input time values in frames - outFrames = outL*timeScaling[1::2]/maxOutTime # output time values in frames - timeScalingEnv = interp1d(outFrames, inFrames, fill_value=0) # interpolation function - indexes = timeScalingEnv(np.arange(outL)) # generate frame indexes for the output - yhfreq = np.zeros((indexes.shape[0], hfreq.shape[1])) # allocate space for yhfreq - yhmag = np.zeros((indexes.shape[0], hmag.shape[1])) # allocate space for yhmag - ystocEnv = np.zeros((indexes.shape[0], stocEnv.shape[1]))# allocate space for ystocEnv - frameIdx = 0 - for l in indexes[1:]: # iterate over all output frame indexes - yhfreq[frameIdx,:] = hfreq[int(round(l)),:] # get the closest input frame - yhmag[frameIdx,:] = hmag[int(round(l)),:] # get the closest input frame - ystocEnv[frameIdx,:] = stocEnv[int(round(l)),:] # get the closest input frame - frameIdx += 1 - return yhfreq, yhmag, ystocEnv - - -def hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp): - """ - Morph between two sounds using the harmonic plus stochastic model - hfreq1, hmag1, stocEnv1: hps representation of sound 1 - hfreq2, hmag2, stocEnv2: hps representation of sound 2 - hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) - hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) - stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs) - returns yhfreq, yhmag, ystocEnv: hps output representation - """ + if stocIntp.size % 2 != 0: # raise exception if array not even length + raise ValueError("Stochastic component array does not have an even size") - if (hfreqIntp.size % 2 != 0): # raise exception if array not even length - raise ValueError("Harmonic frequencies interpolation array does not have an even size") - - if (hmagIntp.size % 2 != 0): # raise exception if array not even length - raise ValueError("Harmonic magnitudes interpolation does not have an even size") - - if (stocIntp.size % 2 != 0): # raise exception if array not even length - raise ValueError("Stochastic component array does not have an even size") - - L1 = hfreq1[:,0].size # number of frames of sound 1 - L2 = hfreq2[:,0].size # number of frames of sound 2 - hfreqIntp[::2] = (L1-1)*hfreqIntp[::2]/hfreqIntp[-2] # normalize input values - hmagIntp[::2] = (L1-1)*hmagIntp[::2]/hmagIntp[-2] # normalize input values - stocIntp[::2] = (L1-1)*stocIntp[::2]/stocIntp[-2] # normalize input values - hfreqIntpEnv = interp1d(hfreqIntp[0::2], hfreqIntp[1::2], fill_value=0) # interpolation function - hfreqIndexes = hfreqIntpEnv(np.arange(L1)) # generate frame indexes for the output - hmagIntpEnv = interp1d(hmagIntp[0::2], hmagIntp[1::2], fill_value=0) # interpolation function - hmagIndexes = hmagIntpEnv(np.arange(L1)) # generate frame indexes for the output - stocIntpEnv = interp1d(stocIntp[0::2], stocIntp[1::2], fill_value=0) # interpolation function - stocIndexes = stocIntpEnv(np.arange(L1)) # generate frame indexes for the output - yhfreq = np.zeros_like(hfreq1) # create empty output matrix - yhmag = np.zeros_like(hmag1) # create empty output matrix - ystocEnv = np.zeros_like(stocEnv1) # create empty output matrix - - for l in range(L1): # generate morphed frames - dataIndex = int(round(((L2-1)*l)/float(L1-1))) - # identify harmonics that are present in both frames - harmonics = np.intersect1d(np.array(np.nonzero(hfreq1[l,:]), dtype=np.int)[0], np.array(np.nonzero(hfreq2[dataIndex,:]), dtype=np.int)[0]) - # interpolate the frequencies of the existing harmonics - yhfreq[l,harmonics] = (1-hfreqIndexes[l])* hfreq1[l,harmonics] + hfreqIndexes[l] * hfreq2[dataIndex,harmonics] - # interpolate the magnitudes of the existing harmonics - yhmag[l,harmonics] = (1-hmagIndexes[l])* hmag1[l,harmonics] + hmagIndexes[l] * hmag2[dataIndex,harmonics] - # interpolate the stochastic envelopes of both frames - ystocEnv[l,:] = (1-stocIndexes[l])* stocEnv1[l,:] + stocIndexes[l] * stocEnv2[dataIndex,:] - return yhfreq, yhmag, ystocEnv - + L1 = hfreq1[:, 0].size # number of frames of sound 1 + L2 = hfreq2[:, 0].size # number of frames of sound 2 + hfreqIntp[::2] = (L1 - 1) * hfreqIntp[::2] / hfreqIntp[-2] # normalize input values + hmagIntp[::2] = (L1 - 1) * hmagIntp[::2] / hmagIntp[-2] # normalize input values + stocIntp[::2] = (L1 - 1) * stocIntp[::2] / stocIntp[-2] # normalize input values + hfreqIntpEnv = interp1d( + hfreqIntp[0::2], hfreqIntp[1::2], fill_value=0 + ) # interpolation function + hfreqIndexes = hfreqIntpEnv(np.arange(L1)) # generate frame indexes for the output + hmagIntpEnv = interp1d( + hmagIntp[0::2], hmagIntp[1::2], fill_value=0 + ) # interpolation function + hmagIndexes = hmagIntpEnv(np.arange(L1)) # generate frame indexes for the output + stocIntpEnv = interp1d( + stocIntp[0::2], stocIntp[1::2], fill_value=0 + ) # interpolation function + stocIndexes = stocIntpEnv(np.arange(L1)) # generate frame indexes for the output + yhfreq = np.zeros_like(hfreq1) # create empty output matrix + yhmag = np.zeros_like(hmag1) # create empty output matrix + ystocEnv = np.zeros_like(stocEnv1) # create empty output matrix + for l in range(L1): # generate morphed frames + dataIndex = int(round(((L2 - 1) * l) / float(L1 - 1))) + # identify harmonics that are present in both frames + harmonics = np.intersect1d( + np.array(np.nonzero(hfreq1[l, :]), dtype=np.int)[0], + np.array(np.nonzero(hfreq2[dataIndex, :]), dtype=np.int)[0], + ) + # interpolate the frequencies of the existing harmonics + yhfreq[l, harmonics] = (1 - hfreqIndexes[l]) * hfreq1[ + l, harmonics + ] + hfreqIndexes[l] * hfreq2[dataIndex, harmonics] + # interpolate the magnitudes of the existing harmonics + yhmag[l, harmonics] = (1 - hmagIndexes[l]) * hmag1[l, harmonics] + hmagIndexes[ + l + ] * hmag2[dataIndex, harmonics] + # interpolate the stochastic envelopes of both frames + ystocEnv[l, :] = (1 - stocIndexes[l]) * stocEnv1[l, :] + stocIndexes[ + l + ] * stocEnv2[dataIndex, :] + return yhfreq, yhmag, ystocEnv diff --git a/smstools/transformations/sineTransformations.py b/smstools/transformations/sineTransformations.py index 71c2cbd8..2c1086a0 100644 --- a/smstools/transformations/sineTransformations.py +++ b/smstools/transformations/sineTransformations.py @@ -3,48 +3,60 @@ import numpy as np from scipy.interpolate import interp1d + def sineTimeScaling(sfreq, smag, timeScaling): - """ - Time scaling of sinusoidal tracks - sfreq, smag: frequencies and magnitudes of input sinusoidal tracks - timeScaling: scaling factors, in time-value pairs - returns ysfreq, ysmag: frequencies and magnitudes of output sinusoidal tracks - """ - if (timeScaling.size % 2 != 0): # raise exception if array not even length - raise ValueError("Time scaling array does not have an even size") - - L = sfreq.shape[0] # number of input frames - maxInTime = max(timeScaling[::2]) # maximum value used as input times - maxOutTime = max(timeScaling[1::2]) # maximum value used in output times - outL = int(L*maxOutTime/maxInTime) # number of output frames - inFrames = (L-1)*timeScaling[::2]/maxInTime # input time values in frames - outFrames = outL*timeScaling[1::2]/maxOutTime # output time values in frames - timeScalingEnv = interp1d(outFrames, inFrames, fill_value=0) # interpolation function - indexes = timeScalingEnv(np.arange(outL)) # generate frame indexes for the output - ysfreq = sfreq[int(round(indexes[0])),:] # first output frame - ysmag = smag[int(round(indexes[0])),:] # first output frame - for l in indexes[1:]: # generate frames for output sine tracks - ysfreq = np.vstack((ysfreq, sfreq[int(round(l)),:])) # get closest frame to scaling value - ysmag = np.vstack((ysmag, smag[int(round(l)),:])) # get closest frame to scaling value - return ysfreq, ysmag + """ + Time scaling of sinusoidal tracks + sfreq, smag: frequencies and magnitudes of input sinusoidal tracks + timeScaling: scaling factors, in time-value pairs + returns ysfreq, ysmag: frequencies and magnitudes of output sinusoidal tracks + """ + if timeScaling.size % 2 != 0: # raise exception if array not even length + raise ValueError("Time scaling array does not have an even size") + + L = sfreq.shape[0] # number of input frames + maxInTime = max(timeScaling[::2]) # maximum value used as input times + maxOutTime = max(timeScaling[1::2]) # maximum value used in output times + outL = int(L * maxOutTime / maxInTime) # number of output frames + inFrames = (L - 1) * timeScaling[::2] / maxInTime # input time values in frames + outFrames = outL * timeScaling[1::2] / maxOutTime # output time values in frames + timeScalingEnv = interp1d( + outFrames, inFrames, fill_value=0 + ) # interpolation function + indexes = timeScalingEnv(np.arange(outL)) # generate frame indexes for the output + ysfreq = sfreq[int(round(indexes[0])), :] # first output frame + ysmag = smag[int(round(indexes[0])), :] # first output frame + for l in indexes[1:]: # generate frames for output sine tracks + ysfreq = np.vstack( + (ysfreq, sfreq[int(round(l)), :]) + ) # get closest frame to scaling value + ysmag = np.vstack( + (ysmag, smag[int(round(l)), :]) + ) # get closest frame to scaling value + return ysfreq, ysmag + def sineFreqScaling(sfreq, freqScaling): - """ - Frequency scaling of sinusoidal tracks - sfreq: frequencies of input sinusoidal tracks - freqScaling: scaling factors, in time-value pairs (value of 1 is no scaling) - returns ysfreq: frequencies of output sinusoidal tracks - """ - if (freqScaling.size % 2 != 0): # raise exception if array not even length - raise ValueError("Frequency scaling array does not have an even size") - - L = sfreq.shape[0] # number of input frames - # create interpolation object from the scaling values - freqScalingEnv = np.interp(np.arange(L), L*freqScaling[::2]/freqScaling[-2], freqScaling[1::2]) - ysfreq = np.zeros_like(sfreq) # create empty output matrix - for l in range(L): # go through all frames - ind_valid = np.where(sfreq[l,:]!=0)[0] # check if there are frequency values - if ind_valid.size == 0: # if no values go to next frame - continue - ysfreq[l,ind_valid] = sfreq[l,ind_valid] * freqScalingEnv[l] # scale of frequencies - return ysfreq + """ + Frequency scaling of sinusoidal tracks + sfreq: frequencies of input sinusoidal tracks + freqScaling: scaling factors, in time-value pairs (value of 1 is no scaling) + returns ysfreq: frequencies of output sinusoidal tracks + """ + if freqScaling.size % 2 != 0: # raise exception if array not even length + raise ValueError("Frequency scaling array does not have an even size") + + L = sfreq.shape[0] # number of input frames + # create interpolation object from the scaling values + freqScalingEnv = np.interp( + np.arange(L), L * freqScaling[::2] / freqScaling[-2], freqScaling[1::2] + ) + ysfreq = np.zeros_like(sfreq) # create empty output matrix + for l in range(L): # go through all frames + ind_valid = np.where(sfreq[l, :] != 0)[0] # check if there are frequency values + if ind_valid.size == 0: # if no values go to next frame + continue + ysfreq[l, ind_valid] = ( + sfreq[l, ind_valid] * freqScalingEnv[l] + ) # scale of frequencies + return ysfreq diff --git a/smstools/transformations/stftTransformations.py b/smstools/transformations/stftTransformations.py index 447b30fe..14a460ab 100644 --- a/smstools/transformations/stftTransformations.py +++ b/smstools/transformations/stftTransformations.py @@ -1,92 +1,115 @@ # functions that implement transformations using the stft +import math +import os +import sys + import numpy as np -import sys, os, math from scipy.signal import resample + from smstools.models import dftModel as DFT + def stftFiltering(x, fs, w, N, H, filter): - """ - Apply a filter to a sound by using the STFT - x: input sound, w: analysis window, N: FFT size, H: hop size - filter: magnitude response of filter with frequency-magnitude pairs (in dB) - returns y: output sound - """ + """ + Apply a filter to a sound by using the STFT + x: input sound, w: analysis window, N: FFT size, H: hop size + filter: magnitude response of filter with frequency-magnitude pairs (in dB) + returns y: output sound + """ - M = w.size # size of analysis window - hM1 = int(math.floor((M+1)/2)) # half analysis window size by rounding - hM2 = int(math.floor(M/2)) # half analysis window size by floor - x = np.append(np.zeros(hM2),x) # add zeros at beginning to center first window at sample 0 - x = np.append(x,np.zeros(hM1)) # add zeros at the end to analyze last sample - pin = hM1 # initialize sound pointer in middle of analysis window - pend = x.size-hM1 # last sample to start a frame - w = w / sum(w) # normalize analysis window - y = np.zeros(x.size) # initialize output array - while pin<=pend: # while sound pointer is smaller than last sample - #-----analysis----- - x1 = x[pin-hM1:pin+hM2] # select one frame of input sound - mX, pX = DFT.dftAnal(x1, w, N) # compute dft - #------transformation----- - mY = mX + filter # filter input magnitude spectrum - #-----synthesis----- - y1 = DFT.dftSynth(mY, pX, M) # compute idft - y[pin-hM1:pin+hM2] += H*y1 # overlap-add to generate output sound - pin += H # advance sound pointer - y = np.delete(y, range(hM2)) # delete half of first window which was added in stftAnal - y = np.delete(y, range(y.size-hM1, y.size)) # add zeros at the end to analyze last sample - return y + M = w.size # size of analysis window + hM1 = int(math.floor((M + 1) / 2)) # half analysis window size by rounding + hM2 = int(math.floor(M / 2)) # half analysis window size by floor + x = np.append( + np.zeros(hM2), x + ) # add zeros at beginning to center first window at sample 0 + x = np.append(x, np.zeros(hM1)) # add zeros at the end to analyze last sample + pin = hM1 # initialize sound pointer in middle of analysis window + pend = x.size - hM1 # last sample to start a frame + w = w / sum(w) # normalize analysis window + y = np.zeros(x.size) # initialize output array + while pin <= pend: # while sound pointer is smaller than last sample + # -----analysis----- + x1 = x[pin - hM1 : pin + hM2] # select one frame of input sound + mX, pX = DFT.dftAnal(x1, w, N) # compute dft + # ------transformation----- + mY = mX + filter # filter input magnitude spectrum + # -----synthesis----- + y1 = DFT.dftSynth(mY, pX, M) # compute idft + y[pin - hM1 : pin + hM2] += H * y1 # overlap-add to generate output sound + pin += H # advance sound pointer + y = np.delete( + y, range(hM2) + ) # delete half of first window which was added in stftAnal + y = np.delete( + y, range(y.size - hM1, y.size) + ) # add zeros at the end to analyze last sample + return y def stftMorph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef): - """ - Morph of two sounds using the STFT - x1, x2: input sounds, fs: sampling rate - w1, w2: analysis windows, N1, N2: FFT sizes, H1: hop size - smoothf: smooth factor of sound 2, bigger than 0 to max of 1, where 1 is no smothing, - balancef: balance between the 2 sounds, from 0 to 1, where 0 is sound 1 and 1 is sound 2 - returns y: output sound - """ + """ + Morph of two sounds using the STFT + x1, x2: input sounds, fs: sampling rate + w1, w2: analysis windows, N1, N2: FFT sizes, H1: hop size + smoothf: smooth factor of sound 2, bigger than 0 to max of 1, where 1 is no smothing, + balancef: balance between the 2 sounds, from 0 to 1, where 0 is sound 1 and 1 is sound 2 + returns y: output sound + """ - if (N2/2*smoothf < 3): # raise exception if decimation factor too small - raise ValueError("Smooth factor too small") + if N2 / 2 * smoothf < 3: # raise exception if decimation factor too small + raise ValueError("Smooth factor too small") - if (smoothf > 1): # raise exception if decimation factor too big - raise ValueError("Smooth factor above 1") + if smoothf > 1: # raise exception if decimation factor too big + raise ValueError("Smooth factor above 1") - if (balancef > 1 or balancef < 0): # raise exception if balancef outside 0-1 - raise ValueError("Balance factor outside range") + if balancef > 1 or balancef < 0: # raise exception if balancef outside 0-1 + raise ValueError("Balance factor outside range") - if (H1 <= 0): # raise error if hop size 0 or negative - raise ValueError("Hop size (H1) smaller or equal to 0") + if H1 <= 0: # raise error if hop size 0 or negative + raise ValueError("Hop size (H1) smaller or equal to 0") - M1 = w1.size # size of analysis window - hM1_1 = int(math.floor((M1+1)/2)) # half analysis window size by rounding - hM1_2 = int(math.floor(M1/2)) # half analysis window size by floor - L = int(x1.size/H1) # number of frames for x1 - x1 = np.append(np.zeros(hM1_2),x1) # add zeros at beginning to center first window at sample 0 - x1 = np.append(x1,np.zeros(hM1_1)) # add zeros at the end to analyze last sample - pin1 = hM1_1 # initialize sound pointer in middle of analysis window - w1 = w1 / sum(w1) # normalize analysis window - M2 = w2.size # size of analysis window - hM2_1 = int(math.floor((M2+1)/2)) # half analysis window size by rounding - hM2_2 = int(math.floor(M2/2)) # half analysis window size by floor2 - H2 = int(x2.size/L) # hop size for second sound - x2 = np.append(np.zeros(hM2_2),x2) # add zeros at beginning to center first window at sample 0 - x2 = np.append(x2,np.zeros(hM2_1)) # add zeros at the end to analyze last sample - pin2 = hM2_1 # initialize sound pointer in middle of analysis window - y = np.zeros(x1.size) # initialize output array - for l in range(L): - #-----analysis----- - mX1, pX1 = DFT.dftAnal(x1[pin1-hM1_1:pin1+hM1_2], w1, N1) # compute dft - mX2, pX2 = DFT.dftAnal(x2[pin2-hM2_1:pin2+hM2_2], w2, N2) # compute dft - #-----transformation----- - mX2smooth = resample(np.maximum(-200, mX2), int(mX2.size*smoothf)) # smooth spectrum of second sound - mX2 = resample(mX2smooth, mX1.size) # generate back the same size spectrum - mY = balancef * mX2 + (1-balancef) * mX1 # generate output spectrum - #-----synthesis----- - y[pin1-hM1_1:pin1+hM1_2] += H1*DFT.dftSynth(mY, pX1, M1) # overlap-add to generate output sound - pin1 += H1 # advance sound pointer - pin2 += H2 # advance sound pointer - y = np.delete(y, range(hM1_2)) # delete half of first window which was added in stftAnal - y = np.delete(y, range(y.size-hM1_1, y.size)) # add zeros at the end to analyze last sample - return y + M1 = w1.size # size of analysis window + hM1_1 = int(math.floor((M1 + 1) / 2)) # half analysis window size by rounding + hM1_2 = int(math.floor(M1 / 2)) # half analysis window size by floor + L = int(x1.size / H1) # number of frames for x1 + x1 = np.append( + np.zeros(hM1_2), x1 + ) # add zeros at beginning to center first window at sample 0 + x1 = np.append(x1, np.zeros(hM1_1)) # add zeros at the end to analyze last sample + pin1 = hM1_1 # initialize sound pointer in middle of analysis window + w1 = w1 / sum(w1) # normalize analysis window + M2 = w2.size # size of analysis window + hM2_1 = int(math.floor((M2 + 1) / 2)) # half analysis window size by rounding + hM2_2 = int(math.floor(M2 / 2)) # half analysis window size by floor2 + H2 = int(x2.size / L) # hop size for second sound + x2 = np.append( + np.zeros(hM2_2), x2 + ) # add zeros at beginning to center first window at sample 0 + x2 = np.append(x2, np.zeros(hM2_1)) # add zeros at the end to analyze last sample + pin2 = hM2_1 # initialize sound pointer in middle of analysis window + y = np.zeros(x1.size) # initialize output array + for l in range(L): + # -----analysis----- + mX1, pX1 = DFT.dftAnal(x1[pin1 - hM1_1 : pin1 + hM1_2], w1, N1) # compute dft + mX2, pX2 = DFT.dftAnal(x2[pin2 - hM2_1 : pin2 + hM2_2], w2, N2) # compute dft + # -----transformation----- + mX2smooth = resample( + np.maximum(-200, mX2), int(mX2.size * smoothf) + ) # smooth spectrum of second sound + mX2 = resample(mX2smooth, mX1.size) # generate back the same size spectrum + mY = balancef * mX2 + (1 - balancef) * mX1 # generate output spectrum + # -----synthesis----- + y[pin1 - hM1_1 : pin1 + hM1_2] += H1 * DFT.dftSynth( + mY, pX1, M1 + ) # overlap-add to generate output sound + pin1 += H1 # advance sound pointer + pin2 += H2 # advance sound pointer + y = np.delete( + y, range(hM1_2) + ) # delete half of first window which was added in stftAnal + y = np.delete( + y, range(y.size - hM1_1, y.size) + ) # add zeros at the end to analyze last sample + return y diff --git a/smstools/transformations/stochasticTransformations.py b/smstools/transformations/stochasticTransformations.py index 94b7bf5a..359f91dd 100644 --- a/smstools/transformations/stochasticTransformations.py +++ b/smstools/transformations/stochasticTransformations.py @@ -5,21 +5,27 @@ def stochasticTimeScale(stocEnv, timeScaling): - """ - Time scaling of the stochastic representation of a sound - stocEnv: stochastic envelope - timeScaling: scaling factors, in time-value pairs - returns ystocEnv: stochastic envelope - """ - if (timeScaling.size % 2 != 0): # raise exception if array not even length - raise ValueError("Time scaling array does not have an even size") - - L = stocEnv[:,0].size # number of input frames - outL = int(L*timeScaling[-1]/timeScaling[-2]) # number of synthesis frames - # create interpolation object with the time scaling values - timeScalingEnv = interp1d(timeScaling[::2]/timeScaling[-2], timeScaling[1::2]/timeScaling[-1]) - indexes = (L-1)*timeScalingEnv(np.arange(outL)/float(outL)) # generate output time indexes - ystocEnv = stocEnv[0,:] # first output frame is same than input - for l in indexes[1:]: # step through the output frames - ystocEnv = np.vstack((ystocEnv, stocEnv[int(round(l)),:])) # get the closest input frame - return ystocEnv + """ + Time scaling of the stochastic representation of a sound + stocEnv: stochastic envelope + timeScaling: scaling factors, in time-value pairs + returns ystocEnv: stochastic envelope + """ + if timeScaling.size % 2 != 0: # raise exception if array not even length + raise ValueError("Time scaling array does not have an even size") + + L = stocEnv[:, 0].size # number of input frames + outL = int(L * timeScaling[-1] / timeScaling[-2]) # number of synthesis frames + # create interpolation object with the time scaling values + timeScalingEnv = interp1d( + timeScaling[::2] / timeScaling[-2], timeScaling[1::2] / timeScaling[-1] + ) + indexes = (L - 1) * timeScalingEnv( + np.arange(outL) / float(outL) + ) # generate output time indexes + ystocEnv = stocEnv[0, :] # first output frame is same than input + for l in indexes[1:]: # step through the output frames + ystocEnv = np.vstack( + (ystocEnv, stocEnv[int(round(l)), :]) + ) # get the closest input frame + return ystocEnv