diff --git a/ComputeBeatHisto.m b/ComputeBeatHisto.m index bb29a46..47834f6 100644 --- a/ComputeBeatHisto.m +++ b/ComputeBeatHisto.m @@ -7,7 +7,7 @@ %> %> @param afAudioData: time domain sample data, dimension channels X samples %> @param f_s: sample rate of audio data -%> @param method: preferred method of beat histogram computation (default: Corr), can be [] empty +%> @param cMethod: method of beat histogram computation (default: 'FFT') %> @param afWindow: FFT window of length iBlockLength (default: hann), can be [] empty %> @param iBlockLength: internal block length (default: 4096 samples) %> @param iHopLength: internal hop length (default: 512 samples) @@ -15,7 +15,7 @@ %> @retval T: Beat histogram %> @retval Bpm: tempo axis % ====================================================================== -function [T, Bpm] = ComputeBeatHisto (afAudioData, f_s, method, afWindow, iBlockLength, iHopLength) +function [T, Bpm] = ComputeBeatHisto (afAudioData, f_s, cMethod, afWindow, iBlockLength, iHopLength) % set default parameters if necessary if (nargin < 6) @@ -28,7 +28,7 @@ afWindow = hann(iBlockLength,'periodic'); end if (nargin < 3) - method = 'FFT'; + cMethod = 'FFT'; end % compute FFT window function if (length(afWindow) ~= iBlockLength) @@ -39,14 +39,14 @@ afAudioData = ToolDownmix(afAudioData); % novelty function - [d,t,peaks] = ComputeNoveltyFunction ( 'Flux', ... + [d,t, G_T,peaks] = ComputeNoveltyFunction ( 'Flux', ... afAudioData, ... f_s, ... afWindow, ... iBlockLength, ... iHopLength); - if strcmp(method,'Corr') + if strcmp(cMethod,'Corr') % compute autocorrelation afCorr = xcorr(d,'coeff'); afCorr = afCorr((ceil((length(afCorr)/2))+1):end); @@ -59,15 +59,14 @@ if length(d)< 2*iLength d = [d zeros(1,2*iLength-length(d))]; end - [X,f,tf] = spectrogram( d, ... + [X,f,tf] = ComputeSpectrogram( d, ... + f_s, ... [hann(iLength); zeros(iLength,1)], ... - iLength-iHopLength, ... 2*iLength, ... 
- f_s); + iLength/4 ); % adjust output BPM range T = mean(abs(X),2); - T(1:8) = 0; Bpm = f*60; lIdx = max(find(Bpm < 30)); hIdx = min(find(Bpm > 200));
diff --git a/ComputeSpectrogram.m b/ComputeSpectrogram.m
new file mode 100644
index 0000000..0596e79
--- /dev/null
+++ b/ComputeSpectrogram.m
@@ -0,0 +1,68 @@
+% ======================================================================
+%> @brief computes a magnitude spectrogram from the audio data
+%>
+%> @param x: time domain sample data, dimension channels X samples
+%> @param f_s: sample rate of audio data
+%> @param afWindow: FFT window of length iBlockLength (default: hann), can be [] empty
+%> @param iBlockLength: internal block length (default: 4096 samples)
+%> @param iHopLength: internal hop length (default: 2048 samples)
+%> @param bNormalize: pass the downmixed signal through ToolNormalizeAudio before blocking (default: true)
+%>
+%> @retval X magnitude spectrogram, dimension (iBlockLength/2+1) X number of blocks
+%> @retval f frequency of each row of X in Hz, from 0 to f_s/2
+%> @retval t time stamps of the blocks as returned by ToolBlockAudio
+% ======================================================================
+function [X, f, t] = ComputeSpectrogram (x, f_s, afWindow, iBlockLength, iHopLength, bNormalize)
+
+    % set default parameters if necessary
+    if (nargin < 6)
+        bNormalize = true;
+    end
+    if (nargin < 5)
+        iHopLength = 2048;
+    end
+    if (nargin < 4)
+        iBlockLength = 4096;
+    end
+    if (nargin < 3 || isempty(afWindow))
+        afWindow = hann(iBlockLength,'periodic');
+    end
+
+    % the window is multiplied element-wise with each block, so the lengths must agree
+    if (length(afWindow) ~= iBlockLength)
+        error('window length mismatch');
+    end
+
+    % turn window and signal so that the longer dimension runs along the rows
+    if (size(afWindow,1) < size(afWindow,2))
+        afWindow = afWindow';
+    end
+    if (size(x,1) < size(x,2))
+        x = x';
+    end
+
+    % pre-processing: down-mixing
+    x = ToolDownmix(x);
+
+    % pre-processing: normalization
+    if bNormalize
+        x = ToolNormalizeAudio(x);
+    end
+
+    % x_b holds one block of iBlockLength samples per row
+    [x_b, t] = ToolBlockAudio (x, iBlockLength, iHopLength, f_s);
+
+    % keep only bins 0 ... iBlockLength/2 of each block's FFT
+    X = zeros(size(x_b,2)/2+1, size(x_b,1));
+
+    % frequency axis in Hz with one entry per row of X, running from 0 to f_s/2
+    f = linspace(0, f_s/2, size(X,1));
+
+    for n=1:size(X,2)
+        tmp = fft(x_b(n,:)' .* afWindow);
+        X(:,n) = abs(tmp(1:size(X,1))) * 2 / iBlockLength;
+    end
+
+    % normalization: scale the first (DC) and last (Nyquist) row by 1/sqrt(2)
+    X([1 end],:)= X([1 end],:)/sqrt(2);
+end
diff --git 
a/ToolBlockAudio.m b/ToolBlockAudio.m
new file mode 100644
index 0000000..8d28a32
--- /dev/null
+++ b/ToolBlockAudio.m
@@ -0,0 +1,31 @@
+% ======================================================================
+%> @brief splits an audio signal into overlapping, zero-padded blocks
+%>
+%> @param x: audio signal (dimension length x 1)
+%> @param iBlockLength: number of samples per block
+%> @param iHopLength: number of samples between the starts of consecutive blocks
+%> @param f_s: sample rate
+%>
+%> @retval x_b blocked signal (dimension iNumOfBlocks x iBlockLength)
+%> @retval t start time of each block (sample offset divided by f_s)
+% ======================================================================
+function [x_b, t] = ToolBlockAudio(x, iBlockLength, iHopLength, f_s)
+
+    % one block per hop, rounded up so the end of the signal is not dropped
+    iNumBlocks = ceil(size(x,1) / iHopLength );
+
+    % zero-based sample offset at which each block starts
+    aiOffset = (0:(iNumBlocks-1)) * iHopLength;
+
+    % time stamp vector
+    t = aiOffset / f_s;
+
+    % trailing zeros let the last blocks be read in full
+    xPadded = [x; zeros(iBlockLength+iHopLength, 1)];
+
+    % copy one block per row
+    x_b = zeros(iNumBlocks, iBlockLength);
+    for k = 1:iNumBlocks
+        x_b(k,:) = xPadded(aiOffset(k) + (1:iBlockLength));
+    end
+end
diff --git a/ToolNormalizeAudio.m b/ToolNormalizeAudio.m index 935bf20..4a553d5 100644 --- a/ToolNormalizeAudio.m +++ b/ToolNormalizeAudio.m @@ -9,5 +9,7 @@ if (length(x)> 1) x_norm = x/max(abs(x),[],'all'); + else + x_norm = x; end end