Skip to content

Commit

Permalink
Merge pull request #2 from eshaz/better-multi-threading
Browse files Browse the repository at this point in the history
Better multi threading
  • Loading branch information
eshaz authored Jun 27, 2022
2 parents d31bf09 + eb96d51 commit f8fb1d4
Show file tree
Hide file tree
Showing 7 changed files with 577 additions and 217 deletions.
1 change: 1 addition & 0 deletions .github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
github: eshaz
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ correlate-scalar:
-O4 \
--reorder-functions \
--reorder-locals \
--coalesce-locals \
--optimize-instructions \
--optimize-added-constants-propagate \
--const-hoisting \
--simplify-globals \
--simplify-locals \
--strip-producers \
--vacuum \
--converge \
Expand All @@ -52,6 +58,12 @@ correlate-simd:
-O4 \
--reorder-functions \
--reorder-locals \
--coalesce-locals \
--optimize-instructions \
--optimize-added-constants-propagate \
--const-hoisting \
--simplify-globals \
--simplify-locals \
--strip-producers \
--vacuum \
--converge \
Expand Down
8 changes: 6 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "synaudio",
"version": "0.1.0",
"version": "0.2.0",
"description": "Library that finds the synchronization point between two similar audio clips.",
"files": [
"index.js",
Expand Down Expand Up @@ -36,6 +36,10 @@
},
"type": "module",
"homepage": "https://github.com/eshaz/synaudio#readme",
"funding": {
"type": "individual",
"url": "https://github.com/sponsors/eshaz"
},
"devDependencies": {
"@types/jest": "^27.5.1",
"jest": "^27.5.1",
Expand Down
276 changes: 162 additions & 114 deletions src/SynAudio.js

Large diffs are not rendered by default.

160 changes: 90 additions & 70 deletions src/correlate.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,22 @@
// Two-argument minimum / maximum.
// Each argument and the whole expansion are parenthesized so the macros
// compose correctly inside larger expressions (e.g. `2 * min(x, y)`).
// NOTE: an argument may be evaluated twice; do not pass expressions with
// side effects.
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))


// Scalar accumulation helpers for the correlation calculation.
// calc_* add one sample's contribution to a running sum; sum_* divide a
// finished running sum by (sampleSize - 1).
// Every argument is parenthesized so compound expressions (e.g. `x + y`)
// expand with the intended precedence.

// cov += (a - aMean) * (b - bMean)
#define calc_covariance(cov, a, b, aMean, bMean) ((cov) += ((a) - (aMean)) * ((b) - (bMean)))
// std += (data - dataMean)^2
#define calc_stddev(std, data, dataMean) ((std) += ((data) - (dataMean)) * ((data) - (dataMean)))
// cov / (sampleSize - 1)
#define sum_covariance(cov, sampleSize) ((cov) / ((float) (sampleSize) - 1))
// sqrt(std / (sampleSize - 1))
#define sum_stddev(std, sampleSize) __builtin_sqrt((std) / ((float) (sampleSize) - 1))

#ifdef WASM_SIMD

#include <wasm_simd128.h>

typedef float float4 __attribute__((__vector_size__(16)));
#define float4_size 4
#define new_float4 wasm_f32x4_splat(0)
#define float4_to_float(vec) (wasm_f32x4_extract_lane(vec, 0) + wasm_f32x4_extract_lane(vec, 1) + wasm_f32x4_extract_lane(vec, 2) + wasm_f32x4_extract_lane(vec, 3))

#define calc_covariance(vec, a, b, aMean, bMean) vec = \
#define calc_covariance_float4(vec, a, b, aMean, bMean) vec = \
wasm_f32x4_add(\
vec, \
wasm_f32x4_mul( \
Expand All @@ -41,7 +48,7 @@ typedef float float4 __attribute__((__vector_size__(16)));
) \
) \
)
#define calc_stddev(vec, data, dataMean) vec = \
#define calc_stddev_float4(vec, data, dataMean) vec = \
wasm_f32x4_add( \
vec, \
wasm_f32x4_mul( \
Expand All @@ -55,78 +62,89 @@ typedef float float4 __attribute__((__vector_size__(16)));
) \
) \
)

#define vec_to_float(vec) (wasm_f32x4_extract_lane(vec, 0) + wasm_f32x4_extract_lane(vec, 1) + wasm_f32x4_extract_lane(vec, 2) + wasm_f32x4_extract_lane(vec, 3))

#define sum_covariance(cov, sampleSize) vec_to_float(cov) / ((float) sampleSize - 1)
#define sum_stddev(std, sampleSize) __builtin_sqrt(vec_to_float(std) / ((float) sampleSize - 1))
#define sum_covariance_float4(cov, sampleSize) float4_to_float(cov) / ((float) sampleSize - 1)
#define sum_stddev_float4(std, sampleSize) __builtin_sqrt(float4_to_float(std) / ((float) sampleSize - 1))

#else

typedef float float4;
#define float4_size 1
#define new_float4 0
#define float4_to_float(f) (float) f

#define calc_stddev(std, data, dataMean) std += (data - dataMean) * (data - dataMean)
#define calc_covariance(cov, a, b, aMean, bMean) cov += (a - aMean) * (b - bMean)

#define sum_covariance(cov, sampleSize) cov / ((float) sampleSize - 1)
#define sum_stddev(std, sampleSize) __builtin_sqrt(std / ((float) sampleSize - 1))
#define calc_covariance_float4 calc_covariance
#define calc_stddev_float4 calc_stddev
#define sum_covariance_float4 sum_covariance
#define sum_stddev_float4 sum_stddev

#endif

// One accumulation step of the correlation calculation: folds a single
// sample pair (a, b) into the running covariance sum `cov` and the running
// squared-deviation sums `aStd` / `bStd`.
//
// The covariance is accumulated into the `cov` argument itself (not into a
// fixed variable name), so callers may pass any accumulator -- the scalar
// tail loop in calc_correlation passes `covarianceRemaining`.
//
// Each macro is wrapped in do { } while (0) so it behaves as one statement.

// Wide variant: built on the *_float4 helpers.
#define calc_correlation_step_float4(cov, aStd, bStd, aMean, bMean, a, b) \
  do { \
    calc_stddev_float4(aStd, a, aMean); \
    calc_stddev_float4(bStd, b, bMean); \
    calc_covariance_float4(cov, a, b, aMean, bMean); \
  } while (0)

// Scalar variant: built on the plain float helpers.
#define calc_correlation_step(cov, aStd, bStd, aMean, bMean, a, b) \
  do { \
    calc_stddev(aStd, a, aMean); \
    calc_stddev(bStd, b, bMean); \
    calc_covariance(cov, a, b, aMean, bMean); \
  } while (0)


float calc_correlation(float *a, float *b, float aMean, float bMean, long sampleSize) {
int loopUnroll = 1*float4_size;
int loopUnroll = 4*float4_size;
float4 covariance = new_float4;
float4 aStd = new_float4;
float4 bStd = new_float4;

int i;
for (
int i = 0;
i = 0;
i < sampleSize - loopUnroll;
i+=loopUnroll
) {
calc_correlation_step(covariance, aStd, bStd, aMean, bMean, a[i], b[i] );
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 1 * float4_size], b[i + 1 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 2 * float4_size], b[i + 2 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 3 * float4_size], b[i + 3 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 4 * float4_size], b[i + 4 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 5 * float4_size], b[i + 5 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 6 * float4_size], b[i + 6 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 7 * float4_size], b[i + 7 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 8 * float4_size], b[i + 8 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 9 * float4_size], b[i + 9 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 10 * float4_size], b[i + 10 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 11 * float4_size], b[i + 11 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 12 * float4_size], b[i + 12 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 13 * float4_size], b[i + 13 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 14 * float4_size], b[i + 14 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 15 * float4_size], b[i + 15 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 16 * float4_size], b[i + 16 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 17 * float4_size], b[i + 17 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 18 * float4_size], b[i + 18 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 19 * float4_size], b[i + 19 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 20 * float4_size], b[i + 20 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 21 * float4_size], b[i + 21 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 22 * float4_size], b[i + 22 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 23 * float4_size], b[i + 23 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 24 * float4_size], b[i + 24 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 25 * float4_size], b[i + 25 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 26 * float4_size], b[i + 26 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 27 * float4_size], b[i + 27 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 28 * float4_size], b[i + 28 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 29 * float4_size], b[i + 29 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 30 * float4_size], b[i + 30 * float4_size]);
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 31 * float4_size], b[i + 31 * float4_size]);
calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i], b[i] );
calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 1 * float4_size], b[i + 1 * float4_size]);
calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 2 * float4_size], b[i + 2 * float4_size]);
calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 3 * float4_size], b[i + 3 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 4 * float4_size], b[i + 4 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 5 * float4_size], b[i + 5 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 6 * float4_size], b[i + 6 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 7 * float4_size], b[i + 7 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 8 * float4_size], b[i + 8 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 9 * float4_size], b[i + 9 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 10 * float4_size], b[i + 10 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 11 * float4_size], b[i + 11 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 12 * float4_size], b[i + 12 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 13 * float4_size], b[i + 13 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 14 * float4_size], b[i + 14 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 15 * float4_size], b[i + 15 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 16 * float4_size], b[i + 16 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 17 * float4_size], b[i + 17 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 18 * float4_size], b[i + 18 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 19 * float4_size], b[i + 19 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 20 * float4_size], b[i + 20 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 21 * float4_size], b[i + 21 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 22 * float4_size], b[i + 22 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 23 * float4_size], b[i + 23 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 24 * float4_size], b[i + 24 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 25 * float4_size], b[i + 25 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 26 * float4_size], b[i + 26 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 27 * float4_size], b[i + 27 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 28 * float4_size], b[i + 28 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 29 * float4_size], b[i + 29 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 30 * float4_size], b[i + 30 * float4_size]);
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 31 * float4_size], b[i + 31 * float4_size]);
}

// calculate any remaining data
float covarianceRemaining = float4_to_float(covariance);
float aStdFloat = float4_to_float(aStd);
float bStdFloat = float4_to_float(bStd);

for (; i < sampleSize; i++) {
calc_correlation_step(covarianceRemaining, aStdFloat, bStdFloat, aMean, bMean, a[i], b[i]);
}

return sum_covariance(covariance, sampleSize) / (sum_stddev(aStd, sampleSize) * sum_stddev(bStd, sampleSize));
return sum_covariance(covarianceRemaining, sampleSize) / (sum_stddev(aStdFloat, sampleSize) * sum_stddev(bStdFloat, sampleSize));;
}

void sum_channels(float *data, long samples, int channels) {
Expand Down Expand Up @@ -198,28 +216,30 @@ void correlate(
}
}

// narrow down exact correlation from previous results
aOffsetStart = max(*bestSampleOffset - initialGranularity * 2, 0);
aOffsetEnd = min(*bestSampleOffset + initialGranularity * 2, aSamples - sampleSize);

aSum = sum_for_mean(a, aOffsetStart, aOffsetStart + sampleSize);

for (long aOffset = aOffsetStart; aOffset < aOffsetEnd; aOffset++) {
float aMean = aSum / sampleSize;
// shift mean sum up one element
aSum -= (double) a[aOffset];
aSum += (double) a[aOffset + sampleSize];

correlation = calc_correlation(a + aOffset, b, aMean, bMean, sampleSize);

if (*bestCorrelation < correlation) {
bestAMean = aMean;
*bestCorrelation = correlation;
*bestSampleOffset = aOffset;
if (initialGranularity > 1) {
// narrow down exact correlation from previous results
aOffsetStart = max(*bestSampleOffset - initialGranularity * initialGranularity, 0);
aOffsetEnd = min(*bestSampleOffset + initialGranularity * initialGranularity, aSamples - sampleSize);

aSum = sum_for_mean(a, aOffsetStart, aOffsetStart + sampleSize);

for (long aOffset = aOffsetStart; aOffset < aOffsetEnd; aOffset++) {
float aMean = aSum / sampleSize;
// shift mean sum up one element
aSum -= (double) a[aOffset];
aSum += (double) a[aOffset + sampleSize];

correlation = calc_correlation(a + aOffset, b, aMean, bMean, sampleSize);

if (*bestCorrelation < correlation) {
bestAMean = aMean;
*bestCorrelation = correlation;
*bestSampleOffset = aOffset;
}
}

long bOffsetStart = 0;
long bOffsetEnd = sampleSize;
float bMeanLength = bOffsetEnd;
}

long bOffsetStart = 0;
long bOffsetEnd = sampleSize;
float bMeanLength = bOffsetEnd;
}
Loading

0 comments on commit f8fb1d4

Please sign in to comment.