diff --git a/third_party/opus/BUILD.gn b/third_party/opus/BUILD.gn index 308f73f6094e..121c4d7c61f4 100644 --- a/third_party/opus/BUILD.gn +++ b/third_party/opus/BUILD.gn @@ -1,4 +1,4 @@ -# Copyright 2014 The Chromium Authors. All rights reserved. +# Copyright 2014 The Chromium Authors # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -11,7 +11,7 @@ use_opus_fixed_point = current_cpu == "arm" || current_cpu == "arm64" # If ARM optimizations shall be used to accelerate performance. # TODO(scottmg): Disabled on Fuchsia for now, see https://crbug.com/775272. use_opus_arm_optimization = - current_cpu == "arm" || (current_cpu == "arm64" && is_ios) + current_cpu == "arm" || (current_cpu == "arm64" && (is_ios || is_win)) && !is_starboard # NaCl, unlike Chrome, doesn't target SSE2 minimum, so skip optimizations for @@ -626,21 +626,24 @@ test("test_opus_padding") { deps = [ ":opus" ] } -# Compilation fails on windows due to wstring/string mistmatch. -# This is not worth looking at it since the benchmark is tailored for android. -# It is ok to run it on linux though, for experimentation purpose. -if (is_android || is_linux || is_chromeos) { - test("opus_tests") { - sources = [ "tests/opus_benchmark.cc" ] - - data = [ "tests/resources/speech_mono_32_48kHz.pcm" ] - - deps = [ - ":opus", - "//base", - "//testing/gtest", - "//testing/gtest:gtest_main", - ] +# Not all checkouts have a //base directory. +if (build_with_chromium) { + # Compilation fails on windows due to wstring/string mistmatch. + # This is not worth looking at it since the benchmark is tailored for android. + # It is ok to run it on linux though, for experimentation purpose. + if (is_android || is_linux || is_chromeos) { + test("opus_tests") { + sources = [ "tests/opus_benchmark.cc" ] + + data = [ "tests/resources/speech_mono_32_48kHz.pcm" ] + + deps = [ + ":opus", + "//base", + "//testing/gtest", + "//testing/gtest:gtest_main", + ] + } } } } diff --git a/third_party/opus/OWNERS b/third_party/opus/OWNERS index f46b8d176964..a3c8f89f1731 100644 --- a/third_party/opus/OWNERS +++ b/third_party/opus/OWNERS @@ -1,2 +1,2 @@ flim@chromium.org -henrika@chromium.org +gustaf@chromium.org diff --git a/third_party/opus/README.chromium b/third_party/opus/README.chromium index f0818fb17b23..24120310e433 100644 --- a/third_party/opus/README.chromium +++ b/third_party/opus/README.chromium @@ -1,6 +1,6 @@ Name: opus URL: https://gitlab.xiph.org/xiph/opus -Version: 16395923 +Version: 8cf872a1 License: BSD License File: src/COPYING Security Critical: yes @@ -18,4 +18,5 @@ Local changes: * add workaround to ignore some int-overflows when fuzzing (see crbug/1146174) Opus' own unit tests are located in ./src/tests -Additional chromium tests are located in ./tests +Additional chromium tests are located in ./tests, and require chromium +checkout to run the tests. diff --git a/third_party/opus/convert_rtcd_assembler.py b/third_party/opus/convert_rtcd_assembler.py index ad7e36d3ac38..14e8d08bf660 100755 --- a/third_party/opus/convert_rtcd_assembler.py +++ b/third_party/opus/convert_rtcd_assembler.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014 The Chromium Authors. All rights reserved. +# Copyright 2014 The Chromium Authors # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. diff --git a/third_party/opus/src/CMakeLists.txt b/third_party/opus/src/CMakeLists.txt index 02de9b74361d..9d824cdcf292 100644 --- a/third_party/opus/src/CMakeLists.txt +++ b/third_party/opus/src/CMakeLists.txt @@ -366,11 +366,23 @@ if(NOT OPUS_ENABLE_FLOAT_API) endif() if(NOT OPUS_DISABLE_INTRINSICS) - if((OPUS_X86_MAY_HAVE_SSE AND NOT OPUS_X86_PRESUME_SSE) OR + if(((OPUS_X86_MAY_HAVE_SSE AND NOT OPUS_X86_PRESUME_SSE) OR (OPUS_X86_MAY_HAVE_SSE2 AND NOT OPUS_X86_PRESUME_SSE2) OR (OPUS_X86_MAY_HAVE_SSE4_1 AND NOT OPUS_X86_PRESUME_SSE4_1) OR - (OPUS_X86_MAY_HAVE_AVX AND NOT OPUS_X86_PRESUME_AVX)) + (OPUS_X86_MAY_HAVE_AVX AND NOT OPUS_X86_PRESUME_AVX)) AND + RUNTIME_CPU_CAPABILITY_DETECTION) target_compile_definitions(opus PRIVATE OPUS_HAVE_RTCD) + if(NOT MSVC) + if(CPU_INFO_BY_ASM_SUPPORTED) + target_compile_definitions(opus PRIVATE CPU_INFO_BY_ASM) + elseif(CPU_INFO_BY_C_SUPPORTED) + target_compile_definitions(opus PRIVATE CPU_INFO_BY_C) + else() + message(ERROR "Runtime cpu capability detection is enabled while CPU_INFO is not supported") + endif() + endif() + add_sources_group(opus celt ${celt_sources_x86_rtcd}) + add_sources_group(opus silk ${silk_sources_x86_rtcd}) endif() if(SSE1_SUPPORTED) @@ -455,15 +467,13 @@ if(NOT OPUS_DISABLE_INTRINSICS) endif() endif() - if(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm|aarch64)") - add_sources_group(opus celt ${celt_sources_arm}) - endif() - if(COMPILER_SUPPORT_NEON) if(OPUS_MAY_HAVE_NEON) if(RUNTIME_CPU_CAPABILITY_DETECTION) message(STATUS "OPUS_MAY_HAVE_NEON enabling runtime detection") target_compile_definitions(opus PRIVATE OPUS_HAVE_RTCD) + add_sources_group(opus celt ${celt_sources_arm_rtcd}) + add_sources_group(opus silk ${silk_sources_arm_rtcd}) else() message(ERROR "Runtime cpu capability detection needed for MAY_HAVE_NEON") endif() @@ -565,6 +575,7 @@ if(OPUS_BUILD_PROGRAMS) target_include_directories(opus_custom_demo PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(opus_custom_demo PRIVATE opus) + target_compile_definitions(opus_custom_demo PRIVATE OPUS_BUILD) endif() add_executable(opus_demo ${opus_demo_sources}) @@ -572,11 +583,16 @@ if(OPUS_BUILD_PROGRAMS) target_include_directories(opus_demo PRIVATE silk) # debug.h target_include_directories(opus_demo PRIVATE celt) # arch.h target_link_libraries(opus_demo PRIVATE opus ${OPUS_REQUIRED_LIBRARIES}) + target_compile_definitions(opus_demo PRIVATE OPUS_BUILD) # compare add_executable(opus_compare ${opus_compare_sources}) target_include_directories(opus_compare PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(opus_compare PRIVATE opus ${OPUS_REQUIRED_LIBRARIES}) + if(MSVC) + # move cosmetic warning to level 4 for opus_compare + target_compile_options(opus_compare PRIVATE /w44244) + endif() endif() if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS) @@ -587,6 +603,7 @@ if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS) target_include_directories(test_opus_decode PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(test_opus_decode PRIVATE opus) + target_compile_definitions(test_opus_decode PRIVATE OPUS_BUILD) if(OPUS_FIXED_POINT) target_compile_definitions(test_opus_decode PRIVATE DISABLE_FLOAT_API) endif() @@ -608,6 +625,7 @@ if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS) target_include_directories(test_opus_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt) target_link_libraries(test_opus_api PRIVATE opus) + target_compile_definitions(test_opus_api PRIVATE OPUS_BUILD) if(OPUS_FIXED_POINT) target_compile_definitions(test_opus_api PRIVATE DISABLE_FLOAT_API) endif() @@ -620,6 +638,7 @@ if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS) target_include_directories(test_opus_encode PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt) target_link_libraries(test_opus_encode PRIVATE opus) + target_compile_definitions(test_opus_encode PRIVATE OPUS_BUILD) add_test(NAME test_opus_encode COMMAND ${CMAKE_COMMAND} -DTEST_EXECUTABLE=$ -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} diff --git a/third_party/opus/src/Makefile.am b/third_party/opus/src/Makefile.am index 70a2ebfaef29..492fc09da4f7 100644 --- a/third_party/opus/src/Makefile.am +++ b/third_party/opus/src/Makefile.am @@ -36,6 +36,11 @@ else OPUS_SOURCES += $(OPUS_SOURCES_FLOAT) endif +if CPU_X86 +if HAVE_RTCD +CELT_SOURCES += $(CELT_SOURCES_X86_RTCD) +SILK_SOURCES += $(SILK_SOURCES_X86_RTCD) +endif if HAVE_SSE CELT_SOURCES += $(CELT_SOURCES_SSE) endif @@ -45,10 +50,13 @@ endif if HAVE_SSE4_1 CELT_SOURCES += $(CELT_SOURCES_SSE4_1) endif +endif if CPU_ARM -CELT_SOURCES += $(CELT_SOURCES_ARM) -SILK_SOURCES += $(SILK_SOURCES_ARM) +if HAVE_RTCD +CELT_SOURCES += $(CELT_SOURCES_ARM_RTCD) +SILK_SOURCES += $(SILK_SOURCES_ARM_RTCD) +endif if HAVE_ARM_NEON_INTR CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR) @@ -225,6 +233,8 @@ EXTRA_DIST = opus.pc.in \ cmake/RunTest.cmake \ cmake/config.h.cmake.in \ cmake/vla.c \ + cmake/cpu_info_by_asm.c \ + cmake/cpu_info_by_c.c \ meson/get-version.py \ meson/read-sources-list.py \ meson.build \ diff --git a/third_party/opus/src/celt/arm/armcpu.c b/third_party/opus/src/celt/arm/armcpu.c index cce3ae3a9db7..c7d16e6d6161 100644 --- a/third_party/opus/src/celt/arm/armcpu.c +++ b/third_party/opus/src/celt/arm/armcpu.c @@ -156,7 +156,7 @@ opus_uint32 opus_cpu_capabilities(void) "your platform. Reconfigure with --disable-rtcd (or send patches)." #endif -int opus_select_arch(void) +static int opus_select_arch_impl(void) { opus_uint32 flags = opus_cpu_capabilities(); int arch = 0; @@ -184,4 +184,11 @@ int opus_select_arch(void) return arch; } +int opus_select_arch(void) { + int arch = opus_select_arch_impl(); +#ifdef FUZZING + arch = rand()%(arch+1); +#endif + return arch; +} #endif diff --git a/third_party/opus/src/celt/arm/pitch_neon_intr.c b/third_party/opus/src/celt/arm/pitch_neon_intr.c index 1ac38c433a8a..35cc46e2c2b2 100644 --- a/third_party/opus/src/celt/arm/pitch_neon_intr.c +++ b/third_party/opus/src/celt/arm/pitch_neon_intr.c @@ -137,22 +137,27 @@ void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus /* celt_inner_prod_neon_float_c_simulation() simulates the floating-point */ /* operations of celt_inner_prod_neon(), and both functions should have bit */ /* exact output. */ -static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y, int N) +static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y, float *err, int N) { int i; + *err = 0; opus_val32 xy, xy0 = 0, xy1 = 0, xy2 = 0, xy3 = 0; for (i = 0; i < N - 3; i += 4) { xy0 = MAC16_16(xy0, x[i + 0], y[i + 0]); xy1 = MAC16_16(xy1, x[i + 1], y[i + 1]); xy2 = MAC16_16(xy2, x[i + 2], y[i + 2]); xy3 = MAC16_16(xy3, x[i + 3], y[i + 3]); + *err += ABS32(xy0)+ABS32(xy1)+ABS32(xy2)+ABS32(xy3); } xy0 += xy2; xy1 += xy3; xy = xy0 + xy1; + *err += ABS32(xy1)+ABS32(xy0)+ABS32(xy); for (; i < N; i++) { xy = MAC16_16(xy, x[i], y[i]); + *err += ABS32(xy); } + *err = *err*2e-7 + N*1e-37; return xy; } @@ -160,32 +165,10 @@ static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, c /* operations of dual_inner_prod_neon(), and both functions should have bit */ /* exact output. */ static void dual_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) + int N, opus_val32 *xy1, opus_val32 *xy2, float *err) { - int i; - opus_val32 xy01, xy02, xy01_0 = 0, xy01_1 = 0, xy01_2 = 0, xy01_3 = 0, xy02_0 = 0, xy02_1 = 0, xy02_2 = 0, xy02_3 = 0; - for (i = 0; i < N - 3; i += 4) { - xy01_0 = MAC16_16(xy01_0, x[i + 0], y01[i + 0]); - xy01_1 = MAC16_16(xy01_1, x[i + 1], y01[i + 1]); - xy01_2 = MAC16_16(xy01_2, x[i + 2], y01[i + 2]); - xy01_3 = MAC16_16(xy01_3, x[i + 3], y01[i + 3]); - xy02_0 = MAC16_16(xy02_0, x[i + 0], y02[i + 0]); - xy02_1 = MAC16_16(xy02_1, x[i + 1], y02[i + 1]); - xy02_2 = MAC16_16(xy02_2, x[i + 2], y02[i + 2]); - xy02_3 = MAC16_16(xy02_3, x[i + 3], y02[i + 3]); - } - xy01_0 += xy01_2; - xy02_0 += xy02_2; - xy01_1 += xy01_3; - xy02_1 += xy02_3; - xy01 = xy01_0 + xy01_1; - xy02 = xy02_0 + xy02_1; - for (; i < N; i++) { - xy01 = MAC16_16(xy01, x[i], y01[i]); - xy02 = MAC16_16(xy02, x[i], y02[i]); - } - *xy1 = xy01; - *xy2 = xy02; + *xy1 = celt_inner_prod_neon_float_c_simulation(x, y01, &err[0], N); + *xy2 = celt_inner_prod_neon_float_c_simulation(x, y02, &err[1], N); } #endif /* OPUS_CHECK_ASM */ @@ -225,7 +208,12 @@ opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N) } #ifdef OPUS_CHECK_ASM - celt_assert(ABS32(celt_inner_prod_neon_float_c_simulation(x, y, N) - xy) <= VERY_SMALL); + { + float err, res; + res = celt_inner_prod_neon_float_c_simulation(x, y, &err, N); + /*if (ABS32(res - xy) > err) fprintf(stderr, "%g %g %g\n", res, xy, err);*/ + celt_assert(ABS32(res - xy) <= err); + } #endif return xy; @@ -280,9 +268,12 @@ void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus #ifdef OPUS_CHECK_ASM { opus_val32 xy1_c, xy2_c; - dual_inner_prod_neon_float_c_simulation(x, y01, y02, N, &xy1_c, &xy2_c); - celt_assert(ABS32(xy1_c - *xy1) <= VERY_SMALL); - celt_assert(ABS32(xy2_c - *xy2) <= VERY_SMALL); + float err[2]; + dual_inner_prod_neon_float_c_simulation(x, y01, y02, N, &xy1_c, &xy2_c, err); + /*if (ABS32(xy1_c - *xy1) > err[0]) fprintf(stderr, "dual1 fail: %g %g %g\n", xy1_c, *xy1, err[0]); + if (ABS32(xy2_c - *xy2) > err[1]) fprintf(stderr, "dual2 fail: %g %g %g\n", xy2_c, *xy2, err[1]);*/ + celt_assert(ABS32(xy1_c - *xy1) <= err[0]); + celt_assert(ABS32(xy2_c - *xy2) <= err[1]); } #endif } diff --git a/third_party/opus/src/celt/bands.c b/third_party/opus/src/celt/bands.c index 2702963c37af..5320ffab07bf 100644 --- a/third_party/opus/src/celt/bands.c +++ b/third_party/opus/src/celt/bands.c @@ -901,7 +901,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, sctx->itheta = itheta; sctx->qalloc = qalloc; } -static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b, +static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, celt_norm *lowband_out) { int c; @@ -926,7 +926,6 @@ static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, sign = ec_dec_bits(ec, 1); } ctx->remaining_bits -= 1<resynth) x[0] = sign ? -NORM_SCALING : NORM_SCALING; @@ -1134,7 +1133,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, /* Special case for one sample */ if (N==1) { - return quant_band_n1(ctx, X, NULL, b, lowband_out); + return quant_band_n1(ctx, X, NULL, lowband_out); } if (tf_change>0) @@ -1256,7 +1255,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm /* Special case for one sample */ if (N==1) { - return quant_band_n1(ctx, X, Y, b, lowband_out); + return quant_band_n1(ctx, X, Y, lowband_out); } orig_fill = fill; @@ -1381,6 +1380,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm return cm; } +#ifndef DISABLE_UPDATE_DRAFT static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo) { int n1, n2; @@ -1393,6 +1393,7 @@ static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm if (dual_stereo) OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1); } +#endif void quant_all_bands(int encode, const CELTMode *m, int start, int end, celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, diff --git a/third_party/opus/src/celt/celt_decoder.c b/third_party/opus/src/celt/celt_decoder.c index 74ca3b740df7..883dae15d290 100644 --- a/third_party/opus/src/celt/celt_decoder.c +++ b/third_party/opus/src/celt/celt_decoder.c @@ -90,7 +90,7 @@ struct OpusCustomDecoder { opus_uint32 rng; int error; int last_pitch_index; - int loss_count; + int loss_duration; int skip_plc; int postfilter_period; int postfilter_period_old; @@ -512,7 +512,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) int nbEBands; int overlap; int start; - int loss_count; + int loss_duration; int noise_based; const opus_int16 *eBands; SAVE_STACK; @@ -532,9 +532,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) oldLogE2 = oldLogE + 2*nbEBands; backgroundLogE = oldLogE2 + 2*nbEBands; - loss_count = st->loss_count; + loss_duration = st->loss_duration; start = st->start; - noise_based = loss_count >= 5 || start != 0 || st->skip_plc; + noise_based = loss_duration >= 40 || start != 0 || st->skip_plc; if (noise_based) { /* Noise-based PLC/CNG */ @@ -557,9 +557,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) #else ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ #endif + c=0; do { + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, + DECODE_BUFFER_SIZE-N+(overlap>>1)); + } while (++crng = seed; - c=0; do { - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, - DECODE_BUFFER_SIZE-N+(overlap>>1)); - } while (++cdownsample, 0, st->arch); } else { int exc_length; @@ -602,7 +601,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) VARDECL(opus_val16, _exc); VARDECL(opus_val16, fir_tmp); - if (loss_count == 0) + if (loss_duration == 0) { st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); } else { @@ -630,9 +629,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) buf = decode_mem[c]; for (i=0;iloss_count = loss_count+1; + /* Saturate to soemthing large to avoid wrap-around. */ + st->loss_duration = IMIN(10000, loss_duration+(1<skip_plc = st->loss_count != 0; + st->skip_plc = st->loss_duration != 0; if (dec == NULL) { @@ -1140,25 +1141,21 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat if (C==1) OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); - /* In case start or end were to change */ if (!isTransient) { - opus_val16 max_background_increase; OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands); OPUS_COPY(oldLogE, oldBandE, 2*nbEBands); - /* In normal circumstances, we only allow the noise floor to increase by - up to 2.4 dB/second, but when we're in DTX, we allow up to 6 dB - increase for each update.*/ - if (st->loss_count < 10) - max_background_increase = M*QCONST16(0.001f,DB_SHIFT); - else - max_background_increase = QCONST16(1.f,DB_SHIFT); - for (i=0;i<2*nbEBands;i++) - backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]); } else { for (i=0;i<2*nbEBands;i++) oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); } + /* In normal circumstances, we only allow the noise floor to increase by + up to 2.4 dB/second, but when we're in DTX we give the weight of + all missing packets to the update packet. */ + max_background_increase = IMIN(160, st->loss_duration+M)*QCONST16(0.001f,DB_SHIFT); + for (i=0;i<2*nbEBands;i++) + backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]); + /* In case start or end were to change */ c=0; do { for (i=0;irng = dec->rng; deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); - st->loss_count = 0; + st->loss_duration = 0; RESTORE_STACK; if (ec_tell(dec) > 8*len) return OPUS_INTERNAL_ERROR; diff --git a/third_party/opus/src/celt/celt_encoder.c b/third_party/opus/src/celt/celt_encoder.c index d6f8afc20bfb..637d442cf7d6 100644 --- a/third_party/opus/src/celt/celt_encoder.c +++ b/third_party/opus/src/celt/celt_encoder.c @@ -1719,8 +1719,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); - for (i=0;iupsample, st->arch); @@ -1856,8 +1859,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); amp2Log2(mode, effEnd, end, bandE, bandLogE, C); /* Compensate for the scaling of short vs long mdcts */ - for (i=0;i> 16; if (!VERIFY_INT(res)) { - fprintf (stderr, "MULT32_32_Q16: output is not int: %d*%d=%d\n", a, b, (int)res); + fprintf (stderr, "MULT32_32_Q16: output is not int: %lld*%lld=%lld\n", (long long)a, (long long)b, (long long)res); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -491,7 +491,7 @@ static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int celt_assert(0); #endif } - if (ABS32(b)>=((opus_val32)(1)<<(15+Q))) + if (ABS32(b)>=((opus_int64)(1)<<(16+Q))) { fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); #ifdef FIXED_DEBUG_ASSERT @@ -524,7 +524,7 @@ static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int celt_assert(0); #endif } - if (ABS32(b)>=((opus_int64)(1)<<(15+Q))) + if (ABS32(b)>=((opus_int64)(1)<<(16+Q))) { fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n\n", Q, (int)a, (int)b,file, line); #ifdef FIXED_DEBUG_ASSERT @@ -831,6 +831,6 @@ static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) #undef PRINT_MIPS -#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0); +#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", (unsigned long long)celt_mips);} while (0); #endif diff --git a/third_party/opus/src/celt/float_cast.h b/third_party/opus/src/celt/float_cast.h index 9d34976ee216..8915a5fd7f6d 100644 --- a/third_party/opus/src/celt/float_cast.h +++ b/third_party/opus/src/celt/float_cast.h @@ -99,7 +99,7 @@ static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_s return intgr ; } -#elif defined(HAVE_LRINTF) +#elif defined(HAVE_LRINTF) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* These defines enable functionality introduced with the 1999 ISO C ** standard. They must be defined before the inclusion of math.h to @@ -117,7 +117,7 @@ static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_s #include #define float2int(x) lrintf(x) -#elif (defined(HAVE_LRINT)) +#elif defined(HAVE_LRINT) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define _ISOC9X_SOURCE 1 #define _ISOC99_SOURCE 1 diff --git a/third_party/opus/src/celt/kiss_fft.h b/third_party/opus/src/celt/kiss_fft.h index bffa2bfad639..1cd9d6ffc712 100644 --- a/third_party/opus/src/celt/kiss_fft.h +++ b/third_party/opus/src/celt/kiss_fft.h @@ -52,6 +52,10 @@ extern "C" { # define kiss_fft_scalar opus_int32 # define kiss_twiddle_scalar opus_int16 +/* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single memory + * access, and could benefit from additional alignment. + */ +#define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32)) #else # ifndef kiss_fft_scalar @@ -62,6 +66,13 @@ extern "C" { # endif #endif +#if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT) +#define KISS_TWIDDLE_CPX_ALIGNED \ + __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT))) +#else +#define KISS_TWIDDLE_CPX_ALIGNED +#endif + typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; @@ -70,7 +81,7 @@ typedef struct { typedef struct { kiss_twiddle_scalar r; kiss_twiddle_scalar i; -}kiss_twiddle_cpx; +} KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx; #define MAXFACTORS 8 /* e.g. an fft of length 128 has 4 factors diff --git a/third_party/opus/src/celt/mathops.h b/third_party/opus/src/celt/mathops.h index fe29dac1c21d..478ac9187c6f 100644 --- a/third_party/opus/src/celt/mathops.h +++ b/third_party/opus/src/celt/mathops.h @@ -153,7 +153,7 @@ static OPUS_INLINE float celt_exp2(float x) float f; opus_uint32 i; } res; - integer = floor(x); + integer = (int)floor(x); if (integer < -50) return 0; frac = x-integer; diff --git a/third_party/opus/src/celt/meson.build b/third_party/opus/src/celt/meson.build index 370ea1fe0ac7..ad95d949bee0 100644 --- a/third_party/opus/src/celt/meson.build +++ b/third_party/opus/src/celt/meson.build @@ -10,6 +10,10 @@ celt_neon_intr_sources = sources['CELT_SOURCES_ARM_NEON_INTR'] celt_static_libs = [] +if host_cpu_family in ['x86', 'x86_64'] and opus_conf.has('OPUS_HAVE_RTCD') + celt_sources += sources['CELT_SOURCES_X86_RTCD'] +endif + foreach intr_name : ['sse', 'sse2', 'sse4_1', 'neon_intr'] have_intr = get_variable('have_' + intr_name) if not have_intr @@ -30,7 +34,9 @@ if (intrinsics_support.length() + asm_optimization.length() + inline_optimizatio endif if host_cpu_family in ['arm', 'aarch64'] and have_arm_intrinsics_or_asm - celt_sources += sources['CELT_SOURCES_ARM'] + if opus_conf.has('OPUS_HAVE_RTCD') + celt_sources += sources['CELT_SOURCES_ARM_RTCD'] + endif if have_arm_ne10 celt_sources += sources['CELT_SOURCES_ARM_NE10'] endif diff --git a/third_party/opus/src/celt/modes.c b/third_party/opus/src/celt/modes.c index 390c5e8aeb36..23f7cde66251 100644 --- a/third_party/opus/src/celt/modes.c +++ b/third_party/opus/src/celt/modes.c @@ -173,7 +173,10 @@ static void compute_allocation_table(CELTMode *mode) mode->nbAllocVectors = BITALLOC_SIZE; allocVectors = opus_alloc(sizeof(unsigned char)*(BITALLOC_SIZE*mode->nbEBands)); if (allocVectors==NULL) + { + mode->allocVectors = NULL; return; + } /* Check for standard mode */ if (mode->Fs == 400*(opus_int32)mode->shortMdctSize) diff --git a/third_party/opus/src/celt/pitch.c b/third_party/opus/src/celt/pitch.c index 872582a48a55..7998db4164f4 100644 --- a/third_party/opus/src/celt/pitch.c +++ b/third_party/opus/src/celt/pitch.c @@ -161,17 +161,26 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x shift=0; if (C==2) shift++; -#endif for (i=1;i>1;i++) - x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift); - x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift); + x_lp[i] = SHR32(x[0][(2*i-1)], shift+2) + SHR32(x[0][(2*i+1)], shift+2) + SHR32(x[0][2*i], shift+1); + x_lp[0] = SHR32(x[0][1], shift+2) + SHR32(x[0][0], shift+1); if (C==2) { for (i=1;i>1;i++) - x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift); - x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift); + x_lp[i] += SHR32(x[1][(2*i-1)], shift+2) + SHR32(x[1][(2*i+1)], shift+2) + SHR32(x[1][2*i], shift+1); + x_lp[0] += SHR32(x[1][1], shift+2) + SHR32(x[1][0], shift+1); } - +#else + for (i=1;i>1;i++) + x_lp[i] = .25f*x[0][(2*i-1)] + .25f*x[0][(2*i+1)] + .5f*x[0][2*i]; + x_lp[0] = .25f*x[0][1] + .5f*x[0][0]; + if (C==2) + { + for (i=1;i>1;i++) + x_lp[i] += .25f*x[1][(2*i-1)] + .25f*x[1][(2*i+1)] + .5f*x[1][2*i]; + x_lp[0] += .25f*x[1][1] + .5f*x[1][0]; + } +#endif _celt_autocorr(x_lp, ac, NULL, 0, 4, len>>1, arch); @@ -249,7 +258,7 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 maxcorr=1; #endif celt_assert(max_pitch>0); - celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); + celt_sig_assert(((size_t)_x&3)==0); for (i=0;i (depth_threshold*band_width<>4 && j<=signalBandwidth)) diff --git a/third_party/opus/src/celt/tests/test_unit_dft.c b/third_party/opus/src/celt/tests/test_unit_dft.c index 70f8f4937ba8..ae9a7b56872e 100644 --- a/third_party/opus/src/celt/tests/test_unit_dft.c +++ b/third_party/opus/src/celt/tests/test_unit_dft.c @@ -144,8 +144,9 @@ void test1d(int nfft,int isinverse,int arch) int main(int argc,char ** argv) { + int arch; ALLOC_STACK; - int arch = opus_select_arch(); + arch = opus_select_arch(); if (argc>1) { int k; diff --git a/third_party/opus/src/celt/tests/test_unit_entropy.c b/third_party/opus/src/celt/tests/test_unit_entropy.c index 7f674529dfd6..b1619b74defa 100644 --- a/third_party/opus/src/celt/tests/test_unit_entropy.c +++ b/third_party/opus/src/celt/tests/test_unit_entropy.c @@ -104,7 +104,7 @@ int main(int _argc,char **_argv){ nbits=ec_tell_frac(&enc); ec_enc_done(&enc); fprintf(stderr, - "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n", + "Encoded %0.2f bits of entropy to %0.2f bits (%0.3f%% wasted).\n", entropy,ldexp(nbits,-3),100*(nbits-ldexp(entropy,3))/nbits); fprintf(stderr,"Packed to %li bytes.\n",(long)ec_range_bytes(&enc)); ec_dec_init(&dec,ptr,DATA_SIZE); @@ -129,7 +129,7 @@ int main(int _argc,char **_argv){ nbits2=ec_tell_frac(&dec); if(nbits!=nbits2){ fprintf(stderr, - "Reported number of bits used was %0.2lf, should be %0.2lf.\n", + "Reported number of bits used was %0.2f, should be %0.2f.\n", ldexp(nbits2,-3),ldexp(nbits,-3)); ret=-1; } diff --git a/third_party/opus/src/celt/tests/test_unit_mdct.c b/third_party/opus/src/celt/tests/test_unit_mdct.c index 4a563ccfe3c9..844c5b481f50 100644 --- a/third_party/opus/src/celt/tests/test_unit_mdct.c +++ b/third_party/opus/src/celt/tests/test_unit_mdct.c @@ -184,8 +184,9 @@ void test1d(int nfft,int isinverse,int arch) int main(int argc,char ** argv) { + int arch; ALLOC_STACK; - int arch = opus_select_arch(); + arch = opus_select_arch(); if (argc>1) { int k; diff --git a/third_party/opus/src/celt/x86/celt_lpc_sse.h b/third_party/opus/src/celt/x86/celt_lpc_sse.h index 7d1ecf753328..90e69ecf633a 100644 --- a/third_party/opus/src/celt/x86/celt_lpc_sse.h +++ b/third_party/opus/src/celt/x86/celt_lpc_sse.h @@ -33,7 +33,6 @@ #endif #if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -#define OVERRIDE_CELT_FIR void celt_fir_sse4_1( const opus_val16 *x, @@ -44,10 +43,11 @@ void celt_fir_sse4_1( int arch); #if defined(OPUS_X86_PRESUME_SSE4_1) +#define OVERRIDE_CELT_FIR #define celt_fir(x, num, y, N, ord, arch) \ ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, arch)) -#else +#elif defined(OPUS_HAVE_RTCD) extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( const opus_val16 *x, @@ -57,6 +57,7 @@ extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( int ord, int arch); +#define OVERRIDE_CELT_FIR # define celt_fir(x, num, y, N, ord, arch) \ ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, arch)) diff --git a/third_party/opus/src/celt/x86/pitch_sse.h b/third_party/opus/src/celt/x86/pitch_sse.h index f7a014b6e03a..964aef50dbeb 100644 --- a/third_party/opus/src/celt/x86/pitch_sse.h +++ b/third_party/opus/src/celt/x86/pitch_sse.h @@ -63,7 +63,7 @@ void xcorr_kernel_sse( #define xcorr_kernel(x, y, sum, len, arch) \ ((void)arch, xcorr_kernel_sse(x, y, sum, len)) -#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) +#elif defined(OPUS_HAVE_RTCD) && ((defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))) extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( const opus_val16 *x, @@ -115,8 +115,8 @@ opus_val32 celt_inner_prod_sse( ((void)arch, celt_inner_prod_sse(x, y, N)) -#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) +#elif defined(OPUS_HAVE_RTCD) && (((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \ + (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))) extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( const opus_val16 *x, diff --git a/third_party/opus/src/celt/x86/pitch_sse4_1.c b/third_party/opus/src/celt/x86/pitch_sse4_1.c index a092c68b2433..2bc578304faf 100644 --- a/third_party/opus/src/celt/x86/pitch_sse4_1.c +++ b/third_party/opus/src/celt/x86/pitch_sse4_1.c @@ -117,6 +117,14 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 __m128i sum0, sum1, sum2, sum3, vecSum; __m128i initSum; +#ifdef OPUS_CHECK_ASM + opus_val32 sum_c[4]; + for (j=0;j<4;j++) { + sum_c[j] = sum[j]; + } + xcorr_kernel_c(x, y, sum_c, len); +#endif + celt_assert(len >= 3); sum0 = _mm_setzero_si128(); @@ -177,19 +185,56 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 vecSum = _mm_add_epi32(vecSum, sum2); } - for (;jint: Number of channels (1 or 2) in input signal - * @param [in] application int: Coding mode (@ref OPUS_APPLICATION_VOIP/@ref OPUS_APPLICATION_AUDIO/@ref OPUS_APPLICATION_RESTRICTED_LOWDELAY) + * @param [in] application int: Coding mode (one of @ref OPUS_APPLICATION_VOIP, @ref OPUS_APPLICATION_AUDIO, or @ref OPUS_APPLICATION_RESTRICTED_LOWDELAY) * @param [out] error int*: @ref opus_errorcodes * @note Regardless of the sampling rate and number channels selected, the Opus encoder * can switch to a lower audio bandwidth or number of channels if the bitrate @@ -222,7 +222,7 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusEncoder *opus_encoder_create( * This must be one of 8000, 12000, 16000, * 24000, or 48000. * @param [in] channels int: Number of channels (1 or 2) in input signal - * @param [in] application int: Coding mode (OPUS_APPLICATION_VOIP/OPUS_APPLICATION_AUDIO/OPUS_APPLICATION_RESTRICTED_LOWDELAY) + * @param [in] application int: Coding mode (one of OPUS_APPLICATION_VOIP, OPUS_APPLICATION_AUDIO, or OPUS_APPLICATION_RESTRICTED_LOWDELAY) * @retval #OPUS_OK Success or @ref opus_errorcodes */ OPUS_EXPORT int opus_encoder_init( diff --git a/third_party/opus/src/include/opus_custom.h b/third_party/opus/src/include/opus_custom.h index 2227be011bd2..2f22d4b35a7c 100644 --- a/third_party/opus/src/include/opus_custom.h +++ b/third_party/opus/src/include/opus_custom.h @@ -104,7 +104,8 @@ typedef struct OpusCustomDecoder OpusCustomDecoder; /** The mode contains all the information necessary to create an encoder. Both the encoder and decoder need to be initialized with exactly the same mode, otherwise the output will be - corrupted. + corrupted. The mode MUST NOT BE DESTROYED until the encoders and + decoders that use it are destroyed as well. @brief Mode configuration */ typedef struct OpusCustomMode OpusCustomMode; diff --git a/third_party/opus/src/include/opus_defines.h b/third_party/opus/src/include/opus_defines.h index ceee5b840cf5..94b9e0d9fcb1 100644 --- a/third_party/opus/src/include/opus_defines.h +++ b/third_party/opus/src/include/opus_defines.h @@ -482,7 +482,8 @@ extern "C" { * @param[in] x opus_int32: Allowed values: *
*
0
Disable inband FEC (default).
- *
1
Enable inband FEC.
+ *
1
Inband FEC enabled. If the packet loss rate is sufficiently high, Opus will automatically switch to SILK even at high rates to enable use of that FEC.
+ *
2
Inband FEC enabled, but does not necessarily switch to SILK if we have music.
*
* @hideinitializer */ #define OPUS_SET_INBAND_FEC(x) OPUS_SET_INBAND_FEC_REQUEST, __opus_check_int(x) @@ -491,7 +492,8 @@ extern "C" { * @param[out] x opus_int32 *: Returns one of the following values: *
*
0
Inband FEC disabled (default).
- *
1
Inband FEC enabled.
+ *
1
Inband FEC enabled. If the packet loss rate is sufficiently high, Opus will automatically switch to SILK even at high rates to enable use of that FEC.
+ *
2
Inband FEC enabled, but does not necessarily switch to SILK if we have music.
*
* @hideinitializer */ #define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x) diff --git a/third_party/opus/src/meson.build b/third_party/opus/src/meson.build index 41f69353f047..ed66d3807edc 100644 --- a/third_party/opus/src/meson.build +++ b/third_party/opus/src/meson.build @@ -532,9 +532,9 @@ if not opt_intrinsics.disabled() endif # opt_rtcd else if opt_intrinsics.enabled() - error('intrinsics option enabled, but no intrinsics support for ' + host_machine.get_cpu()) + error('intrinsics option enabled, but no intrinsics support for ' + host_cpu_family) endif - warning('No intrinsics support for ' + host_machine.get_cpu()) + warning('No intrinsics support for ' + host_cpu_family) endif endif diff --git a/third_party/opus/src/meson/get-version.py b/third_party/opus/src/meson/get-version.py index 0e8b8623a2c7..d3835f134c81 100755 --- a/third_party/opus/src/meson/get-version.py +++ b/third_party/opus/src/meson/get-version.py @@ -31,7 +31,7 @@ # check if git checkout git_dir = os.path.join(srcroot, '.git') - is_git = os.path.isdir(git_dir) + is_git = os.path.isdir(git_dir) or os.path.isfile(git_dir) have_git = shutil.which('git') is not None if is_git and have_git: diff --git a/third_party/opus/src/opus.m4 b/third_party/opus/src/opus.m4 index 47f5ec49668c..263470d40fe4 100644 --- a/third_party/opus/src/opus.m4 +++ b/third_party/opus/src/opus.m4 @@ -63,7 +63,7 @@ dnl #include #include -int main () +int main (void) { system("touch conf.opustest"); return 0; diff --git a/third_party/opus/src/silk/MacroDebug.h b/third_party/opus/src/silk/MacroDebug.h index 8dd4ce2ee275..3110da9a7560 100644 --- a/third_party/opus/src/silk/MacroDebug.h +++ b/third_party/opus/src/silk/MacroDebug.h @@ -55,7 +55,7 @@ static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){ opus_int32 ret; - ret = a + b; + ret = (opus_int32)((opus_uint32)a + (opus_uint32)b); if ( ret != silk_ADD_SAT32( a, b ) ) { fprintf (stderr, "silk_ADD32(%d, %d) in %s: line %d\n", a, b, file, line); @@ -101,9 +101,9 @@ static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file #undef silk_SUB32 #define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__) static OPUS_INLINE opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){ - opus_int32 ret; + opus_int64 ret; - ret = a - b; + ret = a - (opus_int64)b; if ( ret != silk_SUB_SAT32( a, b ) ) { fprintf (stderr, "silk_SUB32(%d, %d) in %s: line %d\n", a, b, file, line); @@ -257,7 +257,7 @@ static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, c static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){ opus_int32 ret; opus_int64 ret64; - ret = a32 * b32; + ret = (opus_int32)((opus_uint32)a32 * (opus_uint32)b32); ret64 = (opus_int64)a32 * (opus_int64)b32; if ( (opus_int64)ret != ret64 ) { @@ -333,8 +333,8 @@ static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char #define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__) static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ opus_int32 ret; - ret = silk_ADD32( a32, silk_SMULWB( b32, c32 ) ); - if ( silk_ADD32( a32, silk_SMULWB( b32, c32 ) ) != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) ) + ret = silk_ADD32_ovflw( a32, silk_SMULWB( b32, c32 ) ); + if ( ret != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) ) { fprintf (stderr, "silk_SMLAWB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); #ifdef FIXED_DEBUG_ASSERT @@ -465,7 +465,7 @@ static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char if ( fail ) { - fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line); + fprintf (stderr, "silk_SMULWW(%d, %d) in %s: line %d\n", a32, b32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif @@ -491,12 +491,6 @@ static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_ return ret; } -/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */ -#undef silk_MLA_ovflw -#define silk_MLA_ovflw(a32, b32, c32) ((a32) + ((b32) * (c32))) -#undef silk_SMLABB_ovflw -#define silk_SMLABB_ovflw(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))) - /* no checking needed for silk_SMULL no checking needed for silk_SMLAL no checking needed for silk_SMLALBB @@ -546,10 +540,10 @@ static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, cha static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ opus_int8 ret; int fail = 0; - ret = a << shift; + ret = (opus_int8)((opus_uint8)a << shift); fail |= shift < 0; fail |= shift >= 8; - fail |= (opus_int64)ret != ((opus_int64)a) << shift; + fail |= (opus_int64)ret != (opus_int64)(((opus_uint64)a) << shift); if ( fail ) { fprintf (stderr, "silk_LSHIFT8(%d, %d) in %s: line %d\n", a, shift, file, line); @@ -565,10 +559,10 @@ static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char * static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ opus_int16 ret; int fail = 0; - ret = a << shift; + ret = (opus_int16)((opus_uint16)a << shift); fail |= shift < 0; fail |= shift >= 16; - fail |= (opus_int64)ret != ((opus_int64)a) << shift; + fail |= (opus_int64)ret != (opus_int64)(((opus_uint64)a) << shift); if ( fail ) { fprintf (stderr, "silk_LSHIFT16(%d, %d) in %s: line %d\n", a, shift, file, line); @@ -584,10 +578,10 @@ static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, cha static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ opus_int32 ret; int fail = 0; - ret = a << shift; + ret = (opus_int32)((opus_uint32)a << shift); fail |= shift < 0; fail |= shift >= 32; - fail |= (opus_int64)ret != ((opus_int64)a) << shift; + fail |= (opus_int64)ret != (opus_int64)(((opus_uint64)a) << shift); if ( fail ) { fprintf (stderr, "silk_LSHIFT32(%d, %d) in %s: line %d\n", a, shift, file, line); @@ -603,7 +597,7 @@ static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, cha static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){ opus_int64 ret; int fail = 0; - ret = a << shift; + ret = (opus_int64)((opus_uint64)a << shift); fail |= shift < 0; fail |= shift >= 64; fail |= (ret>>shift) != ((opus_int64)a); @@ -714,8 +708,8 @@ static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift #define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__) static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){ opus_int16 ret; - ret = a + (b << shift); - if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) + ret = a + (opus_int16)((opus_uint16)b << shift); + if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (opus_int64)(((opus_uint64)b) << shift)) ) { fprintf (stderr, "silk_ADD_LSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT @@ -729,8 +723,8 @@ static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int #define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ opus_int32 ret; - ret = a + (b << shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) + ret = silk_ADD32_ovflw(a, (opus_int32)((opus_uint32)b << shift)); + if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (opus_int64)(((opus_uint64)b) << shift)) ) { fprintf (stderr, "silk_ADD_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT @@ -774,7 +768,7 @@ static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int #define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ opus_int32 ret; - ret = a + (b >> shift); + ret = silk_ADD32_ovflw(a, (b >> shift)); if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) { fprintf (stderr, "silk_ADD_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); @@ -804,8 +798,8 @@ static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 #define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ opus_int32 ret; - ret = a - (b << shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) << shift)) ) + ret = silk_SUB32_ovflw(a, (opus_int32)((opus_uint32)b << shift)); + if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (opus_int64)(((opus_uint64)b) << shift)) ) { fprintf (stderr, "silk_SUB_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT @@ -819,7 +813,7 @@ static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opu #define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ opus_int32 ret; - ret = a - (b >> shift); + ret = silk_SUB32_ovflw(a, (b >> shift)); if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift)) ) { fprintf (stderr, "silk_SUB_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); @@ -835,7 +829,7 @@ static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opu static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){ opus_int32 ret; ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; - /* the marco definition can't handle a shift of zero */ + /* the macro definition can't handle a shift of zero */ if ( (shift <= 0) || (shift>31) || ((opus_int64)ret != ((opus_int64)a + ((opus_int64)1 << (shift - 1))) >> shift) ) { fprintf (stderr, "silk_RSHIFT_ROUND(%d, %d) in %s: line %d\n", a, shift, file, line); @@ -850,7 +844,7 @@ static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, #define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__) static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){ opus_int64 ret; - /* the marco definition can't handle a shift of zero */ + /* the macro definition can't handle a shift of zero */ if ( (shift <= 0) || (shift>=64) ) { fprintf (stderr, "silk_RSHIFT_ROUND64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line); diff --git a/third_party/opus/src/silk/NSQ.c b/third_party/opus/src/silk/NSQ.c index 1d64d8e257b8..45dd45ce8dc6 100644 --- a/third_party/opus/src/silk/NSQ.c +++ b/third_party/opus/src/silk/NSQ.c @@ -75,21 +75,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer( void silk_NSQ_c ( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) { opus_int k, lag, start_idx, LSF_interpolation_flag; @@ -173,9 +173,9 @@ void silk_NSQ_c RESTORE_STACK; } -/***********************************/ -/* silk_noise_shape_quantizer */ -/***********************************/ +/******************************/ +/* silk_noise_shape_quantizer */ +/******************************/ #if !defined(OPUS_X86_MAY_HAVE_SSE4_1) static OPUS_INLINE @@ -262,7 +262,7 @@ void silk_noise_shape_quantizer( tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ if( lag > 0 ) { /* Symmetric, packed FIR coefficients */ - n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q13 = silk_SMULWB( silk_ADD_SAT32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); shp_lag_ptr++; diff --git a/third_party/opus/src/silk/NSQ_del_dec.c b/third_party/opus/src/silk/NSQ_del_dec.c index fc6500c9afb4..e9ece38f7e24 100644 --- a/third_party/opus/src/silk/NSQ_del_dec.c +++ b/third_party/opus/src/silk/NSQ_del_dec.c @@ -115,21 +115,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( ); void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) { opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; diff --git a/third_party/opus/src/silk/SigProc_FIX.h b/third_party/opus/src/silk/SigProc_FIX.h index f9ae326326d3..fbdfa82e263a 100644 --- a/third_party/opus/src/silk/SigProc_FIX.h +++ b/third_party/opus/src/silk/SigProc_FIX.h @@ -381,7 +381,7 @@ opus_int32 silk_inner_prod_aligned_scale( const opus_int len /* I vector lengths */ ); -opus_int64 silk_inner_prod16_aligned_64_c( +opus_int64 silk_inner_prod16_c( const opus_int16 *inVec1, /* I input vector 1 */ const opus_int16 *inVec2, /* I input vector 2 */ const opus_int len /* I vector lengths */ @@ -609,12 +609,14 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) /* the following seems faster on x86 */ #define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32) -#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) +#if !defined(OVERRIDE_silk_burg_modified) #define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) +#endif -#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len)) +#if !defined(OVERRIDE_silk_inner_prod16) +#define silk_inner_prod16(inVec1, inVec2, len, arch) \ + ((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len)) #endif #include "Inlines.h" diff --git a/third_party/opus/src/silk/VQ_WMat_EC.c b/third_party/opus/src/silk/VQ_WMat_EC.c index 0f3d545c4ef8..245a7e4b06d1 100644 --- a/third_party/opus/src/silk/VQ_WMat_EC.c +++ b/third_party/opus/src/silk/VQ_WMat_EC.c @@ -64,7 +64,7 @@ void silk_VQ_WMat_EC_c( *rate_dist_Q8 = silk_int32_MAX; *res_nrg_Q15 = silk_int32_MAX; cb_row_Q7 = cb_Q7; - /* In things go really bad, at least *ind is set to something safe. */ + /* If things go really bad, at least *ind is set to something safe. */ *ind = 0; for( k = 0; k < L; k++ ) { opus_int32 penalty; @@ -115,7 +115,7 @@ void silk_VQ_WMat_EC_c( if( sum1_Q15 >= 0 ) { /* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */ bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 + penalty) - (15 << 7) ); - /* In the following line we reduce the codelength component by half ("-1"); seems to slghtly improve quality */ + /* In the following line we reduce the codelength component by half ("-1"); seems to slightly improve quality */ bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 3-1 ); if( bits_tot_Q8 <= *rate_dist_Q8 ) { *rate_dist_Q8 = bits_tot_Q8; diff --git a/third_party/opus/src/silk/control_codec.c b/third_party/opus/src/silk/control_codec.c index 52aa8fded355..784ffe66d665 100644 --- a/third_party/opus/src/silk/control_codec.c +++ b/third_party/opus/src/silk/control_codec.c @@ -415,7 +415,7 @@ static OPUS_INLINE opus_int silk_setup_LBRR( /* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */ psEncC->LBRR_GainIncreases = 7; } else { - psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 ); + psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.2, 16 ) ), 3 ); } } diff --git a/third_party/opus/src/silk/enc_API.c b/third_party/opus/src/silk/enc_API.c index 55a33f37e980..548e07364d22 100644 --- a/third_party/opus/src/silk/enc_API.c +++ b/third_party/opus/src/silk/enc_API.c @@ -270,6 +270,7 @@ opus_int silk_Encode( /* O Returns error co psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); ALLOC( buf, nSamplesFromInputMax, opus_int16 ); while( 1 ) { + int curr_nBitsUsedLBRR = 0; nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx; nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax ); nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); @@ -342,6 +343,7 @@ opus_int silk_Encode( /* O Returns error co opus_uint8 iCDF[ 2 ] = { 0, 0 }; iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); ec_enc_icdf( psRangeEnc, 0, iCDF, 8 ); + curr_nBitsUsedLBRR = ec_tell( psRangeEnc ); /* Encode any LBRR data from previous packet */ /* Encode LBRR flags */ @@ -386,8 +388,7 @@ opus_int silk_Encode( /* O Returns error co for( n = 0; n < encControl->nChannelsInternal; n++ ) { silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) ); } - - psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc ); + curr_nBitsUsedLBRR = ec_tell( psRangeEnc ) - curr_nBitsUsedLBRR; } silk_HP_variable_cutoff( psEnc->state_Fxx ); @@ -396,6 +397,16 @@ opus_int silk_Encode( /* O Returns error co nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); /* Subtract bits used for LBRR */ if( !prefillFlag ) { + /* psEnc->nBitsUsedLBRR is an exponential moving average of the LBRR usage, + except that for the first LBRR frame it does no averaging and for the first + frame after after LBRR, it goes back to zero immediately. */ + if ( curr_nBitsUsedLBRR < 10 ) { + psEnc->nBitsUsedLBRR = 0; + } else if ( psEnc->nBitsUsedLBRR < 10) { + psEnc->nBitsUsedLBRR = curr_nBitsUsedLBRR; + } else { + psEnc->nBitsUsedLBRR = ( psEnc->nBitsUsedLBRR + curr_nBitsUsedLBRR ) / 2; + } nBits -= psEnc->nBitsUsedLBRR; } /* Divide by number of uncoded frames left in packet */ diff --git a/third_party/opus/src/silk/fixed/LTP_scale_ctrl_FIX.c b/third_party/opus/src/silk/fixed/LTP_scale_ctrl_FIX.c index 3dcedef89189..db1016e0b20c 100644 --- a/third_party/opus/src/silk/fixed/LTP_scale_ctrl_FIX.c +++ b/third_party/opus/src/silk/fixed/LTP_scale_ctrl_FIX.c @@ -42,9 +42,14 @@ void silk_LTP_scale_ctrl_FIX( if( condCoding == CODE_INDEPENDENTLY ) { /* Only scale if first frame in packet */ - round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket; - psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( - silk_SMULWB( silk_SMULBB( round_loss, psEncCtrl->LTPredCodGain_Q7 ), SILK_FIX_CONST( 0.1, 9 ) ), 0, 2 ); + round_loss = psEnc->sCmn.PacketLoss_perc * psEnc->sCmn.nFramesPerPacket; + if ( psEnc->sCmn.LBRR_flag ) { + /* LBRR reduces the effective loss. In practice, it does not square the loss because + losses aren't independent, but that still seems to work best. We also never go below 2%. */ + round_loss = 2 + silk_SMULBB( round_loss, round_loss ) / 100; + } + psEnc->sCmn.indices.LTP_scaleIndex = silk_SMULBB( psEncCtrl->LTPredCodGain_Q7, round_loss ) > silk_log2lin( 128*7 + 2900-psEnc->sCmn.SNR_dB_Q7 ); + psEnc->sCmn.indices.LTP_scaleIndex += silk_SMULBB( psEncCtrl->LTPredCodGain_Q7, round_loss ) > silk_log2lin( 128*7 + 3900-psEnc->sCmn.SNR_dB_Q7 ); } else { /* Default is minimum scaling */ psEnc->sCmn.indices.LTP_scaleIndex = 0; diff --git a/third_party/opus/src/silk/fixed/burg_modified_FIX.c b/third_party/opus/src/silk/fixed/burg_modified_FIX.c index 274d4b28e112..185a12b178f7 100644 --- a/third_party/opus/src/silk/fixed/burg_modified_FIX.c +++ b/third_party/opus/src/silk/fixed/burg_modified_FIX.c @@ -68,7 +68,7 @@ void silk_burg_modified_c( celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); /* Compute autocorrelations, added over subframes */ - C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch ); + C0_64 = silk_inner_prod16( x, x, subfr_length*nb_subfr, arch ); lz = silk_CLZ64(C0_64); rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz; if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS; @@ -87,7 +87,7 @@ void silk_burg_modified_c( x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( - silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); + silk_inner_prod16( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); } } } else { @@ -150,7 +150,7 @@ void silk_burg_modified_c( C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ - /* We sometimes have get overflows in the multiplications (even beyond +/- 2^32), + /* We sometimes get overflows in the multiplications (even beyond +/- 2^32), but they cancel each other and the real result seems to always fit in a 32-bit signed integer. This was determined experimentally, not theoretically (unfortunately). */ tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ @@ -253,7 +253,7 @@ void silk_burg_modified_c( if( rshifts > 0 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; - C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); + C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16( x_ptr, x_ptr, D, arch ), rshifts ); } } else { for( s = 0; s < nb_subfr; s++ ) { diff --git a/third_party/opus/src/silk/fixed/find_pred_coefs_FIX.c b/third_party/opus/src/silk/fixed/find_pred_coefs_FIX.c index 606d86334760..ad363fb7bca3 100644 --- a/third_party/opus/src/silk/fixed/find_pred_coefs_FIX.c +++ b/third_party/opus/src/silk/fixed/find_pred_coefs_FIX.c @@ -42,7 +42,8 @@ void silk_find_pred_coefs_FIX( { opus_int i; opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ]; - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; + /* Set to NLSF_Q15 to zero so we don't copy junk to the state. */ + opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]={0}; const opus_int16 *x_ptr; opus_int16 *x_pre_ptr; VARDECL( opus_int16, LPC_in_pre ); diff --git a/third_party/opus/src/silk/fixed/vector_ops_FIX.c b/third_party/opus/src/silk/fixed/vector_ops_FIX.c index d94980014f62..dcf84070a6b0 100644 --- a/third_party/opus/src/silk/fixed/vector_ops_FIX.c +++ b/third_party/opus/src/silk/fixed/vector_ops_FIX.c @@ -87,7 +87,7 @@ opus_int32 silk_inner_prod_aligned( #endif } -opus_int64 silk_inner_prod16_aligned_64_c( +opus_int64 silk_inner_prod16_c( const opus_int16 *inVec1, /* I input vector 1 */ const opus_int16 *inVec2, /* I input vector 2 */ const opus_int len /* I vector lengths */ diff --git a/third_party/opus/src/silk/fixed/x86/burg_modified_FIX_sse4_1.c b/third_party/opus/src/silk/fixed/x86/burg_modified_FIX_sse4_1.c index bbb1ce0fcc45..e58bf079e5a2 100644 --- a/third_party/opus/src/silk/fixed/x86/burg_modified_FIX_sse4_1.c +++ b/third_party/opus/src/silk/fixed/x86/burg_modified_FIX_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -42,7 +42,7 @@ #define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ #define QA 25 -#define N_BITS_HEAD_ROOM 2 +#define N_BITS_HEAD_ROOM 3 #define MIN_RSHIFTS -16 #define MAX_RSHIFTS (32 - QA) @@ -59,7 +59,7 @@ void silk_burg_modified_sse4_1( int arch /* I Run-time architecture */ ) { - opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; + opus_int k, n, s, lz, rshifts, reached_max_gain; opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; const opus_int16 *x_ptr; opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; @@ -68,6 +68,7 @@ void silk_burg_modified_sse4_1( opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; + opus_int64 C0_64; __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; __m128i CONST1 = _mm_set1_epi32(1); @@ -75,23 +76,18 @@ void silk_burg_modified_sse4_1( celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); /* Compute autocorrelations, added over subframes */ - silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); - if( rshifts > MAX_RSHIFTS ) { - C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); - silk_assert( C0 > 0 ); - rshifts = MAX_RSHIFTS; + C0_64 = silk_inner_prod16( x, x, subfr_length*nb_subfr, arch ); + lz = silk_CLZ64(C0_64); + rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz; + if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS; + if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS; + + if (rshifts > 0) { + C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts ); } else { - lz = silk_CLZ32( C0 ) - 1; - rshifts_extra = N_BITS_HEAD_ROOM - lz; - if( rshifts_extra > 0 ) { - rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); - C0 = silk_RSHIFT32( C0, rshifts_extra ); - } else { - rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); - C0 = silk_LSHIFT32( C0, -rshifts_extra ); - } - rshifts += rshifts_extra; + C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts ); } + CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); if( rshifts > 0 ) { @@ -99,7 +95,7 @@ void silk_burg_modified_sse4_1( x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( - silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); + silk_inner_prod16( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); } } } else { @@ -203,8 +199,11 @@ void silk_burg_modified_sse4_1( C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ - tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ - tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ + /* We sometimes get overflows in the multiplications (even beyond +/- 2^32), + but they cancel each other and the real result seems to always fit in a 32-bit + signed integer. This was determined experimentally, not theoretically (unfortunately). */ + tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ + tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ } tmp1 = -tmp1; /* Q17 */ @@ -350,7 +349,7 @@ void silk_burg_modified_sse4_1( if( rshifts > 0 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; - C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); + C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16( x_ptr, x_ptr, D, arch ), rshifts ); } } else { for( s = 0; s < nb_subfr; s++ ) { @@ -374,4 +373,28 @@ void silk_burg_modified_sse4_1( *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ *res_nrg_Q = -rshifts; } + +#ifdef OPUS_CHECK_ASM + { + opus_int32 res_nrg_c = 0; + opus_int res_nrg_Q_c = 0; + opus_int32 A_Q16_c[ MAX_LPC_ORDER ] = {0}; + + silk_burg_modified_c( + &res_nrg_c, + &res_nrg_Q_c, + A_Q16_c, + x, + minInvGain_Q30, + subfr_length, + nb_subfr, + D, + 0 + ); + + silk_assert( *res_nrg == res_nrg_c ); + silk_assert( *res_nrg_Q == res_nrg_Q_c ); + silk_assert( !memcmp( A_Q16, A_Q16_c, D * sizeof( *A_Q16 ) ) ); + } +#endif } diff --git a/third_party/opus/src/silk/fixed/x86/prefilter_FIX_sse.c b/third_party/opus/src/silk/fixed/x86/prefilter_FIX_sse.c deleted file mode 100644 index 555432cd9657..000000000000 --- a/third_party/opus/src/silk/fixed/x86/prefilter_FIX_sse.c +++ /dev/null @@ -1,160 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include "main.h" -#include "celt/x86/x86cpu.h" - -void silk_warped_LPC_analysis_filter_FIX_sse4_1( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -) -{ - opus_int n, i; - opus_int32 acc_Q11, tmp1, tmp2; - - /* Order must be even */ - celt_assert( ( order & 1 ) == 0 ); - - if (order == 10) - { - if (0 == lambda_Q16) - { - __m128i coef_Q13_3210, coef_Q13_7654; - __m128i coef_Q13_0123, coef_Q13_4567; - __m128i state_0123, state_4567; - __m128i xmm_product1, xmm_product2; - __m128i xmm_tempa, xmm_tempb; - - register opus_int32 sum; - register opus_int32 state_8, state_9, state_a; - register opus_int64 coef_Q13_8, coef_Q13_9; - - celt_assert( length > 0 ); - - coef_Q13_3210 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 0 ] ); - coef_Q13_7654 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 4 ] ); - - coef_Q13_0123 = _mm_shuffle_epi32( coef_Q13_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - coef_Q13_4567 = _mm_shuffle_epi32( coef_Q13_7654, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - coef_Q13_8 = (opus_int64) coef_Q13[ 8 ]; - coef_Q13_9 = (opus_int64) coef_Q13[ 9 ]; - - state_0123 = _mm_loadu_si128( (__m128i *)(&state[ 0 ] ) ); - state_4567 = _mm_loadu_si128( (__m128i *)(&state[ 4 ] ) ); - - state_0123 = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - state_4567 = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - state_8 = state[ 8 ]; - state_9 = state[ 9 ]; - state_a = 0; - - for( n = 0; n < length; n++ ) - { - xmm_product1 = _mm_mul_epi32( coef_Q13_0123, state_0123 ); /* 64-bit multiply, only 2 pairs */ - xmm_product2 = _mm_mul_epi32( coef_Q13_4567, state_4567 ); - - xmm_tempa = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - xmm_tempb = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - xmm_product1 = _mm_srli_epi64( xmm_product1, 16 ); /* >> 16, zero extending works */ - xmm_product2 = _mm_srli_epi64( xmm_product2, 16 ); - - xmm_tempa = _mm_mul_epi32( coef_Q13_3210, xmm_tempa ); - xmm_tempb = _mm_mul_epi32( coef_Q13_7654, xmm_tempb ); - - xmm_tempa = _mm_srli_epi64( xmm_tempa, 16 ); - xmm_tempb = _mm_srli_epi64( xmm_tempb, 16 ); - - xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_product1 ); - xmm_tempb = _mm_add_epi32( xmm_tempb, xmm_product2 ); - xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_tempb ); - - sum = (opus_int32)((coef_Q13_8 * state_8) >> 16); - sum += (opus_int32)((coef_Q13_9 * state_9) >> 16); - - xmm_tempa = _mm_add_epi32( xmm_tempa, _mm_shuffle_epi32( xmm_tempa, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); - sum += _mm_cvtsi128_si32( xmm_tempa); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( ( 5 + sum ), 9); - - /* move right */ - state_a = state_9; - state_9 = state_8; - state_8 = _mm_cvtsi128_si32( state_4567 ); - state_4567 = _mm_alignr_epi8( state_0123, state_4567, 4 ); - - state_0123 = _mm_alignr_epi8( _mm_cvtsi32_si128( silk_LSHIFT( input[ n ], 14 ) ), state_0123, 4 ); - } - - _mm_storeu_si128( (__m128i *)( &state[ 0 ] ), _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ) ); - _mm_storeu_si128( (__m128i *)( &state[ 4 ] ), _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ) ); - state[ 8 ] = state_8; - state[ 9 ] = state_9; - state[ 10 ] = state_a; - - return; - } - } - - for( n = 0; n < length; n++ ) { - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); - state[ 0 ] = silk_LSHIFT( input[ n ], 14 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); - state[ 1 ] = tmp2; - acc_Q11 = silk_RSHIFT( order, 1 ); - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); - state[ i ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); - state[ i + 1 ] = tmp2; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); - } - state[ order ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); - } -} diff --git a/third_party/opus/src/silk/fixed/x86/vector_ops_FIX_sse4_1.c b/third_party/opus/src/silk/fixed/x86/vector_ops_FIX_sse4_1.c index c1e90564d0ed..a46289bbc4ba 100644 --- a/third_party/opus/src/silk/fixed/x86/vector_ops_FIX_sse4_1.c +++ b/third_party/opus/src/silk/fixed/x86/vector_ops_FIX_sse4_1.c @@ -36,40 +36,38 @@ #include "SigProc_FIX.h" #include "pitch.h" +#include "celt/x86/x86cpu.h" -opus_int64 silk_inner_prod16_aligned_64_sse4_1( +opus_int64 silk_inner_prod16_sse4_1( const opus_int16 *inVec1, /* I input vector 1 */ const opus_int16 *inVec2, /* I input vector 2 */ const opus_int len /* I vector lengths */ ) { - opus_int i, dataSize8; + opus_int i, dataSize4; opus_int64 sum; - __m128i xmm_tempa; - __m128i inVec1_76543210, acc1; - __m128i inVec2_76543210, acc2; + __m128i xmm_prod_20, xmm_prod_31; + __m128i inVec1_3210, acc1; + __m128i inVec2_3210, acc2; sum = 0; - dataSize8 = len & ~7; + dataSize4 = len & ~3; acc1 = _mm_setzero_si128(); acc2 = _mm_setzero_si128(); - for( i = 0; i < dataSize8; i += 8 ) { - inVec1_76543210 = _mm_loadu_si128( (__m128i *)(&inVec1[i + 0] ) ); - inVec2_76543210 = _mm_loadu_si128( (__m128i *)(&inVec2[i + 0] ) ); + for( i = 0; i < dataSize4; i += 4 ) { + inVec1_3210 = OP_CVTEPI16_EPI32_M64( &inVec1[i + 0] ); + inVec2_3210 = OP_CVTEPI16_EPI32_M64( &inVec2[i + 0] ); + xmm_prod_20 = _mm_mul_epi32( inVec1_3210, inVec2_3210 ); - /* only when all 4 operands are -32768 (0x8000), this results in wrap around */ - inVec1_76543210 = _mm_madd_epi16( inVec1_76543210, inVec2_76543210 ); + inVec1_3210 = _mm_shuffle_epi32( inVec1_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + inVec2_3210 = _mm_shuffle_epi32( inVec2_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + xmm_prod_31 = _mm_mul_epi32( inVec1_3210, inVec2_3210 ); - xmm_tempa = _mm_cvtepi32_epi64( inVec1_76543210 ); - /* equal shift right 8 bytes */ - inVec1_76543210 = _mm_shuffle_epi32( inVec1_76543210, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - inVec1_76543210 = _mm_cvtepi32_epi64( inVec1_76543210 ); - - acc1 = _mm_add_epi64( acc1, xmm_tempa ); - acc2 = _mm_add_epi64( acc2, inVec1_76543210 ); + acc1 = _mm_add_epi64( acc1, xmm_prod_20 ); + acc2 = _mm_add_epi64( acc2, xmm_prod_31 ); } acc1 = _mm_add_epi64( acc1, acc2 ); @@ -81,8 +79,15 @@ opus_int64 silk_inner_prod16_aligned_64_sse4_1( _mm_storel_epi64( (__m128i *)&sum, acc1 ); for( ; i < len; i++ ) { - sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] ); + sum = silk_SMLALBB( sum, inVec1[ i ], inVec2[ i ] ); + } + +#ifdef OPUS_CHECK_ASM + { + opus_int64 sum_c = silk_inner_prod16_c( inVec1, inVec2, len ); + silk_assert( sum == sum_c ); } +#endif return sum; } diff --git a/third_party/opus/src/silk/float/LTP_scale_ctrl_FLP.c b/third_party/opus/src/silk/float/LTP_scale_ctrl_FLP.c index 8dbe29d0fa6b..6f30ff095f31 100644 --- a/third_party/opus/src/silk/float/LTP_scale_ctrl_FLP.c +++ b/third_party/opus/src/silk/float/LTP_scale_ctrl_FLP.c @@ -41,8 +41,14 @@ void silk_LTP_scale_ctrl_FLP( if( condCoding == CODE_INDEPENDENTLY ) { /* Only scale if first frame in packet */ - round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket; - psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( round_loss * psEncCtrl->LTPredCodGain * 0.1f, 0.0f, 2.0f ); + round_loss = psEnc->sCmn.PacketLoss_perc * psEnc->sCmn.nFramesPerPacket; + if ( psEnc->sCmn.LBRR_flag ) { + /* LBRR reduces the effective loss. In practice, it does not square the loss because + losses aren't independent, but that still seems to work best. We also never go below 2%. */ + round_loss = 2 + silk_SMULBB( round_loss, round_loss) / 100; + } + psEnc->sCmn.indices.LTP_scaleIndex = silk_SMULBB( psEncCtrl->LTPredCodGain, round_loss ) > silk_log2lin( 2900 - psEnc->sCmn.SNR_dB_Q7 ); + psEnc->sCmn.indices.LTP_scaleIndex += silk_SMULBB( psEncCtrl->LTPredCodGain, round_loss ) > silk_log2lin( 3900 - psEnc->sCmn.SNR_dB_Q7 ); } else { /* Default is minimum scaling */ psEnc->sCmn.indices.LTP_scaleIndex = 0; diff --git a/third_party/opus/src/silk/float/find_pred_coefs_FLP.c b/third_party/opus/src/silk/float/find_pred_coefs_FLP.c index dcf7c5202d0c..6f7907889375 100644 --- a/third_party/opus/src/silk/float/find_pred_coefs_FLP.c +++ b/third_party/opus/src/silk/float/find_pred_coefs_FLP.c @@ -44,7 +44,8 @@ void silk_find_pred_coefs_FLP( silk_float XXLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ]; silk_float xXLTP[ MAX_NB_SUBFR * LTP_ORDER ]; silk_float invGains[ MAX_NB_SUBFR ]; - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; + /* Set to NLSF_Q15 to zero so we don't copy junk to the state. */ + opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]={0}; const silk_float *x_ptr; silk_float *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ]; silk_float minInvGain; diff --git a/third_party/opus/src/silk/float/wrappers_FLP.c b/third_party/opus/src/silk/float/wrappers_FLP.c index ad90b874a4d8..c0c183e35a12 100644 --- a/third_party/opus/src/silk/float/wrappers_FLP.c +++ b/third_party/opus/src/silk/float/wrappers_FLP.c @@ -190,12 +190,14 @@ void silk_quant_LTP_gains_FLP( opus_int32 XX_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ]; opus_int32 xX_Q17[ MAX_NB_SUBFR * LTP_ORDER ]; - for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) { + i = 0; + do { XX_Q17[ i ] = (opus_int32)silk_float2int( XX[ i ] * 131072.0f ); - } - for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { + } while ( ++i < nb_subfr * LTP_ORDER * LTP_ORDER ); + i = 0; + do { xX_Q17[ i ] = (opus_int32)silk_float2int( xX[ i ] * 131072.0f ); - } + } while ( ++i < nb_subfr * LTP_ORDER ); silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, &pred_gain_dB_Q7, XX_Q17, xX_Q17, subfr_len, nb_subfr, arch ); diff --git a/third_party/opus/src/silk/main.h b/third_party/opus/src/silk/main.h index 1a33eed549b6..a5f568758ffb 100644 --- a/third_party/opus/src/silk/main.h +++ b/third_party/opus/src/silk/main.h @@ -247,21 +247,21 @@ void silk_VQ_WMat_EC_c( /************************************/ void silk_NSQ_c( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); #if !defined(OVERRIDE_silk_NSQ) @@ -273,21 +273,21 @@ void silk_NSQ_c( /* Noise shaping using delayed decision */ void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); #if !defined(OVERRIDE_silk_NSQ_del_dec) diff --git a/third_party/opus/src/silk/meson.build b/third_party/opus/src/silk/meson.build index 706923727aa7..917048b2dc69 100644 --- a/third_party/opus/src/silk/meson.build +++ b/third_party/opus/src/silk/meson.build @@ -21,6 +21,16 @@ endif silk_includes = [opus_includes, include_directories('float', 'fixed')] silk_static_libs = [] +if host_cpu_family in ['x86', 'x86_64'] and opus_conf.has('OPUS_HAVE_RTCD') + silk_sources += sources['SILK_SOURCES_X86_RTCD'] +endif + +if host_cpu_family in ['arm', 'aarch64'] and have_arm_intrinsics_or_asm + if opus_conf.has('OPUS_HAVE_RTCD') + silk_sources += sources['SILK_SOURCES_ARM_RTCD'] + endif +endif + foreach intr_name : ['sse4_1', 'neon_intr'] have_intr = get_variable('have_' + intr_name) if not have_intr diff --git a/third_party/opus/src/silk/stereo_LR_to_MS.c b/third_party/opus/src/silk/stereo_LR_to_MS.c index c8226663c81b..751452cb1913 100644 --- a/third_party/opus/src/silk/stereo_LR_to_MS.c +++ b/third_party/opus/src/silk/stereo_LR_to_MS.c @@ -77,7 +77,7 @@ void silk_stereo_LR_to_MS( ALLOC( LP_mid, frame_length, opus_int16 ); ALLOC( HP_mid, frame_length, opus_int16 ); for( n = 0; n < frame_length; n++ ) { - sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 ); + sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT32( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 ); LP_mid[ n ] = sum; HP_mid[ n ] = mid[ n + 1 ] - sum; } @@ -86,7 +86,7 @@ void silk_stereo_LR_to_MS( ALLOC( LP_side, frame_length, opus_int16 ); ALLOC( HP_side, frame_length, opus_int16 ); for( n = 0; n < frame_length; n++ ) { - sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 ); + sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT32( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 ); LP_side[ n ] = sum; HP_side[ n ] = side[ n + 1 ] - sum; } @@ -207,7 +207,7 @@ void silk_stereo_LR_to_MS( pred0_Q13 += delta0_Q13; pred1_Q13 += delta1_Q13; w_Q24 += deltaw_Q24; - sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_LSHIFT( silk_ADD_LSHIFT32( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); @@ -217,7 +217,7 @@ void silk_stereo_LR_to_MS( pred1_Q13 = -pred_Q13[ 1 ]; w_Q24 = silk_LSHIFT( width_Q14, 10 ); for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { - sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_LSHIFT( silk_ADD_LSHIFT32( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); diff --git a/third_party/opus/src/silk/stereo_MS_to_LR.c b/third_party/opus/src/silk/stereo_MS_to_LR.c index 62521a4f352f..1e01bb6eb374 100644 --- a/third_party/opus/src/silk/stereo_MS_to_LR.c +++ b/third_party/opus/src/silk/stereo_MS_to_LR.c @@ -59,7 +59,7 @@ void silk_stereo_MS_to_LR( for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) { pred0_Q13 += delta0_Q13; pred1_Q13 += delta1_Q13; - sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_LSHIFT( silk_ADD_LSHIFT32( x1[ n ] + (opus_int32)x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); @@ -67,7 +67,7 @@ void silk_stereo_MS_to_LR( pred0_Q13 = pred_Q13[ 0 ]; pred1_Q13 = pred_Q13[ 1 ]; for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { - sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_LSHIFT( silk_ADD_LSHIFT32( x1[ n ] + (opus_int32)x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); diff --git a/third_party/opus/src/silk/tests/test_unit_LPC_inv_pred_gain.c b/third_party/opus/src/silk/tests/test_unit_LPC_inv_pred_gain.c index 67067cead702..7ca902ad9f4d 100644 --- a/third_party/opus/src/silk/tests/test_unit_LPC_inv_pred_gain.c +++ b/third_party/opus/src/silk/tests/test_unit_LPC_inv_pred_gain.c @@ -43,6 +43,7 @@ int check_stability(opus_int16 *A_Q12, int order) { int i; int j; int sum_a, sum_abs_a; + double y[SILK_MAX_ORDER_LPC] = {0}; sum_a = sum_abs_a = 0; for( j = 0; j < order; j++ ) { sum_a += A_Q12[ j ]; @@ -57,7 +58,6 @@ int check_stability(opus_int16 *A_Q12, int order) { if( sum_abs_a < 4096 ) { return 1; } - double y[SILK_MAX_ORDER_LPC] = {0}; y[0] = 1; for( i = 0; i < 10000; i++ ) { double sum = 0; diff --git a/third_party/opus/src/silk/x86/NSQ_del_dec_sse4_1.c b/third_party/opus/src/silk/x86/NSQ_del_dec_sse4_1.c index 2c75ede2dde8..a58a76cd8d21 100644 --- a/third_party/opus/src/silk/x86/NSQ_del_dec_sse4_1.c +++ b/third_party/opus/src/silk/x86/NSQ_del_dec_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -46,6 +46,7 @@ typedef struct { opus_int32 Shape_Q14[ DECISION_DELAY ]; opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; opus_int32 LF_AR_Q14; + opus_int32 Diff_Q14; opus_int32 Seed; opus_int32 SeedInit; opus_int32 RD_Q10; @@ -56,6 +57,7 @@ typedef struct { opus_int32 RD_Q10; opus_int32 xq_Q14; opus_int32 LF_AR_Q14; + opus_int32 Diff_Q14; opus_int32 sLTP_shp_Q14; opus_int32 LPC_exc_Q14; } NSQ_sample_struct; @@ -66,7 +68,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ + const opus_int16 x16[], /* I Input */ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ @@ -112,21 +114,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( ); void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) { opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; @@ -142,8 +144,39 @@ void silk_NSQ_del_dec_sse4_1( VARDECL( opus_int32, delayedGain_Q10 ); VARDECL( NSQ_del_dec_struct, psDelDec ); NSQ_del_dec_struct *psDD; +#ifdef OPUS_CHECK_ASM + silk_nsq_state NSQ_c; + SideInfoIndices psIndices_c; + opus_int8 pulses_c[ MAX_FRAME_LENGTH ]; + const opus_int8 *const pulses_a = pulses; +#endif SAVE_STACK; +#ifdef OPUS_CHECK_ASM + ( void )pulses_a; + silk_memcpy( &NSQ_c, NSQ, sizeof( NSQ_c ) ); + silk_memcpy( &psIndices_c, psIndices, sizeof( psIndices_c ) ); + silk_assert( psEncC->nb_subfr * psEncC->subfr_length <= MAX_FRAME_LENGTH ); + silk_memcpy( pulses_c, pulses, psEncC->nb_subfr * psEncC->subfr_length * sizeof( pulses[0] ) ); + silk_NSQ_del_dec_c( + psEncC, + &NSQ_c, + &psIndices_c, + x16, + pulses_c, + PredCoef_Q12, + LTPCoef_Q14, + AR_Q13, + HarmShapeGain_Q14, + Tilt_Q14, + LF_shp_Q14, + Gains_Q16, + pitchL, + Lambda_Q10, + LTP_scale_Q14 + ); +#endif + /* Set unvoiced lag to the previous one, overwrite later for voiced */ lag = NSQ->lagPrev; @@ -158,6 +191,7 @@ void silk_NSQ_del_dec_sse4_1( psDD->SeedInit = psDD->Seed; psDD->RD_Q10 = 0; psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; + psDD->Diff_Q14 = NSQ->sDiff_shp_Q14; psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); @@ -185,8 +219,7 @@ void silk_NSQ_del_dec_sse4_1( LSF_interpolation_flag = 1; } - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); + ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); @@ -198,7 +231,7 @@ void silk_NSQ_del_dec_sse4_1( for( k = 0; k < psEncC->nb_subfr; k++ ) { A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ]; /* Noise shape parameters */ silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); @@ -257,7 +290,7 @@ void silk_NSQ_del_dec_sse4_1( } } - silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, + silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x16, x_sc_Q10, sLTP, sLTP_Q15, k, psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, @@ -265,7 +298,7 @@ void silk_NSQ_del_dec_sse4_1( Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); - x_Q3 += psEncC->subfr_length; + x16 += psEncC->subfr_length; pulses += psEncC->subfr_length; pxq += psEncC->subfr_length; } @@ -288,6 +321,7 @@ void silk_NSQ_del_dec_sse4_1( for( i = 0; i < decisionDelay; i++ ) { last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; + pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); @@ -298,11 +332,19 @@ void silk_NSQ_del_dec_sse4_1( /* Update states */ NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; + NSQ->sDiff_shp_Q14 = psDD->Diff_Q14; NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; /* Save quantized speech signal */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); + +#ifdef OPUS_CHECK_ASM + silk_assert( !memcmp( &NSQ_c, NSQ, sizeof( NSQ_c ) ) ); + silk_assert( !memcmp( &psIndices_c, psIndices, sizeof( psIndices_c ) ) ); + silk_assert( !memcmp( pulses_c, pulses_a, psEncC->nb_subfr * psEncC->subfr_length * sizeof( pulses[0] ) ) ); +#endif + RESTORE_STACK; } @@ -345,6 +387,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; + int rdo_offset; + VARDECL( NSQ_sample_pair, psSampleState ); NSQ_del_dec_struct *psDD; NSQ_sample_struct *psSS; @@ -356,6 +400,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( celt_assert( nStatesDelayedDecision > 0 ); ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); + rdo_offset = (Lambda_Q10 >> 1) - 512; + shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); @@ -407,8 +453,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( /* Long-term shaping */ if( lag > 0 ) { /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SMULWB( silk_ADD_SAT32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ shp_lag_ptr++; } else { @@ -478,7 +524,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); - /* setp 4 */ + /* step 4 */ psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) ); psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF ); @@ -511,9 +557,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ + celt_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); + tmp2 = silk_SMLAWB( psDD->Diff_Q14, psDD->sAR2_Q14[ 0 ], warping_Q16 ); /* Output of allpass section */ tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); psDD->sAR2_Q14[ 0 ] = tmp2; @@ -543,9 +589,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( /* Input minus prediction plus noise feedback */ /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ + tmp1 = silk_ADD_SAT32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ + tmp1 = silk_SUB_SAT32( tmp2, tmp1 ); /* Q13 */ tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ @@ -559,6 +605,18 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( /* Find two quantization level candidates and measure their rate-distortion */ q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); + if (Lambda_Q10 > 2048) { + /* For aggressive RDO, the bias becomes more than one pulse. */ + if (q1_Q10 > rdo_offset) { + q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 ); + } else if (q1_Q10 < -rdo_offset) { + q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 ); + } else if (q1_Q10 < 0) { + q1_Q0 = -1; + } else { + q1_Q0 = 0; + } + } if( q1_Q0 > 0 ) { q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); @@ -612,8 +670,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 0 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 ); + sLF_AR_shp_Q14 = silk_SUB32( psSS[ 0 ].Diff_Q14, n_AR_Q14 ); + psSS[ 0 ].sLTP_shp_Q14 = silk_SUB_SAT32( sLF_AR_shp_Q14, n_LF_Q14 ); psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; psSS[ 0 ].xq_Q14 = xq_Q14; @@ -626,14 +685,14 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( exc_Q14 = -exc_Q14; } - /* Add predictions */ LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 1 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 ); + sLF_AR_shp_Q14 = silk_SUB32( psSS[ 1 ].Diff_Q14, n_AR_Q14 ); + psSS[ 1 ].sLTP_shp_Q14 = silk_SUB_SAT32( sLF_AR_shp_Q14, n_LF_Q14 ); psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; psSS[ 1 ].xq_Q14 = xq_Q14; @@ -705,6 +764,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( psDD = &psDelDec[ k ]; psSS = &psSampleState[ k ][ 0 ]; psDD->LF_AR_Q14 = psSS->LF_AR_Q14; + psDD->Diff_Q14 = psSS->Diff_Q14; psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; @@ -728,7 +788,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ + const opus_int16 x16[], /* I Input */ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ @@ -742,51 +802,41 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( ) { opus_int i, k, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26; NSQ_del_dec_struct *psDD; - __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; + __m128i xmm_inv_gain_Q26, xmm_x16_x2x0, xmm_x16_x3x1; lag = pitchL[ subfr ]; inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - silk_assert( inv_gain_Q31 != 0 ); - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); + inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 ); - /* prepare inv_gain_Q23 in packed 4 32-bits */ - xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); + /* prepare inv_gain_Q26 in packed 4 32-bits */ + xmm_inv_gain_Q26 = _mm_set1_epi32(inv_gain_Q26); for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { - xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); + xmm_x16_x2x0 = OP_CVTEPI16_EPI32_M64( &(x16[ i ] ) ); + /* equal shift right 4 bytes*/ - xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + xmm_x16_x3x1 = _mm_shuffle_epi32( xmm_x16_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); - xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); + xmm_x16_x2x0 = _mm_mul_epi32( xmm_x16_x2x0, xmm_inv_gain_Q26 ); + xmm_x16_x3x1 = _mm_mul_epi32( xmm_x16_x3x1, xmm_inv_gain_Q26 ); - xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); - xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); + xmm_x16_x2x0 = _mm_srli_epi64( xmm_x16_x2x0, 16 ); + xmm_x16_x3x1 = _mm_slli_epi64( xmm_x16_x3x1, 16 ); - xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); + xmm_x16_x2x0 = _mm_blend_epi16( xmm_x16_x2x0, xmm_x16_x3x1, 0xCC ); - _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 ); + _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 ); } for( ; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); + x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 ); } - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ if( NSQ->rewhite_flag ) { if( subfr == 0 ) { @@ -800,7 +850,9 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( } /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + /* Scale long-term shaping state */ { __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; @@ -841,6 +893,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( /* Scale scalar states */ psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); + psDD->Diff_Q14 = silk_SMULWW( gain_adj_Q16, psDD->Diff_Q14 ); /* Scale short-term prediction and shaping states */ for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { @@ -855,5 +908,8 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( } } } + + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; } } diff --git a/third_party/opus/src/silk/x86/NSQ_sse4_1.c b/third_party/opus/src/silk/x86/NSQ_sse4_1.c index b0315e35fc89..d5ae1d3b1cd5 100644 --- a/third_party/opus/src/silk/x86/NSQ_sse4_1.c +++ b/third_party/opus/src/silk/x86/NSQ_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -37,17 +37,17 @@ #include "stack_alloc.h" static OPUS_INLINE void silk_nsq_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + const opus_int16 x16[], /* I input */ + opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ + const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I subframe number */ + const opus_int LTP_scale_Q14, /* I */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type /* I Signal type */ ); static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( @@ -65,27 +65,28 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( opus_int Tilt_Q14, /* I Spectral tilt */ opus_int32 LF_shp_Q14, /* I */ opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ opus_int offset_Q10, /* I */ opus_int length, /* I Input length */ opus_int32 table[][4] /* I */ ); void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) { opus_int k, lag, start_idx, LSF_interpolation_flag; @@ -101,8 +102,41 @@ void silk_NSQ_sse4_1( opus_int32 tmp1; opus_int32 q1_Q10, q2_Q10, rd1_Q20, rd2_Q20; +#ifdef OPUS_CHECK_ASM + silk_nsq_state NSQ_c; + SideInfoIndices psIndices_c; + opus_int8 pulses_c[ MAX_FRAME_LENGTH ]; + const opus_int8 *const pulses_a = pulses; +#endif + SAVE_STACK; +#ifdef OPUS_CHECK_ASM + ( void )pulses_a; + silk_memcpy( &NSQ_c, NSQ, sizeof( NSQ_c ) ); + silk_memcpy( &psIndices_c, psIndices, sizeof( psIndices_c ) ); + silk_assert( psEncC->nb_subfr * psEncC->subfr_length <= MAX_FRAME_LENGTH ); + silk_memcpy( pulses_c, pulses, psEncC->nb_subfr * psEncC->subfr_length * sizeof( pulses[0] ) ); + + silk_NSQ_c( + psEncC, + &NSQ_c, + &psIndices_c, + x16, + pulses_c, + PredCoef_Q12, + LTPCoef_Q14, + AR_Q13, + HarmShapeGain_Q14, + Tilt_Q14, + LF_shp_Q14, + Gains_Q16, + pitchL, + Lambda_Q10, + LTP_scale_Q14 + ); +#endif + NSQ->rand_seed = psIndices->Seed; /* Set unvoiced lag to the previous one, overwrite later for voiced */ @@ -172,8 +206,7 @@ void silk_NSQ_sse4_1( LSF_interpolation_flag = 1; } - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); + ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); /* Set up pointers to start of sub frame */ @@ -183,7 +216,7 @@ void silk_NSQ_sse4_1( for( k = 0; k < psEncC->nb_subfr; k++ ) { A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ]; B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ]; /* Noise shape parameters */ silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); @@ -209,12 +242,12 @@ void silk_NSQ_sse4_1( } } - silk_nsq_scale_states_sse4_1( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); + silk_nsq_scale_states_sse4_1( psEncC, NSQ, x16, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); if ( opus_likely( ( 10 == psEncC->shapingLPCOrder ) && ( 16 == psEncC->predictLPCOrder) ) ) { silk_noise_shape_quantizer_10_16_sse4_1( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, - AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], + AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, &(table[32]) ); } else @@ -224,7 +257,7 @@ void silk_NSQ_sse4_1( offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch ); } - x_Q3 += psEncC->subfr_length; + x16 += psEncC->subfr_length; pulses += psEncC->subfr_length; pxq += psEncC->subfr_length; } @@ -235,12 +268,19 @@ void silk_NSQ_sse4_1( /* Save quantized speech and noise shaping signals */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); + +#ifdef OPUS_CHECK_ASM + silk_assert( !memcmp( &NSQ_c, NSQ, sizeof( NSQ_c ) ) ); + silk_assert( !memcmp( &psIndices_c, psIndices, sizeof( psIndices_c ) ) ); + silk_assert( !memcmp( pulses_c, pulses_a, psEncC->nb_subfr * psEncC->subfr_length * sizeof( pulses[0] ) ) ); +#endif + RESTORE_STACK; } -/***********************************/ -/* silk_noise_shape_quantizer_10_16 */ -/***********************************/ +/************************************/ +/* silk_noise_shape_quantizer_10_16 */ +/************************************/ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( silk_nsq_state *NSQ, /* I/O NSQ state */ opus_int signalType, /* I Signal type */ @@ -256,6 +296,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( opus_int Tilt_Q14, /* I Spectral tilt */ opus_int32 LF_shp_Q14, /* I */ opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ opus_int offset_Q10, /* I */ opus_int length, /* I Input length */ opus_int32 table[][4] /* I */ @@ -264,7 +305,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( opus_int i; opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13; opus_int32 n_LF_Q12, r_Q10, q1_Q0, q1_Q10, q2_Q10; - opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; + opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10, sDiff_shp_Q14; opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr; @@ -279,6 +320,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( __m128i sAR2_Q14_hi_76543210, sAR2_Q14_lo_76543210; __m128i AR_shp_Q13_76543210; + int rdo_offset = (Lambda_Q10 >> 1) - 512; + shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); @@ -288,6 +331,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( sLF_AR_shp_Q14 = NSQ->sLF_AR_shp_Q14; xq_Q14 = psLPC_Q14[ 0 ]; + sDiff_shp_Q14 = NSQ->sDiff_shp_Q14; LTP_pred_Q13 = 0; /* load a_Q12 */ @@ -430,8 +474,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( sAR2_Q14_hi_76543210 = _mm_slli_si128( sAR2_Q14_hi_76543210, 2 ); sAR2_Q14_lo_76543210 = _mm_slli_si128( sAR2_Q14_lo_76543210, 2 ); - sAR2_Q14_hi_76543210 = _mm_insert_epi16( sAR2_Q14_hi_76543210, (xq_Q14 >> 16), 0 ); - sAR2_Q14_lo_76543210 = _mm_insert_epi16( sAR2_Q14_lo_76543210, (xq_Q14), 0 ); + sAR2_Q14_hi_76543210 = _mm_insert_epi16( sAR2_Q14_hi_76543210, (sDiff_shp_Q14 >> 16), 0 ); + sAR2_Q14_lo_76543210 = _mm_insert_epi16( sAR2_Q14_lo_76543210, (sDiff_shp_Q14), 0 ); /* high part, use pmaddwd, results in 4 32-bit */ xmm_hi_07 = _mm_madd_epi16( sAR2_Q14_hi_76543210, AR_shp_Q13_76543210 ); @@ -462,14 +506,14 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); n_LF_Q12 = silk_SMLAWT( n_LF_Q12, sLF_AR_shp_Q14, LF_shp_Q14 ); - silk_assert( lag > 0 || signalType != TYPE_VOICED ); + celt_assert( lag > 0 || signalType != TYPE_VOICED ); /* Combine prediction and noise shaping signals */ tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */ tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ if( lag > 0 ) { /* Symmetric, packed FIR coefficients */ - n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q13 = silk_SMULWB( silk_ADD_SAT32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); shp_lag_ptr++; @@ -495,6 +539,18 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( /* Find two quantization level candidates and measure their rate-distortion */ q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); + if (Lambda_Q10 > 2048) { + /* For aggressive RDO, the bias becomes more than one pulse. */ + if (q1_Q10 > rdo_offset) { + q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 ); + } else if (q1_Q10 < -rdo_offset) { + q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 ); + } else if (q1_Q10 < 0) { + q1_Q0 = -1; + } else { + q1_Q0 = 0; + } + } q1_Q10 = table[q1_Q0][0]; q2_Q10 = table[q1_Q0][1]; @@ -519,7 +575,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( /* Update states */ psLPC_Q14++; *psLPC_Q14 = xq_Q14; - sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 ); + NSQ->sDiff_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_sc_Q10[ i ], 4 ); + sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( NSQ->sDiff_shp_Q14, n_AR_Q12, 2 ); NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 ); sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 ); @@ -600,64 +657,54 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( } static OPUS_INLINE void silk_nsq_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + const opus_int16 x16[], /* I input */ + opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ + const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I subframe number */ + const opus_int LTP_scale_Q14, /* I */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type /* I Signal type */ ) { opus_int i, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26; + __m128i xmm_inv_gain_Q26, xmm_x16_x2x0, xmm_x16_x3x1; lag = pitchL[ subfr ]; inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); silk_assert( inv_gain_Q31 != 0 ); - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); + inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 ); - /* prepare inv_gain_Q23 in packed 4 32-bits */ - xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); + /* prepare inv_gain_Q26 in packed 4 32-bits */ + xmm_inv_gain_Q26 = _mm_set1_epi32(inv_gain_Q26); for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { - xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); + xmm_x16_x2x0 = OP_CVTEPI16_EPI32_M64( &(x16[ i ] ) ); /* equal shift right 4 bytes*/ - xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + xmm_x16_x3x1 = _mm_shuffle_epi32( xmm_x16_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); - xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); + xmm_x16_x2x0 = _mm_mul_epi32( xmm_x16_x2x0, xmm_inv_gain_Q26 ); + xmm_x16_x3x1 = _mm_mul_epi32( xmm_x16_x3x1, xmm_inv_gain_Q26 ); - xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); - xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); + xmm_x16_x2x0 = _mm_srli_epi64( xmm_x16_x2x0, 16 ); + xmm_x16_x3x1 = _mm_slli_epi64( xmm_x16_x3x1, 16 ); - xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); + xmm_x16_x2x0 = _mm_blend_epi16( xmm_x16_x2x0, xmm_x16_x3x1, 0xCC ); - _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x_Q3_x2x0 ); + _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 ); } for( ; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); + x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 ); } - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ if( NSQ->rewhite_flag ) { if( subfr == 0 ) { @@ -671,9 +718,11 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1( } /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - /* Scale long-term shaping state */ + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + + /* Scale long-term shaping state */ /* prepare gain_adj_Q16 in packed 4 32-bits */ xmm_gain_adj_Q16 = _mm_set1_epi32(gain_adj_Q16); @@ -707,6 +756,7 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1( } NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 ); + NSQ->sDiff_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sDiff_shp_Q14 ); /* Scale short-term prediction and shaping states */ for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { @@ -715,5 +765,8 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1( for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] ); } + + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; } } diff --git a/third_party/opus/src/silk/x86/SigProc_FIX_sse.h b/third_party/opus/src/silk/x86/SigProc_FIX_sse.h index 61efa8da4152..89a5ec888a11 100644 --- a/third_party/opus/src/silk/x86/SigProc_FIX_sse.h +++ b/third_party/opus/src/silk/x86/SigProc_FIX_sse.h @@ -26,13 +26,13 @@ */ #ifndef SIGPROC_FIX_SSE_H -#define SIGPROC_FIX_SSE_H +# define SIGPROC_FIX_SSE_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif +# ifdef HAVE_CONFIG_H +# include "config.h" +# endif -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) +# if defined(OPUS_X86_MAY_HAVE_SSE4_1) void silk_burg_modified_sse4_1( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ @@ -45,11 +45,13 @@ void silk_burg_modified_sse4_1( int arch /* I Run-time architecture */ ); -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ - ((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) +# if defined(OPUS_X86_PRESUME_SSE4_1) + +# define OVERRIDE_silk_burg_modified +# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ + ((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) -#else +# elif defined(OPUS_HAVE_RTCD) extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])( opus_int32 *res_nrg, /* O Residual energy */ @@ -62,33 +64,36 @@ extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])( const opus_int D, /* I Order */ int arch /* I Run-time architecture */); -# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ - ((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) +# define OVERRIDE_silk_burg_modified +# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ + ((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) -#endif +# endif -opus_int64 silk_inner_prod16_aligned_64_sse4_1( +opus_int64 silk_inner_prod16_sse4_1( const opus_int16 *inVec1, const opus_int16 *inVec2, const opus_int len ); -#if defined(OPUS_X86_PRESUME_SSE4_1) +# if defined(OPUS_X86_PRESUME_SSE4_1) -#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((void)(arch),silk_inner_prod16_aligned_64_sse4_1(inVec1, inVec2, len)) +# define OVERRIDE_silk_inner_prod16 +# define silk_inner_prod16(inVec1, inVec2, len, arch) \ + ((void)(arch),silk_inner_prod16_sse4_1(inVec1, inVec2, len)) -#else +# elif defined(OPUS_HAVE_RTCD) -extern opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[OPUS_ARCHMASK + 1])( +extern opus_int64 (*const SILK_INNER_PROD16_IMPL[OPUS_ARCHMASK + 1])( const opus_int16 *inVec1, const opus_int16 *inVec2, const opus_int len); -# define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((*SILK_INNER_PROD16_ALIGNED_64_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len)) +# define OVERRIDE_silk_inner_prod16 +# define silk_inner_prod16(inVec1, inVec2, len, arch) \ + ((*SILK_INNER_PROD16_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len)) -#endif -#endif +# endif +# endif #endif diff --git a/third_party/opus/src/silk/x86/VAD_sse4_1.c b/third_party/opus/src/silk/x86/VAD_sse4_1.c index d02ddf4ad081..e7eaf9714a74 100644 --- a/third_party/opus/src/silk/x86/VAD_sse4_1.c +++ b/third_party/opus/src/silk/x86/VAD_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -63,6 +63,14 @@ opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if s SAVE_STACK; +#ifdef OPUS_CHECK_ASM + silk_encoder_state psEncC_c; + opus_int ret_c; + + silk_memcpy( &psEncC_c, psEncC, sizeof( psEncC_c ) ); + ret_c = silk_VAD_GetSA_Q8_c( &psEncC_c, pIn ); +#endif + /* Safety checks */ silk_assert( VAD_N_BANDS == 4 ); celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); @@ -233,15 +241,14 @@ opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if s speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); } + if( psEncC->frame_length == 20 * psEncC->fs_kHz ) { + speech_nrg = silk_RSHIFT32( speech_nrg, 1 ); + } /* Power scaling */ if( speech_nrg <= 0 ) { SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); - } else if( speech_nrg < 32768 ) { - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); - } else { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 ); - } + } else if( speech_nrg < 16384 ) { + speech_nrg = silk_LSHIFT32( speech_nrg, 16 ); /* square-root */ speech_nrg = silk_SQRT_APPROX( speech_nrg ); @@ -272,6 +279,11 @@ opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if s psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) ); } +#ifdef OPUS_CHECK_ASM + silk_assert( ret == ret_c ); + silk_assert( !memcmp( &psEncC_c, psEncC, sizeof( psEncC_c ) ) ); +#endif + RESTORE_STACK; return( ret ); } diff --git a/third_party/opus/src/silk/x86/VQ_WMat_EC_sse4_1.c b/third_party/opus/src/silk/x86/VQ_WMat_EC_sse4_1.c index 74d6c6d0ec6e..2c7d18d05eab 100644 --- a/third_party/opus/src/silk/x86/VQ_WMat_EC_sse4_1.c +++ b/third_party/opus/src/silk/x86/VQ_WMat_EC_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -38,105 +38,136 @@ /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ void silk_VQ_WMat_EC_sse4_1( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int32 *res_nrg_Q15, /* O best residual energy */ + opus_int32 *rate_dist_Q8, /* O best total bitrate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int32 *XX_Q17, /* I correlation matrix */ + const opus_int32 *xX_Q17, /* I correlation vector */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int subfr_len, /* I number of samples per subframe */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ + const opus_int L /* I number of vectors in codebook */ ) { opus_int k, gain_tmp_Q7; const opus_int8 *cb_row_Q7; - opus_int16 diff_Q14[ 5 ]; - opus_int32 sum1_Q14, sum2_Q16; + opus_int32 neg_xX_Q24[ 5 ]; + opus_int32 sum1_Q15, sum2_Q24; + opus_int32 bits_res_Q8, bits_tot_Q8; + __m128i v_XX_31_Q17, v_XX_42_Q17, v_cb_row_31_Q7, v_cb_row_42_Q7, v_acc1_Q24, v_acc2_Q24; + + /* Negate and convert to new Q domain */ + neg_xX_Q24[ 0 ] = -silk_LSHIFT32( xX_Q17[ 0 ], 7 ); + neg_xX_Q24[ 1 ] = -silk_LSHIFT32( xX_Q17[ 1 ], 7 ); + neg_xX_Q24[ 2 ] = -silk_LSHIFT32( xX_Q17[ 2 ], 7 ); + neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 ); + neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 ); + + v_XX_31_Q17 = _mm_loadu_si128( (__m128i *)(&XX_Q17[ 1 ] ) ); + v_XX_42_Q17 = _mm_shuffle_epi32( v_XX_31_Q17, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5; /* Loop over codebook */ - *rate_dist_Q14 = silk_int32_MAX; + *rate_dist_Q8 = silk_int32_MAX; + *res_nrg_Q15 = silk_int32_MAX; cb_row_Q7 = cb_Q7; + /* If things go really bad, at least *ind is set to something safe. */ + *ind = 0; for( k = 0; k < L; k++ ) { + opus_int32 penalty; gain_tmp_Q7 = cb_gain_Q7[k]; - - diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); - - C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] ); - C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] ); - C_tmp2 = _mm_slli_epi32( C_tmp2, 7 ); - C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 ); - - diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 ); - diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 ); - diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 ); - diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 ); - /* Weighted rate */ - sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); + /* Quantization error: 1 - 2 * xX * cb + cb' * XX * cb */ + sum1_Q15 = SILK_FIX_CONST( 1.001, 15 ); /* Penalty for too large gain */ - sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); - - silk_assert( sum1_Q14 >= 0 ); - - /* first row of W_Q18 */ - C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) ); - C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 ); - C_tmp4 = _mm_srli_si128( C_tmp4, 2 ); - - C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ - C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ - - C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 ); - C_tmp5 = _mm_srli_si128( C_tmp5, 2 ); - - C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 ); - C_tmp5 = _mm_slli_epi32( C_tmp5, 1 ); - - C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); - sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 ); - - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); - - /* second row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); - - /* third row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] ); - - /* fourth row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] ); - - /* last row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); - - silk_assert( sum1_Q14 >= 0 ); + penalty = silk_LSHIFT32( silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 11 ); + + /* first row of XX_Q17 */ + v_cb_row_31_Q7 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] ); + v_cb_row_42_Q7 = _mm_shuffle_epi32( v_cb_row_31_Q7, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + v_cb_row_31_Q7 = _mm_mul_epi32( v_XX_31_Q17, v_cb_row_31_Q7 ); + v_cb_row_42_Q7 = _mm_mul_epi32( v_XX_42_Q17, v_cb_row_42_Q7 ); + v_acc1_Q24 = _mm_add_epi64( v_cb_row_31_Q7, v_cb_row_42_Q7); + v_acc2_Q24 = _mm_shuffle_epi32( v_acc1_Q24, _MM_SHUFFLE( 1, 0, 3, 2 ) ); + v_acc1_Q24 = _mm_add_epi64( v_acc1_Q24, v_acc2_Q24); + sum2_Q24 = _mm_cvtsi128_si32( v_acc1_Q24 ); + sum2_Q24 = silk_ADD32( neg_xX_Q24[ 0 ], sum2_Q24 ); + sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 0 ], cb_row_Q7[ 0 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 0 ] ); + + /* second row of XX_Q17 */ + sum2_Q24 = silk_MLA( neg_xX_Q24[ 1 ], XX_Q17[ 7 ], cb_row_Q7[ 2 ] ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 8 ], cb_row_Q7[ 3 ] ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 9 ], cb_row_Q7[ 4 ] ); + sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 6 ], cb_row_Q7[ 1 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 1 ] ); + + /* third row of XX_Q17 */ + sum2_Q24 = silk_MLA( neg_xX_Q24[ 2 ], XX_Q17[ 13 ], cb_row_Q7[ 3 ] ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 14 ], cb_row_Q7[ 4 ] ); + sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 12 ], cb_row_Q7[ 2 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 2 ] ); + + /* fourth row of XX_Q17 */ + sum2_Q24 = silk_MLA( neg_xX_Q24[ 3 ], XX_Q17[ 19 ], cb_row_Q7[ 4 ] ); + sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 18 ], cb_row_Q7[ 3 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 3 ] ); + + /* last row of XX_Q17 */ + sum2_Q24 = silk_LSHIFT32( neg_xX_Q24[ 4 ], 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 24 ], cb_row_Q7[ 4 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 4 ] ); /* find best */ - if( sum1_Q14 < *rate_dist_Q14 ) { - *rate_dist_Q14 = sum1_Q14; - *ind = (opus_int8)k; - *gain_Q7 = gain_tmp_Q7; + if( sum1_Q15 >= 0 ) { + /* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */ + bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 + penalty) - (15 << 7) ); + /* In the following line we reduce the codelength component by half ("-1"); seems to slightly improve quality */ + bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 3-1 ); + if( bits_tot_Q8 <= *rate_dist_Q8 ) { + *rate_dist_Q8 = bits_tot_Q8; + *res_nrg_Q15 = sum1_Q15 + penalty; + *ind = (opus_int8)k; + *gain_Q7 = gain_tmp_Q7; + } } /* Go to next cbk vector */ cb_row_Q7 += LTP_ORDER; } + +#ifdef OPUS_CHECK_ASM + { + opus_int8 ind_c = 0; + opus_int32 res_nrg_Q15_c = 0; + opus_int32 rate_dist_Q8_c = 0; + opus_int gain_Q7_c = 0; + + silk_VQ_WMat_EC_c( + &ind_c, + &res_nrg_Q15_c, + &rate_dist_Q8_c, + &gain_Q7_c, + XX_Q17, + xX_Q17, + cb_Q7, + cb_gain_Q7, + cl_Q5, + subfr_len, + max_gain_Q7, + L + ); + + silk_assert( *ind == ind_c ); + silk_assert( *res_nrg_Q15 == res_nrg_Q15_c ); + silk_assert( *rate_dist_Q8 == rate_dist_Q8_c ); + silk_assert( *gain_Q7 == gain_Q7_c ); + } +#endif } diff --git a/third_party/opus/src/silk/x86/main_sse.h b/third_party/opus/src/silk/x86/main_sse.h index 2f15d4486978..a01d7f6c75eb 100644 --- a/third_party/opus/src/silk/x86/main_sse.h +++ b/third_party/opus/src/silk/x86/main_sse.h @@ -26,171 +26,169 @@ */ #ifndef MAIN_SSE_H -#define MAIN_SSE_H +# define MAIN_SSE_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif +# ifdef HAVE_CONFIG_H +# include "config.h" +# endif # if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */ -# define OVERRIDE_silk_VQ_WMat_EC - void silk_VQ_WMat_EC_sse4_1( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int32 *res_nrg_Q15, /* O best residual energy */ + opus_int32 *rate_dist_Q8, /* O best total bitrate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int32 *XX_Q17, /* I correlation matrix */ + const opus_int32 *xX_Q17, /* I correlation vector */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int subfr_len, /* I number of samples per subframe */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ + const opus_int L /* I number of vectors in codebook */ ); -#if defined OPUS_X86_PRESUME_SSE4_1 +# if defined OPUS_X86_PRESUME_SSE4_1 -#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) +# define OVERRIDE_silk_VQ_WMat_EC +# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ + subfr_len, max_gain_Q7, L, arch) \ + ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ + subfr_len, max_gain_Q7, L)) -#else +# elif defined(OPUS_HAVE_RTCD) extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int32 *res_nrg_Q15, /* O best residual energy */ + opus_int32 *rate_dist_Q8, /* O best total bitrate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int32 *XX_Q17, /* I correlation matrix */ + const opus_int32 *xX_Q17, /* I correlation vector */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int subfr_len, /* I number of samples per subframe */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ + const opus_int L /* I number of vectors in codebook */ ); -# define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) +# define OVERRIDE_silk_VQ_WMat_EC +# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ + subfr_len, max_gain_Q7, L, arch) \ + ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ + subfr_len, max_gain_Q7, L)) -#endif -#endif - -#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ -# define OVERRIDE_silk_NSQ +# endif void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); -#if defined OPUS_X86_PRESUME_SSE4_1 +# if defined OPUS_X86_PRESUME_SSE4_1 -#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ +# define OVERRIDE_silk_NSQ +# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -#else +# elif defined(OPUS_HAVE_RTCD) extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); -# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ +# define OVERRIDE_silk_NSQ +# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -#endif - -# define OVERRIDE_silk_NSQ_del_dec +# endif void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); -#if defined OPUS_X86_PRESUME_SSE4_1 +# if defined OPUS_X86_PRESUME_SSE4_1 -#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ +# define OVERRIDE_silk_NSQ_del_dec +# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ + ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -#else +# elif defined(OPUS_HAVE_RTCD) extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); -# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ +# define OVERRIDE_silk_NSQ_del_dec +# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ + HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ + ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -#endif -#endif +# endif void silk_noise_shape_quantizer( silk_nsq_state *NSQ, /* I/O NSQ state */ @@ -223,26 +221,27 @@ void silk_VAD_GetNoiseLevels( silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ ); -# define OVERRIDE_silk_VAD_GetSA_Q8 - opus_int silk_VAD_GetSA_Q8_sse4_1( silk_encoder_state *psEnC, const opus_int16 pIn[] ); -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn)) +# if defined(OPUS_X86_PRESUME_SSE4_1) -#else +# define OVERRIDE_silk_VAD_GetSA_Q8 +# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn)) -# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \ - ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn)) +# elif defined(OPUS_HAVE_RTCD) extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])( silk_encoder_state *psEnC, const opus_int16 pIn[]); -#endif +# define OVERRIDE_silk_VAD_GetSA_Q8 +# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \ + ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn)) + +# endif # endif #endif diff --git a/third_party/opus/src/silk/x86/x86_silk_map.c b/third_party/opus/src/silk/x86/x86_silk_map.c index 32dcc3cab7a9..70f60078cfa4 100644 --- a/third_party/opus/src/silk/x86/x86_silk_map.c +++ b/third_party/opus/src/silk/x86/x86_silk_map.c @@ -35,22 +35,22 @@ #include "pitch.h" #include "main.h" -#if !defined(OPUS_X86_PRESUME_SSE4_1) +#if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_SSE4_1) #if defined(FIXED_POINT) #include "fixed/main_FIX.h" -opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[ OPUS_ARCHMASK + 1 ] )( +opus_int64 (*const SILK_INNER_PROD16_IMPL[ OPUS_ARCHMASK + 1 ] )( const opus_int16 *inVec1, const opus_int16 *inVec2, const opus_int len ) = { - silk_inner_prod16_aligned_64_c, /* non-sse */ - silk_inner_prod16_aligned_64_c, - silk_inner_prod16_aligned_64_c, - MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ) /* avx */ + silk_inner_prod16_c, /* non-sse */ + silk_inner_prod16_c, + silk_inner_prod16_c, + MAY_HAVE_SSE4_1( silk_inner_prod16 ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_inner_prod16 ) /* avx */ }; #endif @@ -66,23 +66,22 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ }; -#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) = { silk_NSQ_c, /* non-sse */ silk_NSQ_c, @@ -90,21 +89,20 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ }; -#endif -#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int32 *res_nrg_Q15, /* O best residual energy */ + opus_int32 *rate_dist_Q8, /* O best total bitrate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int32 *XX_Q17, /* I correlation matrix */ + const opus_int32 *xX_Q17, /* I correlation vector */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int subfr_len, /* I number of samples per subframe */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ + const opus_int L /* I number of vectors in codebook */ ) = { silk_VQ_WMat_EC_c, /* non-sse */ silk_VQ_WMat_EC_c, @@ -112,25 +110,23 @@ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ }; -#endif -#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) = { silk_NSQ_del_dec_c, /* non-sse */ silk_NSQ_del_dec_c, @@ -138,7 +134,6 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */ }; -#endif #if defined(FIXED_POINT) diff --git a/third_party/opus/src/src/opus_decoder.c b/third_party/opus/src/src/opus_decoder.c index 9113638a000b..6520e748ea83 100644 --- a/third_party/opus/src/src/opus_decoder.c +++ b/third_party/opus/src/src/opus_decoder.c @@ -278,7 +278,8 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, ec_dec_init(&dec,(unsigned char*)data,len); } else { audiosize = frame_size; - mode = st->prev_mode; + /* Run PLC using last used mode (CELT if we ended with CELT redundancy) */ + mode = st->prev_redundancy ? MODE_CELT_ONLY : st->prev_mode; bandwidth = 0; if (mode == 0) @@ -419,7 +420,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, start_band = 0; if (!decode_fec && mode != MODE_CELT_ONLY && data != NULL - && ec_tell(&dec)+17+20*(st->mode == MODE_HYBRID) <= 8*len) + && ec_tell(&dec)+17+20*(mode == MODE_HYBRID) <= 8*len) { /* Check if we have a redundant 0-8 kHz band */ if (mode == MODE_HYBRID) @@ -499,6 +500,11 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, /* 5 ms redundant frame for CELT->SILK*/ if (redundancy && celt_to_silk) { + /* If the previous frame did not use CELT (the first redundancy frame in + a transition from SILK may have been lost) then the CELT decoder is + stale at this point and the redundancy audio is not useful, however + the final range is still needed (for testing), so the redundancy is + always decoded but the decoded audio may not be used */ MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0))); celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); @@ -561,7 +567,10 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); } - if (redundancy && celt_to_silk) + /* 5ms redundant frame for CELT->SILK; ignore if the previous frame did not + use CELT (the first redundancy frame in a transition from SILK may have + been lost) */ + if (redundancy && celt_to_silk && (st->prev_mode != MODE_SILK_ONLY || st->prev_redundancy)) { for (c=0;cchannels;c++) { diff --git a/third_party/opus/src/src/opus_encoder.c b/third_party/opus/src/src/opus_encoder.c index 253fe9e880b7..8c8db5a54681 100644 --- a/third_party/opus/src/src/opus_encoder.c +++ b/third_party/opus/src/src/opus_encoder.c @@ -87,6 +87,7 @@ struct OpusEncoder { int lfe; int arch; int use_dtx; /* general DTX for both SILK and CELT */ + int fec_config; #ifndef DISABLE_FLOAT_API TonalityAnalysisState analysis; #endif @@ -1314,6 +1315,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->stream_channels = st->force_channels; } else { #ifdef FUZZING + (void)stereo_music_threshold; + (void)stereo_voice_threshold; /* Random mono/stereo decision */ if (st->channels == 2 && (rand()&0x1F)==0) st->stream_channels = 3-st->stream_channels; @@ -1352,6 +1355,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } else if (st->user_forced_mode == OPUS_AUTO) { #ifdef FUZZING + (void)stereo_width; + (void)mode_thresholds; /* Random mode switching */ if ((rand()&0xF)==0) { @@ -1389,8 +1394,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY; - /* When FEC is enabled and there's enough packet loss, use SILK */ - if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4) + /* When FEC is enabled and there's enough packet loss, use SILK. + Unless the FEC is set to 2, in which case we don't switch to SILK if we're confident we have music. */ + if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4 && (st->fec_config != 2 || voice_est > 25)) st->mode = MODE_SILK_ONLY; /* When encoding voice and DTX is enabled but the generalized DTX cannot be used, use SILK in order to make use of its DTX. */ @@ -2439,11 +2445,12 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) case OPUS_SET_INBAND_FEC_REQUEST: { opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) + if(value<0 || value>2) { goto bad_arg; } - st->silk_mode.useInBandFEC = value; + st->fec_config = value; + st->silk_mode.useInBandFEC = (value != 0); } break; case OPUS_GET_INBAND_FEC_REQUEST: @@ -2453,7 +2460,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) { goto bad_arg; } - *value = st->silk_mode.useInBandFEC; + *value = st->fec_config; } break; case OPUS_SET_PACKET_LOSS_PERC_REQUEST: diff --git a/third_party/opus/src/src/opus_multistream_encoder.c b/third_party/opus/src/src/opus_multistream_encoder.c index 93204a14c123..213e3eb2c226 100644 --- a/third_party/opus/src/src/opus_multistream_encoder.c +++ b/third_party/opus/src/src/opus_multistream_encoder.c @@ -443,7 +443,8 @@ static int opus_multistream_encoder_init_impl( char *ptr; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams) || + (streams+coupled_streams>channels)) return OPUS_BAD_ARG; st->arch = opus_select_arch(); @@ -459,8 +460,7 @@ static int opus_multistream_encoder_init_impl( st->layout.mapping[i] = mapping[i]; if (!validate_layout(&st->layout)) return OPUS_BAD_ARG; - if (mapping_type == MAPPING_TYPE_SURROUND && - !validate_encoder_layout(&st->layout)) + if (!validate_encoder_layout(&st->layout)) return OPUS_BAD_ARG; if (mapping_type == MAPPING_TYPE_AMBISONICS && !validate_ambisonics(st->layout.nb_channels, NULL, NULL)) @@ -595,7 +595,8 @@ OpusMSEncoder *opus_multistream_encoder_create( int ret; OpusMSEncoder *st; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams) || + (streams+coupled_streams>channels)) { if (error) *error = OPUS_BAD_ARG; diff --git a/third_party/opus/src/tests/opus_encode_regressions.c b/third_party/opus/src/tests/opus_encode_regressions.c index 292347301443..4d506eb60961 100644 --- a/third_party/opus/src/tests/opus_encode_regressions.c +++ b/third_party/opus/src/tests/opus_encode_regressions.c @@ -35,7 +35,6 @@ #include #include #include -#include #include "opus_multistream.h" #include "opus.h" #include "test_opus_common.h" @@ -106,7 +105,7 @@ static int celt_ec_internal_error(void) 1799, 1799, 1799, 1799, -9721 }; err = opus_multistream_encode(enc, pcm, 320, data, 2460); - assert(err > 0); + opus_test_assert(err > 0); } opus_multistream_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)); opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(0)); @@ -144,7 +143,7 @@ static int celt_ec_internal_error(void) -9510, -9510, -9510, -9510, -9510, -9510, -9510 }; err = opus_multistream_encode(enc, pcm, 160, data, 2460); - assert(err > 0); + opus_test_assert(err > 0); } opus_multistream_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)); opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(0)); @@ -182,7 +181,7 @@ static int celt_ec_internal_error(void) -9510, -9510, -9510, -9510, -9510, -9510, -9510 }; err = opus_multistream_encode(enc, pcm, 160, data, 2460); - assert(err > 0); + opus_test_assert(err > 0); } opus_multistream_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)); opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(0)); @@ -220,7 +219,7 @@ static int celt_ec_internal_error(void) -9510, -9510, -9510, -9510, -9510, -9510, -9510 }; err = opus_multistream_encode(enc, pcm, 160, data, 2460); - assert(err > 0); + opus_test_assert(err > 0); } opus_multistream_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)); opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(0)); @@ -256,7 +255,7 @@ static int celt_ec_internal_error(void) 5632 }; err = opus_multistream_encode(enc, pcm, 160, data, 2460); - assert(err > 0); + opus_test_assert(err > 0); } opus_multistream_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(0)); @@ -281,7 +280,7 @@ static int celt_ec_internal_error(void) 0, 0, -256, 226 }; err = opus_multistream_encode(enc, pcm, 40, data, 2460); - assert(err > 0); + opus_test_assert(err > 0); /* returns -3 */ } opus_multistream_encoder_destroy(enc); @@ -334,7 +333,7 @@ static int mscbr_encode_fail10(void) 0 }; err = opus_multistream_encode(enc, pcm, 20, data, 627300); - assert(err > 0); + opus_test_assert(err > 0); /* returns -1 */ } opus_multistream_encoder_destroy(enc); @@ -384,7 +383,7 @@ static int mscbr_encode_fail(void) 0 }; err = opus_multistream_encode(enc, pcm, 20, data, 472320); - assert(err > 0); + opus_test_assert(err > 0); /* returns -1 */ } opus_multistream_encoder_destroy(enc); @@ -740,7 +739,7 @@ static int surround_analysis_uninit(void) -20992, 25859, 5372, 12040, 13307, -4355,-30213, -9, -6019 }; err = opus_multistream_encode(enc, pcm, 960, data, 7380); - assert(err > 0); + opus_test_assert(err > 0); } opus_multistream_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)); opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(1)); @@ -885,7 +884,7 @@ static int surround_analysis_uninit(void) }; err = opus_multistream_encode(enc, pcm, 1440, data, 7380); /* reads uninitialized data at src/opus_multistream_encoder.c:293 */ - assert(err > 0); + opus_test_assert(err > 0); } opus_multistream_encoder_destroy(enc); return 0; @@ -935,7 +934,7 @@ static int ec_enc_shrink_assert(void) opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(6)); opus_encoder_ctl(enc, OPUS_SET_BITRATE(6000)); data_len = opus_encode(enc, pcm1, 960, data, 2000); - assert(data_len > 0); + opus_test_assert(data_len > 0); opus_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); opus_encoder_ctl(enc, OPUS_SET_PREDICTION_DISABLED(1)); @@ -943,12 +942,12 @@ static int ec_enc_shrink_assert(void) opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(1)); opus_encoder_ctl(enc, OPUS_SET_BITRATE(15600)); data_len = opus_encode(enc, pcm2, 2880, data, 122); - assert(data_len > 0); + opus_test_assert(data_len > 0); opus_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)); opus_encoder_ctl(enc, OPUS_SET_BITRATE(27000)); data_len = opus_encode(enc, pcm3, 2880, data, 122); /* assertion failure */ - assert(data_len > 0); + opus_test_assert(data_len > 0); opus_encoder_destroy(enc); return 0; @@ -970,7 +969,7 @@ static int ec_enc_shrink_assert2(void) { static const short pcm[960] = { 0 }; data_len = opus_encode(enc, pcm, 960, data, 2000); - assert(data_len > 0); + opus_test_assert(data_len > 0); } opus_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)); { @@ -980,7 +979,7 @@ static int ec_enc_shrink_assert2(void) -32768, -32768, 0, 0, -32768, -32768, 0, 0, -32768, -32768 }; data_len = opus_encode(enc, pcm, 480, data, 19); - assert(data_len > 0); + opus_test_assert(data_len > 0); } opus_encoder_destroy(enc); return 0; @@ -1009,14 +1008,14 @@ static int silk_gain_assert(void) opus_encoder_ctl(enc, OPUS_SET_MAX_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); opus_encoder_ctl(enc, OPUS_SET_BITRATE(6000)); data_len = opus_encode(enc, pcm1, 160, data, 1000); - assert(data_len > 0); + opus_test_assert(data_len > 0); opus_encoder_ctl(enc, OPUS_SET_VBR(0)); opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(0)); opus_encoder_ctl(enc, OPUS_SET_MAX_BANDWIDTH(OPUS_BANDWIDTH_MEDIUMBAND)); opus_encoder_ctl(enc, OPUS_SET_BITRATE(2867)); data_len = opus_encode(enc, pcm2, 960, data, 1000); - assert(data_len > 0); + opus_test_assert(data_len > 0); opus_encoder_destroy(enc); return 0; diff --git a/third_party/opus/src/tests/test_opus_api.c b/third_party/opus/src/tests/test_opus_api.c index fb385c63044d..0e7ed2cc9d7c 100644 --- a/third_party/opus/src/tests/test_opus_api.c +++ b/third_party/opus/src/tests/test_opus_api.c @@ -1298,7 +1298,7 @@ opus_int32 test_enc_api(void) err=opus_encoder_ctl(enc,OPUS_GET_INBAND_FEC(null_int_ptr)); if(err!=OPUS_BAD_ARG)test_failed(); cfgs++; - CHECK_SETGET(OPUS_SET_INBAND_FEC(i),OPUS_GET_INBAND_FEC(&i),-1,2, + CHECK_SETGET(OPUS_SET_INBAND_FEC(i),OPUS_GET_INBAND_FEC(&i),-1,3, 1,0, " OPUS_SET_INBAND_FEC .......................... OK.\n", " OPUS_GET_INBAND_FEC .......................... OK.\n") diff --git a/third_party/opus/src/tests/test_opus_common.h b/third_party/opus/src/tests/test_opus_common.h index d96c7d84a38e..5fb924f4aa95 100644 --- a/third_party/opus/src/tests/test_opus_common.h +++ b/third_party/opus/src/tests/test_opus_common.h @@ -81,5 +81,5 @@ static OPUS_INLINE void _test_failed(const char *file, int line) abort(); } #define test_failed() _test_failed(__FILE__, __LINE__); - +#define opus_test_assert(cond) {if (!(cond)) {test_failed();}} void regression_test(void); diff --git a/third_party/opus/src/tests/test_opus_encode.c b/third_party/opus/src/tests/test_opus_encode.c index 00795a1e29f1..d6e8e2d379f1 100644 --- a/third_party/opus/src/tests/test_opus_encode.c +++ b/third_party/opus/src/tests/test_opus_encode.c @@ -297,6 +297,7 @@ int run_test1(int no_fuzz) /*FIXME: encoder api tests, fs!=48k, mono, VBR*/ fprintf(stdout," Encode+Decode tests.\n"); + fflush(stdout); enc = opus_encoder_create(48000, 2, OPUS_APPLICATION_VOIP, &err); if(err != OPUS_OK || enc==NULL)test_failed(); @@ -466,6 +467,7 @@ int run_test1(int no_fuzz) count++; }while(i<(SSAMPLES-MAX_FRAME_SAMP)); fprintf(stdout," Mode %s FB encode %s, %6d bps OK.\n",mstrings[modes[j]],rc==0?" VBR":rc==1?"CVBR":" CBR",rate); + fflush(stdout); } } @@ -543,6 +545,7 @@ int run_test1(int no_fuzz) count++; }while(i<(SSAMPLES/12-MAX_FRAME_SAMP)); fprintf(stdout," Mode %s NB dual-mono MS encode %s, %6d bps OK.\n",mstrings[modes[j]],rc==0?" VBR":rc==1?"CVBR":" CBR",rate); + fflush(stdout); } } @@ -612,6 +615,7 @@ int run_test1(int no_fuzz) i+=frame_size; }while(i #include #include #include diff --git a/third_party/opus/src/win32/genversion.bat b/third_party/opus/src/win32/genversion.bat index aea55739342d..1def7460b5e0 100755 --- a/third_party/opus/src/win32/genversion.bat +++ b/third_party/opus/src/win32/genversion.bat @@ -1,37 +1,37 @@ -@echo off - -setlocal enableextensions enabledelayedexpansion - -for /f %%v in ('cd "%~dp0.." ^&^& git status ^>NUL 2^>NUL ^&^& git describe --tags --match "v*" --dirty 2^>NUL') do set version=%%v - -if not "%version%"=="" set version=!version:~1! && goto :gotversion - -if exist "%~dp0..\package_version" goto :getversion - -echo Git cannot be found, nor can package_version. Generating unknown version. - -set version=unknown - -goto :gotversion - -:getversion - -for /f "delims== tokens=2" %%v in (%~dps0..\package_version) do set version=%%v -set version=!version:"=! - -:gotversion - -set version=!version: =! -set version_out=#define %~2 "%version%" - -echo %version_out%> "%~1_temp" - -echo n | comp "%~1_temp" "%~1" > NUL 2> NUL - -if not errorlevel 1 goto exit - -copy /y "%~1_temp" "%~1" - -:exit - -del "%~1_temp" +@echo off + +setlocal enableextensions enabledelayedexpansion + +for /f %%v in ('cd "%~dp0.." ^&^& git status ^>NUL 2^>NUL ^&^& git describe --tags --match "v*" --dirty 2^>NUL') do set version=%%v + +if not "%version%"=="" set version=!version:~1! && goto :gotversion + +if exist "%~dp0..\package_version" goto :getversion + +echo Git cannot be found, nor can package_version. Generating unknown version. + +set version=unknown + +goto :gotversion + +:getversion + +for /f "delims== tokens=2" %%v in (%~dps0..\package_version) do set version=%%v +set version=!version:"=! + +:gotversion + +set version=!version: =! +set version_out=#define %~2 "%version%" + +echo %version_out%> "%~1_temp" + +echo n | comp "%~1_temp" "%~1" > NUL 2> NUL + +if not errorlevel 1 goto exit + +copy /y "%~1_temp" "%~1" + +:exit + +del "%~1_temp" diff --git a/third_party/opus/tests/opus_benchmark.cc b/third_party/opus/tests/opus_benchmark.cc index 5bdc10fe5e91..0c7ac005a5cf 100644 --- a/third_party/opus/tests/opus_benchmark.cc +++ b/third_party/opus/tests/opus_benchmark.cc @@ -1,4 +1,4 @@ -// Copyright 2019 The Chromium Authors. All rights reserved. +// Copyright 2019 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file.