Skip to content

Commit

Permalink
Optimized SRTP stream lookup with SSE2.
Browse files Browse the repository at this point in the history
Stream lookup by SSRC is now performed using SSE2 intrinsics, which
is considerably faster when there are many streams in the list. Although
the lookup still has linear complexity, its absolute times are reduced
and, for tens to hundreds of elements, are lower than or comparable to
those of a typical rb-tree equivalent.

Expected stream lookup performance of scalar array-based implementation
and its SSE2 version compared to the list-based implementation that was
used previously:

SSRCs    speedup (scalar)   speedup (SSE2)

1        0.39x              0.22x
3        0.57x              0.23x
5        0.69x              0.62x
10       0.77x              1.43x
20       0.86x              2.38x
30       0.87x              3.44x
50       1.13x              6.21x
100      1.25x              8.51x
200      1.30x              9.83x

Performance tested on an Intel Core i7 2600K CPU.
  • Loading branch information
Lastique committed Jan 21, 2022
1 parent 372491b commit 810e795
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 16 deletions.
22 changes: 6 additions & 16 deletions srtp/stream_list.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@
#include "err.h"
#include "alloc.h"

#if defined(__SSE2__)
#include "stream_list_sse2.h"
#else
#include "stream_list_generic.h"
#endif

/*
* Initializes an empty list of streams
*/
Expand All @@ -63,22 +69,6 @@ void srtp_stream_list_init(srtp_stream_list_t *streams)
streams->capacity = 0u;
}

/*
 * Looks up ssrc in the list with a linear scan.
 *
 * Returns the index of the first entry of streams->ssrcs equal to ssrc.
 * When no entry matches, the returned index is not a valid position
 * (callers should treat any value >= streams->size as "not found");
 * this implementation returns exactly streams->size in that case.
 */
uint32_t srtp_stream_list_find(const srtp_stream_list_t *streams, uint32_t ssrc)
{
    const uint32_t count = streams->size;
    uint32_t idx = 0u;

    /* advance until the ssrc is hit or the used part of the array ends */
    while (idx < count && streams->ssrcs[idx] != ssrc) {
        ++idx;
    }

    return idx;
}

/*
* Reserves storage to be able to store at least the specified number
* of elements.
Expand Down
60 changes: 60 additions & 0 deletions srtp/stream_list_generic.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* stream_list_generic.h
*
* SRTP stream list generic implementation
*
* Andrey Semashev
*/
/*
*
* Copyright (c) 2022, Cisco Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Cisco Systems, Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

/* NOTE: This file is intended to be included only once, in stream_list.c */

/*
 * Generic (scalar) lookup of a stream by its ssrc.
 *
 * Scans streams->ssrcs from the front and returns the index of the first
 * entry equal to ssrc. If there is no such entry the result is
 * streams->size; callers should treat any value >= streams->size as
 * "no stream exists for that ssrc".
 */
uint32_t srtp_stream_list_find(const srtp_stream_list_t *streams, uint32_t ssrc)
{
    const uint32_t *const table = streams->ssrcs;
    const uint32_t used = streams->size;
    uint32_t i;

    for (i = 0u; i < used; ++i) {
        if (table[i] == ssrc) {
            /* first match wins */
            return i;
        }
    }

    /* ran off the end: i == used here, which signals "not found" */
    return i;
}
110 changes: 110 additions & 0 deletions srtp/stream_list_sse2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* stream_list_sse2.h
*
* SRTP stream list SSE2 implementation
*
* Andrey Semashev
*/
/*
*
* Copyright (c) 2022, Cisco Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Cisco Systems, Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

/* NOTE: This file is intended to be included only once, in stream_list.c */

#include <emmintrin.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

/*
 * Returns the position of the lowest set bit in mask.
 * mask must be non-zero; both underlying primitives leave the result
 * undefined for a zero argument.
 */
static uint32_t srtp_stream_list_lowest_set_bit(uint32_t mask)
{
#if defined(_MSC_VER)
    unsigned long bit_index;
    _BitScanForward(&bit_index, mask);
    return (uint32_t)bit_index;
#else
    return (uint32_t)__builtin_ctz(mask);
#endif
}

/*
 * SSE2 lookup of a stream by its ssrc.
 *
 * Returns the index of the first 32-bit entry of streams->ssrcs equal to
 * ssrc, or a value >= streams->size if no stream exists for that ssrc.
 *
 * The scan covers streams->size rounded up to a multiple of 8 entries:
 * first in groups of 16, then at most one trailing group of 8. When
 * nothing matches, the rounded-up size is returned.
 *
 * NOTE(review): entries between streams->size and the rounded-up size are
 * loaded and compared too. This assumes the storage behind streams->ssrcs
 * is at least that long - TODO confirm against the allocation code. If
 * such a padding entry happens to equal ssrc, the returned index is still
 * >= streams->size, i.e. still "not found" for the caller.
 */
uint32_t srtp_stream_list_find(const srtp_stream_list_t *streams, uint32_t ssrc)
{
    const uint32_t *const table = streams->ssrcs;
    const __m128i needle = _mm_set1_epi32((int)ssrc);
    const uint32_t padded_size = (streams->size + 7u) & ~(uint32_t)(7u);
    const uint32_t wide_end = padded_size & ~(uint32_t)(15u);
    uint32_t base = 0u;

    /* main loop: 16 entries (four 128-bit vectors) per iteration */
    while (base < wide_end) {
        const __m128i *const src = (const __m128i *)(table + base);
        const __m128i eq0 = _mm_cmpeq_epi32(_mm_loadu_si128(src), needle);
        const __m128i eq1 = _mm_cmpeq_epi32(_mm_loadu_si128(src + 1), needle);
        const __m128i eq2 = _mm_cmpeq_epi32(_mm_loadu_si128(src + 2), needle);
        const __m128i eq3 = _mm_cmpeq_epi32(_mm_loadu_si128(src + 3), needle);

        /*
         * Narrow the 32-bit compare results to one byte per entry, in
         * entry order, so each mask bit corresponds to exactly one entry.
         */
        const __m128i packed = _mm_packs_epi16(_mm_packs_epi32(eq0, eq1),
                                               _mm_packs_epi32(eq2, eq3));
        const uint32_t hits = (uint32_t)_mm_movemask_epi8(packed);
        if (hits != 0u) {
            return base + srtp_stream_list_lowest_set_bit(hits);
        }

        base += 16u;
    }

    /* tail: padded_size - wide_end is either 0 or 8 entries */
    if (base < padded_size) {
        const __m128i *const src = (const __m128i *)(table + base);
        const __m128i eq0 = _mm_cmpeq_epi32(_mm_loadu_si128(src), needle);
        const __m128i eq1 = _mm_cmpeq_epi32(_mm_loadu_si128(src + 1), needle);

        /*
         * Only narrowed to 16 bits here, so every entry contributes two
         * adjacent mask bits; halve the bit position to get the entry.
         */
        const uint32_t hits =
            (uint32_t)_mm_movemask_epi8(_mm_packs_epi32(eq0, eq1));
        if (hits != 0u) {
            return base + srtp_stream_list_lowest_set_bit(hits) / 2u;
        }
    }

    /* no match in any group */
    return padded_size;
}

0 comments on commit 810e795

Please sign in to comment.