Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to wyhash from CRC for capture tile diff #301

Merged
merged 4 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions module/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ noinst_HEADERS = \
rdpXv.h \
amd64/funcs_amd64.h \
x86/funcs_x86.h \
wyhash.h \
$(EXTRA_HEADERS)

libxorgxrdp_la_LTLIBRARIES = libxorgxrdp.la
Expand Down
54 changes: 40 additions & 14 deletions module/rdpCapture.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ capture
#include "rdpMisc.h"
#include "rdpCapture.h"

#include "wyhash.h"
/* hex digits of pi as a 64 bit int */
#define WYHASH_SEED 0x3243f6a8885a308dull

#if defined(XORGXRDP_GLAMOR)
#include "rdpEgl.h"
#include <glamor.h>
Expand Down Expand Up @@ -586,6 +590,24 @@ isShmStatusActive(enum shared_memory_status status) {
}
}

/******************************************************************************/
/* hash one 64x64 tile of 4-byte pixels, row by row, with no bounds checking;
   src is the start of the pixel buffer, src_stride its row pitch in bytes,
   x / y the pixel position of the tile's top left corner, seed the initial
   hash state; returns the hash of the whole tile */
static uint64_t
wyhash_rfx_tile(const uint8_t *src, int src_stride, int x, int y, uint64_t seed)
{
    int row;
    uint64_t hash;
    const uint8_t *s8;

    hash = seed;
    /* first byte of the tile: row y, pixel x, 4 bytes per pixel */
    s8 = src + (y * src_stride) + (x * 4);
    for (row = 0; row < 64; row++)
    {
        /* rows are chained: the hash of one row seeds the next one */
        hash = wyhash((const void *)s8, 64 * 4, hash, _wyp);
        s8 += src_stride;
    }
    return hash;
}

/******************************************************************************/
static Bool
rdpCapture0(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
Expand Down Expand Up @@ -848,7 +870,7 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
int dst_stride;
int crc_offset;
int crc_stride;
int crc;
uint64_t crc;
int num_crcs;
int mon_index;

Expand Down Expand Up @@ -887,7 +909,7 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
/* resize the crc list */
clientCon->num_rfx_crcs_alloc[mon_index] = num_crcs;
free(clientCon->rfx_crcs[mon_index]);
clientCon->rfx_crcs[mon_index] = g_new0(int, num_crcs);
clientCon->rfx_crcs[mon_index] = g_new0(uint64_t, num_crcs);
}

extents_rect = *rdpRegionExtents(in_reg);
Expand All @@ -913,7 +935,8 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
}
else
{
crc = crc_start();
/* hex digits of pi as a 64 bit int */
crc = WYHASH_SEED;
if (rcode == rgnPART)
{
LLOGLN(10, ("rdpCapture2: rgnPART"));
Expand All @@ -922,28 +945,23 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
rdpRegionIntersect(&tile_reg, in_reg, &tile_reg);
rects = REGION_RECTS(&tile_reg);
num_rects = REGION_NUM_RECTS(&tile_reg);
crc = crc_process_data(crc, rects,
num_rects * sizeof(BoxRec));
crc = wyhash((const void*)rects, num_rects * sizeof(BoxRec), crc, _wyp);
rdpCopyBox_a8r8g8b8_to_yuvalp(x, y,
src, src_stride,
dst, dst_stride,
rects, num_rects);
crc_dst = dst + (y << 8) * (dst_stride >> 8) + (x << 8);
crc = wyhash((const void*)crc_dst, 64 * 64 * 4, crc, _wyp);
rdpRegionUninit(&tile_reg);
}
else /* rgnIN */
{
LLOGLN(10, ("rdpCapture2: rgnIN"));
rdpCopyBox_a8r8g8b8_to_yuvalp(x, y,
src, src_stride,
dst, dst_stride,
&rect, 1);
crc = wyhash_rfx_tile(src, src_stride, x, y, crc);
}
crc_dst = dst + (y << 8) * (dst_stride >> 8) + (x << 8);
crc = crc_process_data(crc, crc_dst, 64 * 64 * 4);
crc = crc_end(crc);
crc_offset = (y / XRDP_RFX_ALIGN) * crc_stride
crc_offset = (y / XRDP_RFX_ALIGN) * crc_stride
+ (x / XRDP_RFX_ALIGN);
LLOGLN(10, ("rdpCapture2: crc 0x%8.8x 0x%8.8x",
LLOGLN(10, ("rdpCapture2: crc 0x%" PRIx64 " 0x%" PRIx64,
crc, clientCon->rfx_crcs[mon_index][crc_offset]));
if (crc == clientCon->rfx_crcs[mon_index][crc_offset])
{
Expand All @@ -954,6 +972,14 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
}
else
{
/* lazily only do this if hash wasn't identical */
if (rcode != rgnPART)
{
rdpCopyBox_a8r8g8b8_to_yuvalp(x, y,
src, src_stride,
dst, dst_stride,
&rect, 1);
}
clientCon->rfx_crcs[mon_index][crc_offset] = crc;
(*out_rects)[out_rect_index] = rect;
out_rect_index++;
Expand Down
2 changes: 1 addition & 1 deletion module/rdpClientCon.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ struct _rdpClientCon
RegionPtr dirtyRegion;

int num_rfx_crcs_alloc[16];
int *rfx_crcs[16];
uint64_t *rfx_crcs[16];

/* true = skip drawing */
int suppress_output;
Expand Down
5 changes: 3 additions & 2 deletions module/rdpEgl.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ rdpEglOut(rdpClientCon *clientCon, struct rdp_egl *egl, RegionPtr in_reg,
/* resize the crc list */
clientCon->num_rfx_crcs_alloc[mon_index] = num_crcs;
free(clientCon->rfx_crcs[mon_index]);
clientCon->rfx_crcs[mon_index] = g_new0(int, num_crcs);
clientCon->rfx_crcs[mon_index] = g_new0(uint64_t, num_crcs);
}
tile_extents_stride = (tile_extents_rect->x2 - tile_extents_rect->x1) / 64;
out_rect_index = 0;
Expand Down Expand Up @@ -614,7 +614,8 @@ rdpEglOut(rdpClientCon *clientCon, struct rdp_egl *egl, RegionPtr in_reg,
crc = crc_end(crc);
if (crc != crcs[(ly / 64) * tile_extents_stride + (lx / 64)])
{
LLOGLN(0, ("rdpEglOut: error crc no match 0x%8.8x 0x%8.8x",
LLOGLN(0, ("rdpEglOut: error crc no match "
"0x%" PRIx64 " 0x%" PRIx64,
crc,
crcs[(ly / 64) * tile_extents_stride + (lx / 64)]));
}
Expand Down
113 changes: 113 additions & 0 deletions module/wyhash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/* Author: Wang Yi <[email protected]>
chopped down and converted to older C standard for xorgxrdp
*/
#ifndef wyhash_final_version
#define wyhash_final_version
#ifndef WYHASH_CONDOM
#define WYHASH_CONDOM 0
#endif
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#define _likely_(x) __builtin_expect(x,1)
#define _unlikely_(x) __builtin_expect(x,0)
#else
#define _likely_(x) (x)
#define _unlikely_(x) (x)
#endif
/* Swap the upper and lower 32-bit halves of x (rotate by 32 bits). */
static __inline__ uint64_t _wyrot(uint64_t x)
{
    const uint64_t upper_to_lower = x >> 32;
    const uint64_t lower_to_upper = x << 32;

    return lower_to_upper | upper_to_lower;
}
/*
 * 64x64 -> 128 bit multiply of *A and *B.
 * On return *A receives the low 64 bits of the product and *B the high
 * 64 bits. When WYHASH_CONDOM > 1 the two halves are XORed into the
 * previous values instead of overwriting them.
 */
static __inline__ void _wymum(uint64_t *A, uint64_t *B)
{
    uint64_t lo64;
    uint64_t hi64;
#if defined(__SIZEOF_INT128__)
    /* compiler has a native 128 bit integer */
    const __uint128_t wide = (__uint128_t)(*A) * (__uint128_t)(*B);

    lo64 = (uint64_t)wide;
    hi64 = (uint64_t)(wide >> 64);
#elif defined(_MSC_VER) && defined(_M_X64)
    /* MSVC x64 intrinsic: returns the low half, stores the high half */
    lo64 = _umul128(*A, *B, &hi64);
#else
    /* portable schoolbook multiply on 32 bit halves */
    const uint64_t a_hi = *A >> 32;
    const uint64_t b_hi = *B >> 32;
    const uint64_t a_lo = (uint32_t)*A;
    const uint64_t b_lo = (uint32_t)*B;
    const uint64_t hi_hi = a_hi * b_hi;
    const uint64_t hi_lo = a_hi * b_lo;
    const uint64_t lo_hi = b_hi * a_lo;
    const uint64_t lo_lo = a_lo * b_lo;
    const uint64_t partial = lo_lo + (hi_lo << 32);
    uint64_t carry = partial < lo_lo;

    lo64 = partial + (lo_hi << 32);
    carry += lo64 < partial;
    hi64 = hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + carry;
#endif
#if (WYHASH_CONDOM > 1)
    *A ^= lo64;
    *B ^= hi64;
#else
    *A = lo64;
    *B = hi64;
#endif
}
/* Multiply A by B to 128 bits and fold the result: low half XOR high half. */
static __inline__ uint64_t _wymix(uint64_t A, uint64_t B)
{
    /* after the call A holds the low and B the high half (see _wymum) */
    _wymum(&A, &B);
    return B ^ A;
}
/*
 * Byte order detection. WYHASH_LITTLE_ENDIAN may be predefined by the
 * includer; otherwise it is derived from the usual compiler macros.
 * If nothing matches it stays undefined and the byte swapping readers
 * below are selected.
 */
#ifndef WYHASH_LITTLE_ENDIAN
#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 0
#endif
#endif
/*
 * _wyr8 / _wyr4: read 8 / 4 bytes at p (any alignment, via memcpy) and
 * return them interpreted as a little endian unsigned integer.
 * The 4 byte reader uses uint32_t so the memcpy destination is exactly
 * 4 bytes wide regardless of the width of 'unsigned'.
 */
#if (WYHASH_LITTLE_ENDIAN)
static __inline__ uint64_t _wyr8(const uint8_t *p)
{
    uint64_t v;
    memcpy(&v, p, 8);
    return v;
}
static __inline__ uint64_t _wyr4(const uint8_t *p)
{
    uint32_t v;
    memcpy(&v, p, 4);
    return v;
}
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
static __inline__ uint64_t _wyr8(const uint8_t *p)
{
    uint64_t v;
    memcpy(&v, p, 8);
    return __builtin_bswap64(v);
}
static __inline__ uint64_t _wyr4(const uint8_t *p)
{
    uint32_t v;
    memcpy(&v, p, 4);
    return __builtin_bswap32(v);
}
#elif defined(_MSC_VER)
static __inline__ uint64_t _wyr8(const uint8_t *p)
{
    uint64_t v;
    memcpy(&v, p, 8);
    return _byteswap_uint64(v);
}
static __inline__ uint64_t _wyr4(const uint8_t *p)
{
    uint32_t v;
    memcpy(&v, p, 4);
    return _byteswap_ulong(v);
}
#endif
/*
 * Pack 1..3 bytes at p into a 24 bit value: first byte in bits 16-23,
 * byte at index k/2 in bits 8-15, last byte (index k-1) in bits 0-7.
 * k must be at least 1.
 */
static __inline__ uint64_t _wyr3(const uint8_t *p, unsigned k)
{
    const uint64_t first = p[0];
    const uint64_t middle = p[k >> 1];
    const uint64_t last = p[k - 1];

    return (first << 16) | (middle << 8) | last;
}
/*
 * Mix the final i (0..16) bytes at p into the hash and return the result.
 *   p      - start of the remaining bytes
 *   len    - total length of the hashed input (mixed into the result)
 *   seed   - running hash state
 *   secret - secret table; only secret[1] is used here
 *   i      - number of bytes remaining at p
 *
 * With WYHASH_CONDOM > 0 short tails are read with the 4 and 3 byte
 * readers. In the default mode a tail shorter than 8 bytes is assembled
 * byte by byte, so nothing outside p[0..i-1] is touched and no shift by
 * 64 can happen when i is 0; for i >= 1 the operands are the same values
 * the unbounded 8 byte reads followed by shifting would yield.
 */
static __inline__ uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i)
{
    uint64_t a, b;
#if(WYHASH_CONDOM>0)
    if(_likely_(i<=8)){
        if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); }
        else if (_likely_(i)){ a=_wyr3(p,i); b=0; }
        else a=b=0;
    }
    else{ a=_wyr8(p); b=_wyr8(p+i-8); }
#else
    if (i >= 8)
    {
        /* both reads lie inside p[0..i-1]; they overlap when i < 16 */
        a = _wyr8(p);
        b = _wyr8(p + i - 8);
    }
    else
    {
        uint64_t k;

        /* b: the i tail bytes as a little endian value in the low bytes */
        b = 0;
        for (k = 0; k < i; k++)
        {
            b |= ((uint64_t)p[k]) << (k << 3);
        }
        /* a: the same bytes moved to the top of the word; the shift count
           is 8..56 bits for i = 7..1, and i == 0 yields 0 without shifting */
        a = i ? (b << ((8 - i) << 3)) : 0;
    }
#endif
    return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed));
}

/*
 * Consume the remaining i bytes at p: while more than 16 bytes are left,
 * mix one 16 byte chunk into seed and advance; the last 0..16 bytes are
 * handed to _wyfinish16 together with the total input length len.
 */
static __inline__ uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i)
{
    for (;;)
    {
        if (_likely_(i <= 16))
        {
            return _wyfinish16(p, len, seed, secret, i);
        }
        seed = _wymix(_wyr8(p) ^ secret[1], _wyr8(p + 8) ^ seed);
        p += 16;
        i -= 16;
    }
}

/*
 * Hash len bytes at key.
 *   key    - data to hash
 *   len    - number of bytes at key
 *   seed   - initial hash state; pass a previous result to chain hashes
 *   secret - table of five 64 bit secrets (indices 0..4 are read)
 * Returns the 64 bit hash.
 */
static __inline__ uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret)
{
    const uint8_t *cursor;
    uint64_t remaining;
    uint64_t lane0;
    uint64_t lane1;

    cursor = (const uint8_t *)key;
    remaining = len;
    lane0 = seed ^ secret[0];
    if (_unlikely_(remaining > 64))
    {
        /* two independent lanes, each mixing 32 bytes of every 64 byte
           stripe; the loop leaves 1..64 bytes for the tail handling */
        lane1 = lane0;
        while (remaining > 64)
        {
            lane0 = _wymix(_wyr8(cursor) ^ secret[1], _wyr8(cursor + 8) ^ lane0)
                    ^ _wymix(_wyr8(cursor + 16) ^ secret[2], _wyr8(cursor + 24) ^ lane0);
            lane1 = _wymix(_wyr8(cursor + 32) ^ secret[3], _wyr8(cursor + 40) ^ lane1)
                    ^ _wymix(_wyr8(cursor + 48) ^ secret[4], _wyr8(cursor + 56) ^ lane1);
            cursor += 64;
            remaining -= 64;
        }
        lane0 ^= lane1;
    }
    return _wyfinish(cursor, len, lane0, secret, remaining);
}
/*
 * Default secret table passed to wyhash(), also used by wyhash64() and
 * wyrand(). Declared static so that every translation unit including this
 * header gets its own private copy; a non-static definition here would be
 * an external definition in each includer and clash at link time.
 */
static const uint64_t _wyp[5] = {
    0xa0761d6478bd642full,
    0xe7037ed1a0b428dbull,
    0x8ebc6af09c88c6e3ull,
    0x589965cc75374cc3ull,
    0x1d8e4e27c47d124full
};
/* Hash two 64 bit values into one, using _wyp[0] and _wyp[1] as secrets. */
static __inline__ uint64_t wyhash64(uint64_t A, uint64_t B)
{
    uint64_t lo;
    uint64_t hi;

    lo = A ^ _wyp[0];
    hi = B ^ _wyp[1];
    /* lo / hi become the low / high half of the 128 bit product */
    _wymum(&lo, &hi);
    return _wymix(lo ^ _wyp[0], hi ^ _wyp[1]);
}
/*
 * Pseudo random generator step: advances *seed by _wyp[0] and returns a
 * 64 bit value mixed from the new state.
 */
static __inline__ uint64_t wyrand(uint64_t *seed)
{
    uint64_t state;

    state = *seed + _wyp[0];
    *seed = state;
    return _wymix(state, state ^ _wyp[1]);
}
#endif
Loading