From 2edbcf829d93e93c9432503b44a7e33b59a068f2 Mon Sep 17 00:00:00 2001 From: Koichiro Iwao Date: Tue, 2 Apr 2024 10:35:28 +0900 Subject: [PATCH 1/4] Add wyhash.h with modifications by @trishume (cherry picked from commit 70e2f3b928b5e8d7898c4d612efca112839ee0de) --- module/Makefile.am | 1 + module/wyhash.h | 113 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 module/wyhash.h diff --git a/module/Makefile.am b/module/Makefile.am index d628df63..c7cd2a14 100644 --- a/module/Makefile.am +++ b/module/Makefile.am @@ -79,6 +79,7 @@ noinst_HEADERS = \ rdpXv.h \ amd64/funcs_amd64.h \ x86/funcs_x86.h \ + wyhash.h \ $(EXTRA_HEADERS) libxorgxrdp_la_LTLIBRARIES = libxorgxrdp.la diff --git a/module/wyhash.h b/module/wyhash.h new file mode 100644 index 00000000..5afc85f3 --- /dev/null +++ b/module/wyhash.h @@ -0,0 +1,113 @@ +/* Author: Wang Yi + chopped down and converted to older C standard for xorgxrdp +*/ +#ifndef wyhash_final_version +#define wyhash_final_version +#ifndef WYHASH_CONDOM +#define WYHASH_CONDOM 0 +#endif +#include <stdint.h> +#include <string.h> +#if defined(_MSC_VER) && defined(_M_X64) + #include <intrin.h> + #pragma intrinsic(_umul128) +#endif +#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) + #define _likely_(x) __builtin_expect(x,1) + #define _unlikely_(x) __builtin_expect(x,0) +#else + #define _likely_(x) (x) + #define _unlikely_(x) (x) +#endif +static __inline__ uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); } +static __inline__ void _wymum(uint64_t *A, uint64_t *B){ +#if defined(__SIZEOF_INT128__) + __uint128_t r; + r=*A; r*=*B; + #if(WYHASH_CONDOM>1) + *A^=(uint64_t)r; *B^=(uint64_t)(r>>64); + #else + *A=(uint64_t)r; *B=(uint64_t)(r>>64); + #endif +#elif defined(_MSC_VER) && defined(_M_X64) + #if(WYHASH_CONDOM>1) + uint64_t a, b; + a=_umul128(*A,*B,&b); + *A^=a; *B^=b; + #else + *A=_umul128(*A,*B,B); + #endif +#else + uint64_t ha, hb, la, lb, hi, lo; + uint64_t rh, rm0, rm1, rl, t, c; + ha=*A>>32; 
hb=*B>>32; la=(uint32_t)*A; lb=(uint32_t)*B; + rh=ha*hb; rm0=ha*lb; rm1=hb*la; rl=la*lb; t=rl+(rm0<<32); c=t<rl; + lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c; + #if(WYHASH_CONDOM>1) + *A^=lo; *B^=hi; + #else + *A=lo; *B=hi; + #endif +#endif +} +static __inline__ uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; } +#ifndef WYHASH_LITTLE_ENDIAN + #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define WYHASH_LITTLE_ENDIAN 1 + #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #define WYHASH_LITTLE_ENDIAN 0 + #endif +#endif +#if (WYHASH_LITTLE_ENDIAN) +static __inline__ uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;} +static __inline__ uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;} +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) +static __inline__ uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);} +static __inline__ uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);} +#elif defined(_MSC_VER) +static __inline__ uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);} +static __inline__ uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);} +#endif +static __inline__ uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];} +static __inline__ uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){ +#if(WYHASH_CONDOM>0) + uint64_t a, b; + if(_likely_(i<=8)){ + if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); } + else if (_likely_(i)){ a=_wyr3(p,i); b=0; } + else a=b=0; + } + else{ a=_wyr8(p); b=_wyr8(p+i-8); } + return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed)); +#else + #define oneshot_shift 
((i<8)*((8-i)<<3)) + return _wymix(secret[1]^len,_wymix((_wyr8(p)<<oneshot_shift)^secret[1],(_wyr8(p+i-8)>>oneshot_shift)^seed)); +#endif +} + +static __inline__ uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){ + if(_likely_(i<=16)) return _wyfinish16(p,len,seed,secret,i); + return _wyfinish(p+16,len,_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed),secret,i-16); +} + +static __inline__ uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){ + const uint8_t *p; + uint64_t i; + uint64_t see1; + p=(const uint8_t *)key; + i=len; seed^=*secret; + if(_unlikely_(i>64)){ + see1=seed; + do{ + seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed)^_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^seed); + see1=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see1)^_wymix(_wyr8(p+48)^secret[4],_wyr8(p+56)^see1); + p+=64; i-=64; + }while(i>64); + seed^=see1; + } + return _wyfinish(p,len,seed,secret,i); +} +const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full}; +static __inline__ uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);} +static __inline__ uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);} +#endif From 4afe359d44e2f2a08b50b72aed7e9a77fe16c75b Mon Sep 17 00:00:00 2001 From: Koichiro Iwao Date: Tue, 2 Apr 2024 10:37:44 +0900 Subject: [PATCH 2/4] Switch to wyhash from CRC for capture tile diff Originally suggested by @trishume at #167. 
(cherry picked from commit 813e613039bb451da2fe5916b87710b93d4e02da) --- module/rdpCapture.c | 19 +++++++++++-------- module/rdpClientCon.h | 2 +- module/rdpEgl.c | 5 +++-- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/module/rdpCapture.c b/module/rdpCapture.c index 577a4a9d..9e629fdc 100644 --- a/module/rdpCapture.c +++ b/module/rdpCapture.c @@ -46,6 +46,10 @@ capture #include "rdpMisc.h" #include "rdpCapture.h" +#include "wyhash.h" +/* hex digits of pi as a 64 bit int */ +#define WYHASH_SEED 0x3243f6a8885a308dull + #if defined(XORGXRDP_GLAMOR) #include "rdpEgl.h" #include <glamor.h> @@ -848,7 +852,7 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects, int dst_stride; int crc_offset; int crc_stride; - int crc; + uint64_t crc; int num_crcs; int mon_index; @@ -887,7 +891,7 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects, /* resize the crc list */ clientCon->num_rfx_crcs_alloc[mon_index] = num_crcs; free(clientCon->rfx_crcs[mon_index]); - clientCon->rfx_crcs[mon_index] = g_new0(int, num_crcs); + clientCon->rfx_crcs[mon_index] = g_new0(uint64_t, num_crcs); } extents_rect = *rdpRegionExtents(in_reg); @@ -913,7 +917,8 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects, } else { - crc = crc_start(); + /* hex digits of pi as a 64 bit int */ + crc = WYHASH_SEED; if (rcode == rgnPART) { LLOGLN(10, ("rdpCapture2: rgnPART")); @@ -922,8 +927,7 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects, rdpRegionIntersect(&tile_reg, in_reg, &tile_reg); rects = REGION_RECTS(&tile_reg); num_rects = REGION_NUM_RECTS(&tile_reg); - crc = crc_process_data(crc, rects, - num_rects * sizeof(BoxRec)); + crc = wyhash((const void*)rects, num_rects * sizeof(BoxRec), crc, _wyp); rdpCopyBox_a8r8g8b8_to_yuvalp(x, y, src, src_stride, dst, dst_stride, @@ -939,11 +943,10 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects, &rect, 1); } crc_dst = dst + (y << 8) * (dst_stride 
>> 8) + (x << 8); - crc = crc_process_data(crc, crc_dst, 64 * 64 * 4); - crc = crc_end(crc); + crc = wyhash((const void*)crc_dst, 64 * 64 * 4, crc, _wyp); crc_offset = (y / XRDP_RFX_ALIGN) * crc_stride + (x / XRDP_RFX_ALIGN); - LLOGLN(10, ("rdpCapture2: crc 0x%8.8x 0x%8.8x", + LLOGLN(10, ("rdpCapture2: crc 0x%" PRIx64 " 0x%" PRIx64, crc, clientCon->rfx_crcs[mon_index][crc_offset])); if (crc == clientCon->rfx_crcs[mon_index][crc_offset]) { diff --git a/module/rdpClientCon.h b/module/rdpClientCon.h index 14ae84c3..5ff1de21 100644 --- a/module/rdpClientCon.h +++ b/module/rdpClientCon.h @@ -123,7 +123,7 @@ struct _rdpClientCon RegionPtr dirtyRegion; int num_rfx_crcs_alloc[16]; - int *rfx_crcs[16]; + uint64_t *rfx_crcs[16]; int send_key_frame[16]; /* true = skip drawing */ diff --git a/module/rdpEgl.c b/module/rdpEgl.c index b84f65bd..04866d83 100644 --- a/module/rdpEgl.c +++ b/module/rdpEgl.c @@ -576,7 +576,7 @@ rdpEglOut(rdpClientCon *clientCon, struct rdp_egl *egl, RegionPtr in_reg, /* resize the crc list */ clientCon->num_rfx_crcs_alloc[mon_index] = num_crcs; free(clientCon->rfx_crcs[mon_index]); - clientCon->rfx_crcs[mon_index] = g_new0(int, num_crcs); + clientCon->rfx_crcs[mon_index] = g_new0(uint64_t, num_crcs); } tile_extents_stride = (tile_extents_rect->x2 - tile_extents_rect->x1) / 64; out_rect_index = 0; @@ -614,7 +614,8 @@ rdpEglOut(rdpClientCon *clientCon, struct rdp_egl *egl, RegionPtr in_reg, crc = crc_end(crc); if (crc != crcs[(ly / 64) * tile_extents_stride + (lx / 64)]) { - LLOGLN(0, ("rdpEglOut: error crc no match 0x%8.8x 0x%8.8x", + LLOGLN(0, ("rdpEglOut: error crc no match " + "0x%" PRIx64 " 0x%" PRIx64, crc, crcs[(ly / 64) * tile_extents_stride + (lx / 64)])); } From 5be0be8f53f093e933f8e6d759377bb97843e7e2 Mon Sep 17 00:00:00 2001 From: Koichiro Iwao Date: Wed, 3 Apr 2024 15:22:07 +0900 Subject: [PATCH 3/4] Lazy color convert copy after hash in rdpCapture2 Originally developed @trishume. 
(cherry picked from commit b9475e81be7d879cb00a6fa0ec34f4c4e9405cd6) --- module/rdpCapture.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/module/rdpCapture.c b/module/rdpCapture.c index 9e629fdc..e08c01b4 100644 --- a/module/rdpCapture.c +++ b/module/rdpCapture.c @@ -590,6 +590,23 @@ isShmStatusActive(enum shared_memory_status status) { } } +/******************************************************************************/ +/* hash one 64x64 tile of 4-byte pixels row by row, with no error checking */ +static uint64_t +wyhash_rfx_tile(const uint8_t *src, int src_stride, int x, int y, uint64_t seed) +{ + int row; + uint64_t hash; + const uint8_t *s8; + hash = seed; + for(row = 0; row < 64; row++) + { + s8 = src + (y+row) * src_stride + x * 4; + hash = wyhash((const void*)s8, 64 * 4, hash, _wyp); + } + return hash; +} + /******************************************************************************/ static Bool rdpCapture0(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects, @@ -932,19 +949,16 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects, src, src_stride, dst, dst_stride, rects, num_rects); + crc_dst = dst + (y << 8) * (dst_stride >> 8) + (x << 8); + crc = wyhash((const void*)crc_dst, 64 * 64 * 4, crc, _wyp); rdpRegionUninit(&tile_reg); } else /* rgnIN */ { LLOGLN(10, ("rdpCapture2: rgnIN")); - rdpCopyBox_a8r8g8b8_to_yuvalp(x, y, - src, src_stride, - dst, dst_stride, - &rect, 1); + crc = wyhash_rfx_tile(src, src_stride, x, y, crc); } - crc_dst = dst + (y << 8) * (dst_stride >> 8) + (x << 8); - crc = wyhash((const void*)crc_dst, 64 * 64 * 4, crc, _wyp); - crc_offset = (y / XRDP_RFX_ALIGN) * crc_stride + crc_offset = (y / XRDP_RFX_ALIGN) * crc_stride + (x / XRDP_RFX_ALIGN); LLOGLN(10, ("rdpCapture2: crc 0x%" PRIx64 " 0x%" PRIx64, crc, clientCon->rfx_crcs[mon_index][crc_offset])); @@ -957,6 +971,14 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects, } else { + /* lazily only do this if 
hash wasn't identical */ + if (rcode != rgnPART) + { + rdpCopyBox_a8r8g8b8_to_yuvalp(x, y, + src, src_stride, + dst, dst_stride, + &rect, 1); + } clientCon->rfx_crcs[mon_index][crc_offset] = crc; (*out_rects)[out_rect_index] = rect; out_rect_index++; From ef8147921219bfc34f0ad5bc197abf310589515e Mon Sep 17 00:00:00 2001 From: Koichiro Iwao Date: Mon, 8 Apr 2024 00:05:42 +0900 Subject: [PATCH 4/4] Tighten loop (cherry picked from commit ac6d867948f034fe20ebd59d0e5cfdcbf48784ce) --- module/rdpCapture.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/rdpCapture.c b/module/rdpCapture.c index e08c01b4..334a4880 100644 --- a/module/rdpCapture.c +++ b/module/rdpCapture.c @@ -599,10 +599,11 @@ wyhash_rfx_tile(const uint8_t *src, int src_stride, int x, int y, uint64_t seed) uint64_t hash; const uint8_t *s8; hash = seed; + s8 = src + (y * src_stride) + (x * 4); for(row = 0; row < 64; row++) { - s8 = src + (y+row) * src_stride + x * 4; hash = wyhash((const void*)s8, 64 * 4, hash, _wyp); + s8 += src_stride; } return hash; }