From e369e21c112a5b6153ba47ef20cb0fd0ad8db92d Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Sat, 23 Nov 2024 20:16:42 +0000 Subject: [PATCH 1/4] Prototype linear HDR error metrics. Prior to this change the HDR code path computed color error, weight error, and final encoding error, by simply using the LNS encoded data as linear data values. --- Source/astcenc_compress_symbolic.cpp | 3 ++- Source/astcenc_decompress_symbolic.cpp | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index 98d249512..eb4fc9172 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -368,8 +368,9 @@ static float compress_symbolic_block_for_partition_1plane( int max_weight_quant = astc::min(static_cast(QUANT_32), quant_limit); + bool is_hdr = (config.profile == ASTCENC_PRF_HDR) || (config.profile == ASTCENC_PRF_HDR_RGB_LDR_A); auto compute_difference = &compute_symbolic_block_difference_1plane; - if ((partition_count == 1) && !(config.flags & ASTCENC_FLG_MAP_RGBM)) + if ((partition_count == 1) && !(config.flags & ASTCENC_FLG_MAP_RGBM) && !is_hdr) { compute_difference = &compute_symbolic_block_difference_1plane_1partition; } diff --git a/Source/astcenc_decompress_symbolic.cpp b/Source/astcenc_decompress_symbolic.cpp index e7791eef6..583ff3144 100644 --- a/Source/astcenc_decompress_symbolic.cpp +++ b/Source/astcenc_decompress_symbolic.cpp @@ -354,6 +354,7 @@ float compute_symbolic_block_difference_2plane( ep0, ep1); vmask4 u8_mask = get_u8_component_mask(config.profile, blk); + vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns); // Unpack and compute error for each texel in the partition unsigned int texel_count = bsd.texel_count; @@ -362,8 +363,15 @@ float compute_symbolic_block_difference_2plane( vint4 weight = select(vint4(plane1_weights[i]), vint4(plane2_weights[i]), plane2_mask); vint4 colori = lerp_color_int(u8_mask, ep0, ep1, weight); + 
#if 0 vfloat4 color = int_to_float(colori); vfloat4 oldColor = blk.texel(i); + #else + // TODO: Hack to force linear HDR RGB image error analysis + vfloat4 color = decode_texel(colori, lns_mask); + vfloat4 oldColor = float16_to_float(lns_to_sf16(float_to_int(blk.texel(i)))); + oldColor.set_lane<3>(1.0f); + #endif // Compare error using a perceptual decode metric for RGBM textures if (config.flags & ASTCENC_FLG_MAP_RGBM) @@ -451,6 +459,8 @@ float compute_symbolic_block_difference_1plane( rgb_lns, a_lns, ep0, ep1); + vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns); + // Unpack and compute error for each texel in the partition unsigned int texel_count = pi.partition_texel_count[i]; for (unsigned int j = 0; j < texel_count; j++) @@ -459,8 +469,15 @@ float compute_symbolic_block_difference_1plane( vint4 colori = lerp_color_int(u8_mask, ep0, ep1, vint4(plane1_weights[tix])); + #if 0 vfloat4 color = int_to_float(colori); vfloat4 oldColor = blk.texel(tix); + #else + // TODO: Hack to force linear HDR RGB image error analysis + vfloat4 color = decode_texel(colori, lns_mask); + vfloat4 oldColor = float16_to_float(lns_to_sf16(float_to_int(blk.texel(tix)))); + oldColor.set_lane<3>(1.0f); + #endif // Compare error using a perceptual decode metric for RGBM textures if (config.flags & ASTCENC_FLG_MAP_RGBM) From 2e59e2a06be4b9532714644681f95fd40dbda907 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Sat, 23 Nov 2024 22:58:29 +0000 Subject: [PATCH 2/4] Use relative sum of squares error for HDR textures HDR texture values are stored logarithmically. Using absolute sum of squares on the logarithmic values causes the compressor to spend too much effort preserving imperceptible shifts in dark channel values at the expense of bright values in the same block. This performs poorly in blocks with sharp luminance changes (dark texels) and in blocks with saturated color values (dark channels in bright pixels). 
Using absolute sum of squares on linearized HDR values avoids the compressor fixating on dark values, but instead causes the compressor to spend too much effort preserving bright values. This is because the errors in the bright channels can be orders of magnitude bigger than the errors in the dark channels, and dark values can end up quantizing close to black. Using relative sum of squares on the logarithmic values, proposed by Ryg in the blog below, encourages the compressor to find a balance of relative error across the whole block, favoring neither light nor dark channels. https://fgiesen.wordpress.com/2024/11/14/mrsse/ --- Source/astcenc_compress_symbolic.cpp | 4 +++ Source/astcenc_decompress_symbolic.cpp | 46 ++++++++++++++------------ 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index eb4fc9172..946caa275 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -633,6 +633,7 @@ static float compress_symbolic_block_for_partition_1plane( if (errorval < best_errorval_in_scb) { + trace_add_data("select", "1"); best_errorval_in_scb = errorval; workscb.errorval = errorval; scb = workscb; @@ -681,6 +682,7 @@ static float compress_symbolic_block_for_partition_1plane( if (errorval < best_errorval_in_scb) { + trace_add_data("select", "1"); best_errorval_in_scb = errorval; workscb.errorval = errorval; scb = workscb; @@ -967,6 +969,7 @@ static float compress_symbolic_block_for_partition_2planes( if (errorval < best_errorval_in_scb) { + trace_add_data("select", "1"); best_errorval_in_scb = errorval; workscb.errorval = errorval; scb = workscb; @@ -1016,6 +1019,7 @@ static float compress_symbolic_block_for_partition_2planes( if (errorval < best_errorval_in_scb) { + trace_add_data("select", "1"); best_errorval_in_scb = errorval; workscb.errorval = errorval; scb = workscb; diff --git a/Source/astcenc_decompress_symbolic.cpp 
b/Source/astcenc_decompress_symbolic.cpp index 583ff3144..f7d739cf6 100644 --- a/Source/astcenc_decompress_symbolic.cpp +++ b/Source/astcenc_decompress_symbolic.cpp @@ -363,15 +363,8 @@ float compute_symbolic_block_difference_2plane( vint4 weight = select(vint4(plane1_weights[i]), vint4(plane2_weights[i]), plane2_mask); vint4 colori = lerp_color_int(u8_mask, ep0, ep1, weight); - #if 0 vfloat4 color = int_to_float(colori); vfloat4 oldColor = blk.texel(i); - #else - // TODO: Hack to force linear HDR RGB image error analysis - vfloat4 color = decode_texel(colori, lns_mask); - vfloat4 oldColor = float16_to_float(lns_to_sf16(float_to_int(blk.texel(i)))); - oldColor.set_lane<3>(1.0f); - #endif // Compare error using a perceptual decode metric for RGBM textures if (config.flags & ASTCENC_FLG_MAP_RGBM) @@ -403,11 +396,19 @@ float compute_symbolic_block_difference_2plane( ); } - vfloat4 error = oldColor - color; - error = min(abs(error), 1e15f); - error = error * error; + // Compute sum of squared errors, weighted by channel weight + vfloat4 error = (oldColor - color); + error = dot(error, error * blk.channel_weight); - summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT); + // Convert this relative sum of squared error for HDR to avoid light + // channels dominating the error calculations. 
+ // See https://fgiesen.wordpress.com/2024/11/14/mrsse/ + if (any(lns_mask)) + { + error = error / (dot(oldColor, oldColor) + 1e-10f); + } + + summa += min(error, ERROR_CALC_DEFAULT); } return summa.lane<0>(); @@ -469,15 +470,8 @@ float compute_symbolic_block_difference_1plane( vint4 colori = lerp_color_int(u8_mask, ep0, ep1, vint4(plane1_weights[tix])); - #if 0 vfloat4 color = int_to_float(colori); vfloat4 oldColor = blk.texel(tix); - #else - // TODO: Hack to force linear HDR RGB image error analysis - vfloat4 color = decode_texel(colori, lns_mask); - vfloat4 oldColor = float16_to_float(lns_to_sf16(float_to_int(blk.texel(tix)))); - oldColor.set_lane<3>(1.0f); - #endif // Compare error using a perceptual decode metric for RGBM textures if (config.flags & ASTCENC_FLG_MAP_RGBM) @@ -509,11 +503,19 @@ float compute_symbolic_block_difference_1plane( ); } - vfloat4 error = oldColor - color; - error = min(abs(error), 1e15f); - error = error * error; + // Compute sum of squared errors, weighted by channel weight + vfloat4 error = (oldColor - color); + error = dot(error, error * blk.channel_weight); + + // Convert this relative sum of squared error for HDR to avoid light + // channels dominating the error calculations + // See https://fgiesen.wordpress.com/2024/11/14/mrsse/ + if (any(lns_mask)) + { + error = error / (dot(oldColor, oldColor) + 1e-10f); + } - summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT); + summa += min(error, ERROR_CALC_DEFAULT); } } From 1a3d5580091911be88b7954424943370f43fb4dc Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Sat, 23 Nov 2024 23:25:38 +0000 Subject: [PATCH 3/4] Remove trace points --- Source/astcenc_compress_symbolic.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index 946caa275..eb4fc9172 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -633,7 +633,6 @@ static float 
compress_symbolic_block_for_partition_1plane( if (errorval < best_errorval_in_scb) { - trace_add_data("select", "1"); best_errorval_in_scb = errorval; workscb.errorval = errorval; scb = workscb; @@ -682,7 +681,6 @@ static float compress_symbolic_block_for_partition_1plane( if (errorval < best_errorval_in_scb) { - trace_add_data("select", "1"); best_errorval_in_scb = errorval; workscb.errorval = errorval; scb = workscb; @@ -969,7 +967,6 @@ static float compress_symbolic_block_for_partition_2planes( if (errorval < best_errorval_in_scb) { - trace_add_data("select", "1"); best_errorval_in_scb = errorval; workscb.errorval = errorval; scb = workscb; @@ -1019,7 +1016,6 @@ static float compress_symbolic_block_for_partition_2planes( if (errorval < best_errorval_in_scb) { - trace_add_data("select", "1"); best_errorval_in_scb = errorval; workscb.errorval = errorval; scb = workscb; From f959f7624ee18fc4b7dfd1e4df22f1bfbe15a978 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Sat, 23 Nov 2024 23:28:41 +0000 Subject: [PATCH 4/4] Use scalar test not any(vmask) --- Source/astcenc_decompress_symbolic.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Source/astcenc_decompress_symbolic.cpp b/Source/astcenc_decompress_symbolic.cpp index f7d739cf6..91e6b444c 100644 --- a/Source/astcenc_decompress_symbolic.cpp +++ b/Source/astcenc_decompress_symbolic.cpp @@ -354,7 +354,7 @@ float compute_symbolic_block_difference_2plane( ep0, ep1); vmask4 u8_mask = get_u8_component_mask(config.profile, blk); - vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns); + bool any_lns = rgb_lns || a_lns; // Unpack and compute error for each texel in the partition unsigned int texel_count = bsd.texel_count; @@ -403,8 +403,9 @@ float compute_symbolic_block_difference_2plane( // Convert this relative sum of squared error for HDR to avoid light // channels dominating the error calculations. 
// See https://fgiesen.wordpress.com/2024/11/14/mrsse/ - if (any(lns_mask)) + if (any_lns) { + // TODO: Divisor could be precomputed at load time error = error / (dot(oldColor, oldColor) + 1e-10f); } @@ -460,7 +461,7 @@ float compute_symbolic_block_difference_1plane( rgb_lns, a_lns, ep0, ep1); - vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns); + bool any_lns = rgb_lns || a_lns; // Unpack and compute error for each texel in the partition unsigned int texel_count = pi.partition_texel_count[i]; @@ -510,8 +511,9 @@ float compute_symbolic_block_difference_1plane( // Convert this relative sum of squared error for HDR to avoid light // channels dominating the error calculations // See https://fgiesen.wordpress.com/2024/11/14/mrsse/ - if (any(lns_mask)) + if (any_lns) { + // TODO: Divisor could be precomputed at load time error = error / (dot(oldColor, oldColor) + 1e-10f); }