Skip to content

Commit

Permalink
Remove global -ffast-math flag, but apply fast math to just color_hel…
Browse files Browse the repository at this point in the history
…pers.h/color_helpers.cpp

Turn on FTZ/DAZ inside create_color_mgmt_luts()
  • Loading branch information
sharkautarch committed Aug 30, 2024
1 parent 467e12c commit 916e09f
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 8 deletions.
4 changes: 0 additions & 4 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ add_project_arguments(cppc.get_supported_arguments([
'-Wno-missing-braces',
]), language: 'cpp')

add_project_arguments(cppc.get_supported_arguments([
'-ffast-math',
]), language: 'cpp')

pipewire_dep = dependency('libpipewire-0.3', required: get_option('pipewire'))
librt_dep = cppc.find_library('rt', required : get_option('pipewire'))
hwdata_dep = dependency('hwdata', required : false)
Expand Down
61 changes: 61 additions & 0 deletions src/Utils/Directives.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#pragma once

namespace gamescope::Directives {

/// Scoped guard around the floating-point control register.
///
/// On supported targets the constructor stores the current control register
/// (MXCSR on x86, FPCR on AArch64) in m_csr and then enables flush-to-zero /
/// denormals-are-zero handling; the destructor writes the stored value back.
/// On every other target construction and destruction do nothing.
///
/// Normally used through SET_FAST_MATH_FLAGS, which declares a guard named
/// `switcher` in the enclosing scope.
struct FlagSwitcher {
    /// Control-register value captured by the constructor
    /// (0 on targets without an implementation).
    unsigned long long m_csr;

    FlagSwitcher();
    ~FlagSwitcher();
};

} // namespace gamescope::Directives

// NOTE: every out-of-class definition below is `inline`. This header is
// included from several translation units, and non-inline definitions here
// would produce duplicate symbols at link time.

// `__x86__` is not a macro GCC or Clang predefine; the 32-bit x86 macro is
// `__i386__`. The 32-bit path is additionally guarded by `__SSE2__` so the
// MXCSR intrinsics are only used when SSE code generation is enabled.
#if defined(__x86_64__) || ( defined(__i386__) && defined(__SSE2__) )
# include <xmmintrin.h>
# include <pmmintrin.h>
# define SET_FAST_MATH_FLAGS gamescope::Directives::FlagSwitcher switcher{};
// Writes `csr` back to MXCSR with the DAZ and FTZ bits additionally set.
# define SET_FLUSH_AND_ZERO_TO_ON(csr) _mm_setcsr( (csr) | (_MM_DENORMALS_ZERO_ON | _MM_FLUSH_ZERO_ON) )

inline gamescope::Directives::FlagSwitcher::FlagSwitcher() : m_csr{_mm_getcsr()} {
    SET_FLUSH_AND_ZERO_TO_ON( static_cast<unsigned int>(m_csr) );
}

inline gamescope::Directives::FlagSwitcher::~FlagSwitcher() {
    // Restore exactly what was read in the constructor.
    _mm_setcsr( static_cast<unsigned int>(m_csr) );
}

#elif defined(__aarch64__) && __has_builtin(__builtin_aarch64_get_fpcr64) && __has_builtin(__builtin_aarch64_set_fpcr64)
# define SET_FAST_MATH_FLAGS gamescope::Directives::FlagSwitcher switcher{};

// FPCR bit masks, based on:
//   https://github.com/DLTcollab/sse2neon/blob/706d3b58025364c2371cafcf9b16e32ff7e630ed/sse2neon.h#L2433
//   https://stackoverflow.com/a/59001820
static constexpr unsigned long long fz_bit = 0x1'00'00'00;  // bit 24
static constexpr unsigned long long fz16_bit = 0x8'00'00;   // bit 19

inline gamescope::Directives::FlagSwitcher::FlagSwitcher() : m_csr{__builtin_aarch64_get_fpcr64()} {
    __builtin_aarch64_set_fpcr64(m_csr | fz_bit | fz16_bit);
}

inline gamescope::Directives::FlagSwitcher::~FlagSwitcher() {
    // Restore exactly what was read in the constructor.
    __builtin_aarch64_set_fpcr64(m_csr);
}

#else
// No known way to toggle flush-to-zero on this target: the macro expands to
// nothing and the guard is a no-op. The members are still defined so that
// code naming FlagSwitcher directly continues to link.
# define SET_FAST_MATH_FLAGS

inline gamescope::Directives::FlagSwitcher::FlagSwitcher() : m_csr{0} {}

inline gamescope::Directives::FlagSwitcher::~FlagSwitcher() {}

#endif

// FAST_MATH_ON / FAST_MATH_OFF bracket a region of a translation unit that
// should be compiled with relaxed floating-point semantics. Use them as a
// matched pair at file scope: FAST_MATH_ON pushes the compiler's current
// floating-point/optimisation state and relaxes it, FAST_MATH_OFF pops it.
//
// __clang__ is tested before __GNUC__ so that Clang gets its own pragma
// spelling. The two _Pragma operators are deliberately NOT separated by a
// ';' -- at namespace scope that would expand to an empty declaration and
// trip -Wextra-semi / -Wpedantic.
#ifdef __clang__
// https://clang.llvm.org/docs/LanguageExtensions.html#extensions-to-specify-floating-point-flags
# define FAST_MATH_ON  _Pragma("float_control(push)") \
                       _Pragma("float_control(precise, off)")
# define FAST_MATH_OFF _Pragma("float_control(pop)")

#elif defined(__GNUC__)
# define FAST_MATH_ON  _Pragma("GCC push_options") \
                       _Pragma("GCC optimize(\"-ffast-math\")")
# define FAST_MATH_OFF _Pragma("GCC pop_options")

#else
// Unknown compiler: both macros expand to nothing.
# define FAST_MATH_ON
# define FAST_MATH_OFF
#endif
1 change: 1 addition & 0 deletions src/color_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ lut3d_t lut3d_float;

static void BenchmarkCalcColorTransform(EOTF inputEOTF, benchmark::State &state)
{
SET_FAST_MATH_FLAGS
const primaries_t primaries = { { 0.602f, 0.355f }, { 0.340f, 0.574f }, { 0.164f, 0.121f } };
const glm::vec2 white = { 0.3070f, 0.3220f };
const glm::vec2 destVirtualWhite = { 0.f, 0.f };
Expand Down
6 changes: 5 additions & 1 deletion src/color_helpers.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#define COLOR_HELPERS_CPP
#include "color_helpers_impl.h"

FAST_MATH_ON

#include <algorithm>
#include <cstdint>
#include <cmath>
Expand Down Expand Up @@ -214,7 +216,7 @@ inline void lerp_rgb(float* out, const float* a, const float* b, const float* c,

inline float ClampAndSanitize( float a, float min, float max )
{
#ifndef __FAST_MATH__
#if !( defined(__FAST_MATH__) || defined(__FINITE_MATH_ONLY__) )
return std::isfinite( a ) ? std::min(std::max(min, a), max) : min;
#else
return std::min(std::max(min, a), max);
Expand Down Expand Up @@ -910,3 +912,5 @@ const glm::mat3 k_xyz_from_2020 = normalised_primary_matrix( displaycolorimetry_
const glm::mat3 k_2020_from_xyz = glm::inverse( k_xyz_from_2020 );

const glm::mat3 k_2020_from_709 = k_2020_from_xyz * k_xyz_from_709;

FAST_MATH_OFF
8 changes: 7 additions & 1 deletion src/color_helpers_impl.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#pragma once
#include "Utils/Directives.h"

FAST_MATH_ON

#include "color_helpers.h"

namespace rendervulkan {
Expand All @@ -17,4 +21,6 @@ namespace ns_color_tests {

#ifdef COLOR_HELPERS_CPP
REGISTER_LUT_EDGE_SIZE(rendervulkan::s_nLutEdgeSize3d);
#endif
#endif

FAST_MATH_OFF
4 changes: 3 additions & 1 deletion src/color_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "color_helpers.h"
#include "color_helpers_impl.h"
#include <cstdio>

//#include <glm/ext.hpp>
Expand All @@ -16,6 +16,7 @@ lut3d_t lut3d_float;
static void BenchmarkCalcColorTransform(EOTF inputEOTF, benchmark::State &state)
{
SET_FAST_MATH_FLAGS
const primaries_t primaries = { { 0.602f, 0.355f }, { 0.340f, 0.574f }, { 0.164f, 0.121f } };
const glm::vec2 white = { 0.3070f, 0.3220f };
Expand Down Expand Up @@ -232,6 +233,7 @@ void test_eetf2390_mono()

int main(int argc, char* argv[])
{
SET_FAST_MATH_FLAGS
printf("color_tests\n");
// test_eetf2390_mono();
color_tests();
Expand Down
3 changes: 2 additions & 1 deletion src/steamcompmgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ static const gamescope_color_mgmt_t k_ScreenshotColorMgmtHDR =
static void
create_color_mgmt_luts(const gamescope_color_mgmt_t& newColorMgmt, gamescope_color_mgmt_luts outColorMgmtLuts[ EOTF_Count ])
{
SET_FAST_MATH_FLAGS
const displaycolorimetry_t& displayColorimetry = newColorMgmt.displayColorimetry;
const displaycolorimetry_t& outputEncodingColorimetry = newColorMgmt.outputEncodingColorimetry;

Expand Down Expand Up @@ -5069,7 +5070,7 @@ steamcompmgr_latch_frame_done( steamcompmgr_win_t *w, uint64_t vblank_idx )

static inline float santitize_float( float f )
{
#ifndef __FAST_MATH__
#if !( defined(__FAST_MATH__) || defined(__FINITE_MATH_ONLY__) )
return ( std::isfinite( f ) ? f : 0.f );
#else
return f;
Expand Down

0 comments on commit 916e09f

Please sign in to comment.