diff --git a/include/OpenColorIO/OpenColorTypes.h b/include/OpenColorIO/OpenColorTypes.h index c400568eaa..b5729d41a0 100644 --- a/include/OpenColorIO/OpenColorTypes.h +++ b/include/OpenColorIO/OpenColorTypes.h @@ -619,6 +619,13 @@ enum OptimizationFlags : unsigned long */ OPTIMIZATION_NO_DYNAMIC_PROPERTIES = 0x10000000, + /** + * For GPU processor, use native trilinear interpolation for 3D LUTs. This is faster, + * but on many GPUs also lower precision. With low-resolution LUTs, LUTs with large + * extents, or LUTs applied in a linear color space this can sometimes cause color banding. + */ + OPTIMIZATION_NATIVE_GPU_TRILINEAR = 0x20000000, + /// Apply all possible optimizations. OPTIMIZATION_ALL = 0xFFFFFFFF, @@ -645,6 +652,7 @@ enum OptimizationFlags : unsigned long OPTIMIZATION_COMP_LUT1D | OPTIMIZATION_LUT_INV_FAST | OPTIMIZATION_FAST_LOG_EXP_POW | + OPTIMIZATION_NATIVE_GPU_TRILINEAR | OPTIMIZATION_COMP_SEPARABLE_PREFIX), OPTIMIZATION_GOOD = OPTIMIZATION_VERY_GOOD | OPTIMIZATION_COMP_LUT3D, diff --git a/src/OpenColorIO/GPUProcessor.cpp b/src/OpenColorIO/GPUProcessor.cpp index e463903c08..9d86b2b770 100644 --- a/src/OpenColorIO/GPUProcessor.cpp +++ b/src/OpenColorIO/GPUProcessor.cpp @@ -85,6 +85,9 @@ void GPUProcessor::Impl::finalize(const OpRcPtrVec & rawOps, OptimizationFlags o // Is NoOp ? m_isNoOp = m_ops.isNoOp(); + // Store optimization flags for use when generating shader code. + m_oFlags = oFlags; + // Does the color processing introduce crosstalk between the pixel channels? m_hasChannelCrosstalk = m_ops.hasChannelCrosstalk(); @@ -104,7 +107,7 @@ void GPUProcessor::Impl::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCrea // Create the shader program information. 
for(const auto & op : m_ops) { - op->extractGpuShaderInfo(shaderCreator); + op->extractGpuShaderInfo(shaderCreator, m_oFlags); } WriteShaderHeader(shaderCreator); diff --git a/src/OpenColorIO/GPUProcessor.h b/src/OpenColorIO/GPUProcessor.h index 30039a1cd2..5ea1265730 100644 --- a/src/OpenColorIO/GPUProcessor.h +++ b/src/OpenColorIO/GPUProcessor.h @@ -41,6 +41,7 @@ class GPUProcessor::Impl OpRcPtrVec m_ops; bool m_isNoOp = false; bool m_hasChannelCrosstalk = true; + OptimizationFlags m_oFlags = OPTIMIZATION_DEFAULT; std::string m_cacheID; mutable Mutex m_mutex; }; diff --git a/src/OpenColorIO/Op.h b/src/OpenColorIO/Op.h index d48fe512db..f0c4149f20 100644 --- a/src/OpenColorIO/Op.h +++ b/src/OpenColorIO/Op.h @@ -246,7 +246,7 @@ class Op virtual bool supportedByLegacyShader() const { return true; } // Create & add the gpu shader information needed by the op. Op has to be finalized. - virtual void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const = 0; + virtual void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags oFlags = OPTIMIZATION_DEFAULT) const = 0; virtual bool isDynamic() const; virtual bool hasDynamicProperty(DynamicPropertyType type) const; diff --git a/src/OpenColorIO/ops/cdl/CDLOp.cpp b/src/OpenColorIO/ops/cdl/CDLOp.cpp index bd0511ea43..29d841a4d2 100644 --- a/src/OpenColorIO/ops/cdl/CDLOp.cpp +++ b/src/OpenColorIO/ops/cdl/CDLOp.cpp @@ -50,7 +50,7 @@ class CDLOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstCDLOpDataRcPtr cdlData() const { return DynamicPtrCast(data()); } @@ -133,7 +133,7 @@ ConstOpCPURcPtr CDLOp::getCPUOp(bool fastLogExpPow) const return GetCDLCPURenderer(data, fastLogExpPow); } -void CDLOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const 
+void CDLOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstCDLOpDataRcPtr data = cdlData(); GetCDLGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/exponent/ExponentOp.cpp b/src/OpenColorIO/ops/exponent/ExponentOp.cpp index 41b95992c4..0e667db23f 100644 --- a/src/OpenColorIO/ops/exponent/ExponentOp.cpp +++ b/src/OpenColorIO/ops/exponent/ExponentOp.cpp @@ -150,7 +150,7 @@ class ExponentOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstExponentOpDataRcPtr expData() const { return DynamicPtrCast(data()); } @@ -251,7 +251,7 @@ ConstOpCPURcPtr ExponentOp::getCPUOp(bool /*fastLogExpPow*/) const return std::make_shared(expData()); } -void ExponentOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void ExponentOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { GpuShaderText ss(shaderCreator->getLanguage()); ss.indent(); diff --git a/src/OpenColorIO/ops/exposurecontrast/ExposureContrastOp.cpp b/src/OpenColorIO/ops/exposurecontrast/ExposureContrastOp.cpp index fc5d1cd4e4..031b96fd02 100644 --- a/src/OpenColorIO/ops/exposurecontrast/ExposureContrastOp.cpp +++ b/src/OpenColorIO/ops/exposurecontrast/ExposureContrastOp.cpp @@ -49,7 +49,7 @@ class ExposureContrastOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstExposureContrastOpDataRcPtr ecData() const @@ -135,7 +135,7 @@ ConstOpCPURcPtr ExposureContrastOp::getCPUOp(bool /*fastLogExpPow*/) const return 
GetExposureContrastCPURenderer(ecOpData); } -void ExposureContrastOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void ExposureContrastOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstExposureContrastOpDataRcPtr ecOpData = ecData(); GetExposureContrastGPUShaderProgram(shaderCreator, ecOpData); diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOp.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOp.cpp index ac672e1190..f85f4321c2 100644 --- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOp.cpp +++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOp.cpp @@ -45,7 +45,7 @@ class FixedFunctionOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstFixedFunctionOpDataRcPtr fnData() const { return DynamicPtrCast(data()); } @@ -125,7 +125,7 @@ ConstOpCPURcPtr FixedFunctionOp::getCPUOp(bool /*fastLogExpPow*/) const return GetFixedFunctionCPURenderer(data); } -void FixedFunctionOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void FixedFunctionOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstFixedFunctionOpDataRcPtr fnOpData = fnData(); GetFixedFunctionGPUShaderProgram(shaderCreator, fnOpData); diff --git a/src/OpenColorIO/ops/gamma/GammaOp.cpp b/src/OpenColorIO/ops/gamma/GammaOp.cpp index 9d6afccc7d..b1683c0ec7 100644 --- a/src/OpenColorIO/ops/gamma/GammaOp.cpp +++ b/src/OpenColorIO/ops/gamma/GammaOp.cpp @@ -44,7 +44,7 @@ class GammaOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags 
/*oFlags*/) const override; protected: ConstGammaOpDataRcPtr gammaData() const { return DynamicPtrCast(data()); } @@ -123,7 +123,7 @@ ConstOpCPURcPtr GammaOp::getCPUOp(bool fastLogExpPow) const return GetGammaRenderer(data, fastLogExpPow); } -void GammaOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void GammaOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstGammaOpDataRcPtr data = gammaData(); GetGammaGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/gradingprimary/GradingPrimaryOp.cpp b/src/OpenColorIO/ops/gradingprimary/GradingPrimaryOp.cpp index 5bb2f960bf..355c877167 100644 --- a/src/OpenColorIO/ops/gradingprimary/GradingPrimaryOp.cpp +++ b/src/OpenColorIO/ops/gradingprimary/GradingPrimaryOp.cpp @@ -52,7 +52,7 @@ class GradingPrimaryOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstGradingPrimaryOpDataRcPtr primaryData() const @@ -190,7 +190,7 @@ ConstOpCPURcPtr GradingPrimaryOp::getCPUOp(bool /*fastLogExpPow*/) const return GetGradingPrimaryCPURenderer(data); } -void GradingPrimaryOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void GradingPrimaryOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstGradingPrimaryOpDataRcPtr data = primaryData(); GetGradingPrimaryGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/gradingrgbcurve/GradingRGBCurveOp.cpp b/src/OpenColorIO/ops/gradingrgbcurve/GradingRGBCurveOp.cpp index 598ae28e67..69cfc37cec 100644 --- a/src/OpenColorIO/ops/gradingrgbcurve/GradingRGBCurveOp.cpp +++ b/src/OpenColorIO/ops/gradingrgbcurve/GradingRGBCurveOp.cpp @@ -52,7 +52,7 @@ class GradingRGBCurveOp : public Op 
ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstGradingRGBCurveOpDataRcPtr rgbCurveData() const @@ -190,7 +190,7 @@ ConstOpCPURcPtr GradingRGBCurveOp::getCPUOp(bool /*fastLogExpPow*/) const return GetGradingRGBCurveCPURenderer(data); } -void GradingRGBCurveOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void GradingRGBCurveOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstGradingRGBCurveOpDataRcPtr data = rgbCurveData(); GetGradingRGBCurveGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/gradingtone/GradingToneOp.cpp b/src/OpenColorIO/ops/gradingtone/GradingToneOp.cpp index 5968210e71..aeb24aa131 100644 --- a/src/OpenColorIO/ops/gradingtone/GradingToneOp.cpp +++ b/src/OpenColorIO/ops/gradingtone/GradingToneOp.cpp @@ -52,7 +52,7 @@ class GradingToneOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstGradingToneOpDataRcPtr toneData() const @@ -184,7 +184,7 @@ ConstOpCPURcPtr GradingToneOp::getCPUOp(bool /*fastLogExpPow*/) const return GetGradingToneCPURenderer(data); } -void GradingToneOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void GradingToneOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstGradingToneOpDataRcPtr data = toneData(); GetGradingToneGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/log/LogOp.cpp b/src/OpenColorIO/ops/log/LogOp.cpp index 8eeff9b3b2..7a1fc66221 100644 --- 
a/src/OpenColorIO/ops/log/LogOp.cpp +++ b/src/OpenColorIO/ops/log/LogOp.cpp @@ -44,7 +44,7 @@ class LogOp: public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstLogOpDataRcPtr logData() const { return DynamicPtrCast(data()); } @@ -110,7 +110,7 @@ ConstOpCPURcPtr LogOp::getCPUOp(bool fastLogExpPow) const return GetLogRenderer(data, fastLogExpPow); } -void LogOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void LogOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstLogOpDataRcPtr data = logData(); GetLogGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/lut1d/Lut1DOp.cpp b/src/OpenColorIO/ops/lut1d/Lut1DOp.cpp index 412462414b..1fbfddfaa1 100644 --- a/src/OpenColorIO/ops/lut1d/Lut1DOp.cpp +++ b/src/OpenColorIO/ops/lut1d/Lut1DOp.cpp @@ -52,7 +52,7 @@ class Lut1DOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; bool supportedByLegacyShader() const override { return false; } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; ConstLut1DOpDataRcPtr lut1DData() const { return DynamicPtrCast(data()); } Lut1DOpDataRcPtr lut1DData() { return DynamicPtrCast(data()); } @@ -154,7 +154,7 @@ ConstOpCPURcPtr Lut1DOp::getCPUOp(bool /*fastLogExpPow*/) const return GetLut1DRenderer(data, BIT_DEPTH_F32, BIT_DEPTH_F32); } -void Lut1DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void Lut1DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstLut1DOpDataRcPtr lutData = lut1DData(); if (lutData->getDirection() == 
TRANSFORM_DIR_INVERSE) diff --git a/src/OpenColorIO/ops/lut3d/Lut3DOp.cpp b/src/OpenColorIO/ops/lut3d/Lut3DOp.cpp index 4f0de76f4d..7ddfeb334f 100644 --- a/src/OpenColorIO/ops/lut3d/Lut3DOp.cpp +++ b/src/OpenColorIO/ops/lut3d/Lut3DOp.cpp @@ -98,7 +98,7 @@ class Lut3DOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; bool supportedByLegacyShader() const override { return false; } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags oFlags) const override; protected: ConstLut3DOpDataRcPtr lut3DData() const @@ -200,7 +200,7 @@ ConstOpCPURcPtr Lut3DOp::getCPUOp(bool /*fastLogExpPow*/) const return GetLut3DRenderer(data); } -void Lut3DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void Lut3DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags oFlags) const { ConstLut3DOpDataRcPtr lutData = lut3DData(); if (lutData->getDirection() == TRANSFORM_DIR_INVERSE) @@ -216,7 +216,7 @@ void Lut3DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const lutData = tmp; } - GetLut3DGPUShaderProgram(shaderCreator, lutData); + GetLut3DGPUShaderProgram(shaderCreator, lutData, oFlags); } } diff --git a/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.cpp b/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.cpp index 786f52961c..5ef8c8c606 100644 --- a/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.cpp +++ b/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.cpp @@ -14,7 +14,7 @@ namespace OCIO_NAMESPACE { -void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DOpDataRcPtr & lutData) +void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DOpDataRcPtr & lutData, OptimizationFlags oFlags) { if (shaderCreator->getLanguage() == LANGUAGE_OSL_1) @@ -32,9 +32,11 @@ void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DO std::string name(resName.str()); 
StringUtils::ReplaceInPlace(name, "__", "_"); + const bool use_high_precision = !HasFlag(oFlags, OPTIMIZATION_NATIVE_GPU_TRILINEAR); Interpolation samplerInterpolation = lutData->getConcreteInterpolation(); - // Enforce GL_NEAREST with shader-generated tetrahedral interpolation. - if (samplerInterpolation == INTERP_TETRAHEDRAL) + // Enforce GL_NEAREST with shader-generated tetrahedral interpolation + // or hand-rolled trilinear interpolation. + if (samplerInterpolation == INTERP_TETRAHEDRAL || use_high_precision) { samplerInterpolation = INTERP_NEAREST; } @@ -225,18 +227,72 @@ void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DO else { // Trilinear interpolation - // Use texture3d and GL_LINEAR and the GPU's built-in trilinear algorithm. - // Note that the fractional components are quantized to 8-bits on some - // hardware, which introduces significant error with small grid sizes. - - ss.newLine() << ss.float3Decl(name + "_coords") - << " = (" << shaderCreator->getPixelName() << ".zyx * " - << ss.float3Const(dim - 1) << " + " - << ss.float3Const(0.5f) + ") / " - << ss.float3Const(dim) << ";"; - - ss.newLine() << shaderCreator->getPixelName() << ".rgb = " - << ss.sampleTex3D(name, name + "_coords") << ".rgb;"; + if (use_high_precision) + { + // Use GL_NEAREST and do interpolation by hand to avoid the precision + // issues of native trilinear interpolation on many popular GPUs. + + ss.newLine() << ss.float3Decl("coords") << " = " + << shaderCreator->getPixelName() << ".rgb * " + << ss.float3Const(dim - 1) << "; "; + + // baseInd is on [0,dim-1] + ss.newLine() << ss.float3Decl("baseInd") << " = floor(coords);"; + + // frac is on [0,1] + ss.newLine() << ss.float3Decl("frac") << " = coords - baseInd;"; + + // Scale/offset baseInd onto [0,1] as usual for doing texture lookups. + // We use zyx to flip the order since blue varies most rapidly + // in the grid array ordering. 
+ ss.newLine() << "baseInd = ( baseInd.zyx + " << ss.float3Const(0.5f) << " ) / " << ss.float3Const(dim) << ";"; + + // Fetch the 8 corners of the 3D texture cell. After the zyx swizzle above, texture x/y/z correspond to blue/green/red, so v1..v8 step blue first, then green, then red (incr is declared outside this hunk; presumably one texel, 1/dim - TODO confirm). + ss.newLine() << ss.float3Decl("nextInd") << " = baseInd;"; + ss.newLine() << ss.float3Decl("v1") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(incr, 0.0f, 0.0f) << ";"; + ss.newLine() << ss.float3Decl("v2") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(0.0f, incr, 0.0f) << ";"; + ss.newLine() << ss.float3Decl("v3") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(incr, incr, 0.0f) << ";"; + ss.newLine() << ss.float3Decl("v4") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(0.0f, 0.0f, incr) << ";"; + ss.newLine() << ss.float3Decl("v5") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(incr, 0.0f, incr) << ";"; + ss.newLine() << ss.float3Decl("v6") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(0.0f, incr, incr) << ";"; + ss.newLine() << ss.float3Decl("v7") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(incr, incr, incr) << ";"; + ss.newLine() << ss.float3Decl("v8") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + + // Lerp on Z: frac was taken from the unswizzled rgb coords, so frac.z is the blue fraction; this blends each corner with its texture-x (blue) neighbour. + ss.newLine() << ss.float3Decl("v1_2") << " = " << ss.lerp("v1", "v2", "frac.z") << ";"; + ss.newLine() << ss.float3Decl("v3_4") << " = " << ss.lerp("v3", "v4", "frac.z") << ";"; + ss.newLine() << ss.float3Decl("v5_6") << " = " << ss.lerp("v5", "v6", "frac.z") << ";"; + ss.newLine() << ss.float3Decl("v7_8") << " = " << ss.lerp("v7", "v8", "frac.z") << ";"; + + // Lerp on Y: frac.y is the green fraction; this blends the blue-interpolated pairs along texture y (green).
+ ss.newLine() << ss.float3Decl("v1_2_3_4") << " = " << ss.lerp("v1_2", "v3_4", "frac.y") << ";"; + ss.newLine() << ss.float3Decl("v5_6_7_8") << " = " << ss.lerp("v5_6", "v7_8", "frac.y") << ";"; + + // Lerp on X. + ss.newLine() << shaderCreator->getPixelName() << ".rgb = " << ss.lerp("v1_2_3_4", "v5_6_7_8", "frac.x") << ";"; + } + else + { + // Use texture3d and GL_LINEAR and the GPU's built-in trilinear algorithm. + // Note that the fractional components are quantized to 8-bits on some + // hardware, which introduces significant error with small grid sizes. + + ss.newLine() << ss.float3Decl(name + "_coords") + << " = (" << shaderCreator->getPixelName() << ".zyx * " + << ss.float3Const(dim - 1) << " + " + << ss.float3Const(0.5f) + ") / " + << ss.float3Const(dim) << ";"; + + ss.newLine() << shaderCreator->getPixelName() << ".rgb = " + << ss.sampleTex3D(name, name + "_coords") << ".rgb;"; + } } shaderCreator->addToFunctionShaderCode(ss.string().c_str()); diff --git a/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.h b/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.h index d34996f65f..68fc67aa55 100644 --- a/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.h +++ b/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.h @@ -12,7 +12,7 @@ namespace OCIO_NAMESPACE { -void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DOpDataRcPtr & lutData); +void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DOpDataRcPtr & lutData, OptimizationFlags oFlags); } // namespace OCIO_NAMESPACE diff --git a/src/OpenColorIO/ops/matrix/MatrixOp.cpp b/src/OpenColorIO/ops/matrix/MatrixOp.cpp index 93505a284e..f4fc9b37c4 100644 --- a/src/OpenColorIO/ops/matrix/MatrixOp.cpp +++ b/src/OpenColorIO/ops/matrix/MatrixOp.cpp @@ -62,7 +62,7 @@ class MatrixOffsetOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags 
/*oFlags*/) const override; protected: ConstMatrixOpDataRcPtr matrixData() const { return DynamicPtrCast(data()); } @@ -187,7 +187,7 @@ ConstOpCPURcPtr MatrixOffsetOp::getCPUOp(bool /*fastLogExpPow*/) const return GetMatrixRenderer(data); } -void MatrixOffsetOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void MatrixOffsetOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstMatrixOpDataRcPtr data = matrixData(); if (data->getDirection() == TRANSFORM_DIR_INVERSE) diff --git a/src/OpenColorIO/ops/noop/NoOps.cpp b/src/OpenColorIO/ops/noop/NoOps.cpp index cf71b1fde5..326969cb52 100644 --- a/src/OpenColorIO/ops/noop/NoOps.cpp +++ b/src/OpenColorIO/ops/noop/NoOps.cpp @@ -51,7 +51,7 @@ class AllocationNoOp : public Op void apply(const void * inImg, void * outImg, long numPixels) const override { memcpy(outImg, inImg, numPixels * 4 * sizeof(float)); } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/) const override {} + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/, OptimizationFlags /*oFlags*/) const override {} void getGpuAllocation(AllocationData & allocation) const; @@ -322,7 +322,7 @@ class FileNoOp : public Op void apply(const void * inImg, void * outImg, long numPixels) const override { memcpy(outImg, inImg, numPixels * 4 * sizeof(float)); } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/) const override {} + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/, OptimizationFlags /*oFlags*/) const override {} private: std::string m_fileReference; @@ -408,7 +408,7 @@ class LookNoOp : public Op void apply(const void * inImg, void * outImg, long numPixels) const override { memcpy(outImg, inImg, numPixels * 4 * sizeof(float)); } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/) const override {} + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/, OptimizationFlags /*oFlags*/) 
const override {} private: std::string m_look; diff --git a/src/OpenColorIO/ops/range/RangeOp.cpp b/src/OpenColorIO/ops/range/RangeOp.cpp index f708908fbd..ac3321ec03 100644 --- a/src/OpenColorIO/ops/range/RangeOp.cpp +++ b/src/OpenColorIO/ops/range/RangeOp.cpp @@ -51,7 +51,7 @@ class RangeOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstRangeOpDataRcPtr rangeData() const { return DynamicPtrCast(data()); } @@ -199,7 +199,7 @@ ConstOpCPURcPtr RangeOp::getCPUOp(bool /*fastLogExpPow*/) const return GetRangeRenderer(data); } -void RangeOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void RangeOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstRangeOpDataRcPtr data = rangeData(); if (data->getDirection() == TRANSFORM_DIR_INVERSE)