diff --git a/media_common/agnostic/common/vp/hal/vp_common_defs.h b/media_common/agnostic/common/vp/hal/vp_common_defs.h index a3f911a42b..b2ec06a24b 100644 --- a/media_common/agnostic/common/vp/hal/vp_common_defs.h +++ b/media_common/agnostic/common/vp/hal/vp_common_defs.h @@ -193,6 +193,7 @@ enum VpKernelIDNext { vpKernelIDNextBase = 0x200, kernelHdr3DLutCalc = vpKernelIDNextBase, + kernelHdr3DLutCalcL0, kernelHVSCalc, vpKernelIDNextMax }; diff --git a/media_softlet/agnostic/common/vp/hal/feature_manager/sw_filter.cpp b/media_softlet/agnostic/common/vp/hal/feature_manager/sw_filter.cpp index c7291247a3..086a235c42 100644 --- a/media_softlet/agnostic/common/vp/hal/feature_manager/sw_filter.cpp +++ b/media_softlet/agnostic/common/vp/hal/feature_manager/sw_filter.cpp @@ -1298,6 +1298,18 @@ MOS_STATUS SwFilterHdr::Configure(VP_PIPELINE_PARAMS &params, bool isInputSurf, uint32_t dwUpdateMask = 0; bool bSupported = false; VPHAL_HDR_LUT_MODE CurrentLUTMode = VPHAL_HDR_LUT_MODE_NONE; + VP_PUBLIC_CHK_NULL_RETURN(m_vpInterface.GetHwInterface()); + VP_PUBLIC_CHK_NULL_RETURN(m_vpInterface.GetHwInterface()->m_userFeatureControl); + VP_PUBLIC_CHK_NULL_RETURN(m_vpInterface.GetHwInterface()->m_vpPlatformInterface); + VP_PUBLIC_CHK_NULL_RETURN(m_vpInterface.GetHwInterface()->m_reporting); + + auto userFeatureControl = m_vpInterface.GetHwInterface()->m_userFeatureControl; + auto vpPlatformInterface = m_vpInterface.GetHwInterface()->m_vpPlatformInterface; + VpFeatureReport *vpFeatureReport = dynamic_cast<VpFeatureReport *>(m_vpInterface.GetHwInterface()->m_reporting); +#if (_DEBUG || _RELEASE_INTERNAL) + m_Params.isL0KernelEnabled = (vpPlatformInterface->IsAdvanceNativeKernelSupported() && userFeatureControl->EnableL03DLut()); + vpFeatureReport->GetFeatures().isL03DLut = m_Params.isL0KernelEnabled; +#endif VP_PUBLIC_CHK_NULL_RETURN(surfInput); VP_PUBLIC_CHK_NULL_RETURN(surfOutput); diff --git a/media_softlet/agnostic/common/vp/hal/feature_manager/sw_filter.h 
b/media_softlet/agnostic/common/vp/hal/feature_manager/sw_filter.h index 15df9db664..f300c0c696 100644 --- a/media_softlet/agnostic/common/vp/hal/feature_manager/sw_filter.h +++ b/media_softlet/agnostic/common/vp/hal/feature_manager/sw_filter.h @@ -882,6 +882,7 @@ struct FeatureParamHdr : public FeatureParam uint32_t uiSplitFramePortions = 1; //!< Split Frame flag bool bForceSplitFrame = false; bool bNeed3DSampler = false; //!< indicate whether 3D should neede by force considering AVS removal etc. + bool isL0KernelEnabled = false; HDR_PARAMS srcHDRParams = {}; HDR_PARAMS targetHDRParams = {}; diff --git a/media_softlet/agnostic/common/vp/hal/feature_manager/vp_kernelset.cpp b/media_softlet/agnostic/common/vp/hal/feature_manager/vp_kernelset.cpp index 7113bb69f6..371162ef44 100644 --- a/media_softlet/agnostic/common/vp/hal/feature_manager/vp_kernelset.cpp +++ b/media_softlet/agnostic/common/vp/hal/feature_manager/vp_kernelset.cpp @@ -31,6 +31,7 @@ #include "vp_render_vebox_hvs_kernel.h" #include "vp_render_hdr_kernel.h" #include "vp_render_l0_fc_kernel.h" +#include "vp_render_vebox_hdr_3dlut_l0_kernel.h" using namespace vp; @@ -139,6 +140,11 @@ MOS_STATUS VpKernelSet::CreateSingleKernelObject( } VP_RENDER_CHK_NULL_RETURN(kernel); break; + case kernelHdr3DLutCalcL0: + VP_RENDER_NORMALMESSAGE("HDR 3dlut kernel use l0 fillLutTable_3dlut kernel"); + kernel = (VpRenderKernelObj *)MOS_New(VpRenderHdr3DLutL0Kernel, m_hwInterface, m_allocator); + VP_RENDER_CHK_NULL_RETURN(kernel); + break; case kernelHVSCalc: kernel = (VpRenderKernelObj *)MOS_New(VpRenderHVSKernel, m_hwInterface, kernelId, kernelIndex, m_allocator); VP_RENDER_CHK_NULL_RETURN(kernel); diff --git a/media_softlet/agnostic/common/vp/hal/features/kernel_args/igvp3dlut_args.h b/media_softlet/agnostic/common/vp/hal/features/kernel_args/igvp3dlut_args.h new file mode 100644 index 0000000000..b50e0fb0ea --- /dev/null +++ b/media_softlet/agnostic/common/vp/hal/features/kernel_args/igvp3dlut_args.h @@ -0,0 +1,44 @@ 
+/* + * Copyright (c) 2024, Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * 'Software'), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +//////////////////////////////////////////////////////////////////////////////// +// !!! WARNING - AUTO GENERATED FILE. DO NOT EDIT DIRECTLY. !!! 
+// Generated by KernelBinToSource.exe tool +//////////////////////////////////////////////////////////////////////////////// + +#ifndef __IGVP3DLUT_ENUM_H__ +#define __IGVP3DLUT_ENUM_H__ + +enum ARGS_3DLUT_FILLLUTTABLE +{ + LUT_FILLLUTTABLE_IOLUTINDEX, + LUT_FILLLUTTABLE_ICOEFINDEX, + LUT_FILLLUTTABLE_LUTSIZE, + LUT_FILLLUTTABLE_MULSIZE, + LUT_FILLLUTTABLE_PRIVATE_BASE_STATELESS, + LUT_FILLLUTTABLE_GLOBAL_ID_OFFSET, + LUT_FILLLUTTABLE_LOCAL_SIZE, + LUT_FILLLUTTABLE_MAX +}; + +#endif // __IGVP3DLUT_ENUM_H__ diff --git a/media_softlet/agnostic/common/vp/hal/features/kernel_args/media_srcs.cmake b/media_softlet/agnostic/common/vp/hal/features/kernel_args/media_srcs.cmake index fe066a28aa..91f65552df 100644 --- a/media_softlet/agnostic/common/vp/hal/features/kernel_args/media_srcs.cmake +++ b/media_softlet/agnostic/common/vp/hal/features/kernel_args/media_srcs.cmake @@ -24,6 +24,7 @@ set(TMP_SOURCES_ set(TMP_HEADERS_ ${CMAKE_CURRENT_LIST_DIR}/igvpfc_scale_args.h + ${CMAKE_CURRENT_LIST_DIR}/igvp3dlut_args.h ) set(SOFTLET_VP_SOURCES_ diff --git a/media_softlet/agnostic/common/vp/hal/features/vp_filter.h b/media_softlet/agnostic/common/vp/hal/features/vp_filter.h index 3eff1e944f..7f2dccef84 100644 --- a/media_softlet/agnostic/common/vp/hal/features/vp_filter.h +++ b/media_softlet/agnostic/common/vp/hal/features/vp_filter.h @@ -357,6 +357,8 @@ struct _RENDER_HDR_3DLUT_CAL_PARAMS VpKernelID kernelId; uint32_t threadWidth; uint32_t threadHeight; + uint32_t localWidth; + uint32_t localHeight; KERNEL_ARGS kernelArgs; void Init(); }; diff --git a/media_softlet/agnostic/common/vp/hal/features/vp_hdr_filter.cpp b/media_softlet/agnostic/common/vp/hal/features/vp_hdr_filter.cpp index 548833acfa..69a7b3098d 100644 --- a/media_softlet/agnostic/common/vp/hal/features/vp_hdr_filter.cpp +++ b/media_softlet/agnostic/common/vp/hal/features/vp_hdr_filter.cpp @@ -28,6 +28,7 @@ #include "hw_filter.h" #include "sw_filter_pipe.h" #include "vp_render_cmd_packet.h" +#include "igvp3dlut_args.h" 
namespace vp { @@ -53,7 +54,11 @@ MOS_STATUS VpHdrFilter::Prepare() MOS_STATUS VpHdrFilter::Destroy() { VP_FUNC_CALL(); - + for (auto &handle : m_renderHdr3DLutL0Params) + { + KRN_ARG &krnArg = handle.second; + MOS_FreeMemAndSetNull(krnArg.pData); + } return MOS_STATUS_SUCCESS; } @@ -107,35 +112,116 @@ MOS_STATUS VpHdrFilter::CalculateEngineParams( m_renderHdr3DLutParams.maxDisplayLum = hdrParams.uiMaxDisplayLum; m_renderHdr3DLutParams.maxContentLevelLum = hdrParams.uiMaxContentLevelLum; m_renderHdr3DLutParams.hdrMode = hdrParams.hdrMode; - m_renderHdr3DLutParams.kernelId = (VpKernelID)kernelHdr3DLutCalc; m_renderHdr3DLutParams.threadWidth = hdrParams.lutSize; m_renderHdr3DLutParams.threadHeight = hdrParams.lutSize; - KRN_ARG krnArg = {}; - krnArg.uIndex = 0; - krnArg.eArgKind = ARG_KIND_SURFACE; - krnArg.uSize = 4; - krnArg.pData = &m_surfType3DLut; - m_renderHdr3DLutParams.kernelArgs.push_back(krnArg); - - krnArg.uIndex = 1; - krnArg.eArgKind = ARG_KIND_SURFACE; - krnArg.uSize = 4; - krnArg.pData = &m_surfType3DLutCoef; - m_renderHdr3DLutParams.kernelArgs.push_back(krnArg); - - krnArg.uIndex = 2; - krnArg.eArgKind = ARG_KIND_GENERAL; - krnArg.uSize = 2; - krnArg.pData = &m_3DLutSurfaceWidth; - m_renderHdr3DLutParams.kernelArgs.push_back(krnArg); - - krnArg.uIndex = 3; - krnArg.eArgKind = ARG_KIND_GENERAL; - krnArg.uSize = 2; - krnArg.pData = &m_3DLutSurfaceHeight; - m_renderHdr3DLutParams.kernelArgs.push_back(krnArg); + if (hdrParams.isL0KernelEnabled == false) + { + KRN_ARG krnArg = {}; + krnArg.uIndex = 0; + krnArg.eArgKind = ARG_KIND_SURFACE; + krnArg.uSize = 4; + krnArg.pData = &m_surfType3DLut; + m_renderHdr3DLutParams.kernelArgs.push_back(krnArg); + + krnArg.uIndex = 1; + krnArg.eArgKind = ARG_KIND_SURFACE; + krnArg.uSize = 4; + krnArg.pData = &m_surfType3DLutCoef; + m_renderHdr3DLutParams.kernelArgs.push_back(krnArg); + + krnArg.uIndex = 2; + krnArg.eArgKind = ARG_KIND_GENERAL; + krnArg.uSize = 2; + krnArg.pData = &m_3DLutSurfaceWidth; + 
m_renderHdr3DLutParams.kernelArgs.push_back(krnArg); + + krnArg.uIndex = 3; + krnArg.eArgKind = ARG_KIND_GENERAL; + krnArg.uSize = 2; + krnArg.pData = &m_3DLutSurfaceHeight; + m_renderHdr3DLutParams.kernelId = (VpKernelID)kernelHdr3DLutCalc; + m_renderHdr3DLutParams.kernelArgs.push_back(krnArg); + } + else + { + VP_PUBLIC_CHK_NULL_RETURN(m_pvpMhwInterface); + VP_PUBLIC_CHK_NULL_RETURN(m_pvpMhwInterface->m_vpPlatformInterface); + + auto handle = m_pvpMhwInterface->m_vpPlatformInterface->GetKernelPool().find("fillLutTable_3dlut"); + VP_PUBLIC_CHK_NOT_FOUND_RETURN(handle, &m_pvpMhwInterface->m_vpPlatformInterface->GetKernelPool()); + KERNEL_ARGS kernelArgs = handle->second.GetKernelArgs(); + uint32_t localWidth = 128; + uint32_t localHeight = 1; + uint32_t localDepth = 1; + m_renderHdr3DLutParams.localWidth = localWidth; + m_renderHdr3DLutParams.localHeight = localHeight; + m_renderHdr3DLutParams.kernelId = (VpKernelID)kernelHdr3DLutCalcL0; + + //step 1: setting curbe arguments + for (auto const &kernelArg : kernelArgs) + { + uint32_t uIndex = kernelArg.uIndex; + auto argHandle = m_renderHdr3DLutL0Params.find(uIndex); + if (argHandle == m_renderHdr3DLutL0Params.end()) + { + KRN_ARG krnArg = {}; + argHandle = m_renderHdr3DLutL0Params.insert(std::make_pair(uIndex, krnArg)).first; + VP_PUBLIC_CHK_NOT_FOUND_RETURN(argHandle, &m_renderHdr3DLutL0Params); + } + KRN_ARG &krnArg = argHandle->second; + krnArg.uIndex = uIndex; + bool bInit = true; + if (krnArg.pData == nullptr) + { + if (kernelArg.uSize > 0) + { + krnArg.uSize = kernelArg.uSize; + krnArg.pData = MOS_AllocAndZeroMemory(kernelArg.uSize); + } + } + else + { + VP_PUBLIC_CHK_VALUE_RETURN(krnArg.uSize, kernelArg.uSize); + MOS_ZeroMemory(krnArg.pData, krnArg.uSize); + } + uint16_t mulSize = hdrParams.lutSize == 65 ? 
128 : 64; + krnArg.eArgKind = kernelArg.eArgKind; + switch (krnArg.uIndex) + { + case LUT_FILLLUTTABLE_IOLUTINDEX: + VP_PUBLIC_CHK_NULL_RETURN(krnArg.pData); + *(uint32_t *)krnArg.pData = SurfaceType3DLut; + break; + case LUT_FILLLUTTABLE_ICOEFINDEX: + VP_PUBLIC_CHK_NULL_RETURN(krnArg.pData); + *(uint32_t *)krnArg.pData = SurfaceType3DLutCoef; + break; + case LUT_FILLLUTTABLE_LUTSIZE: + VP_PUBLIC_CHK_NULL_RETURN(krnArg.pData); + MOS_SecureMemcpy(krnArg.pData, kernelArg.uSize, &hdrParams.lutSize, sizeof(uint16_t)); + break; + case LUT_FILLLUTTABLE_MULSIZE: + VP_PUBLIC_CHK_NULL_RETURN(krnArg.pData); + MOS_SecureMemcpy(krnArg.pData, kernelArg.uSize, &mulSize, sizeof(uint16_t)); + break; + case LUT_FILLLUTTABLE_LOCAL_SIZE: + VP_PUBLIC_CHK_NULL_RETURN(krnArg.pData); + static_cast<uint32_t *>(krnArg.pData)[0] = localWidth; + static_cast<uint32_t *>(krnArg.pData)[1] = localHeight; + static_cast<uint32_t *>(krnArg.pData)[2] = localDepth; + break; + default: + bInit = false; + break; + } + if (bInit) + { + m_renderHdr3DLutParams.kernelArgs.push_back(krnArg); + } + } + } } else { diff --git a/media_softlet/agnostic/common/vp/hal/features/vp_hdr_filter.h b/media_softlet/agnostic/common/vp/hal/features/vp_hdr_filter.h index 8948cb9ecd..73df414068 100644 --- a/media_softlet/agnostic/common/vp/hal/features/vp_hdr_filter.h +++ b/media_softlet/agnostic/common/vp/hal/features/vp_hdr_filter.h @@ -243,6 +243,7 @@ class VpHdrFilter : public VpFilter VEBOX_HDR_PARAMS m_veboxHdrParams = {}; RENDER_HDR_3DLUT_CAL_PARAMS m_renderHdr3DLutParams = {}; RENDER_HDR_PARAMS m_renderHdrParams = {}; + KERNEL_INDEX_ARG_MAP m_renderHdr3DLutL0Params = {}; SwFilterPipe *m_executedPipe = nullptr; SurfaceType m_surfType3DLut = SurfaceType3DLut; diff --git a/media_softlet/agnostic/common/vp/hal/packet/media_srcs.cmake b/media_softlet/agnostic/common/vp/hal/packet/media_srcs.cmake index 4d86b38456..b9f04e8de1 100644 --- a/media_softlet/agnostic/common/vp/hal/packet/media_srcs.cmake +++ 
b/media_softlet/agnostic/common/vp/hal/packet/media_srcs.cmake @@ -33,6 +33,7 @@ set(TMP_SOURCES_ ${CMAKE_CURRENT_LIST_DIR}/vp_render_vebox_hvs_kernel.cpp ${CMAKE_CURRENT_LIST_DIR}/vp_render_hdr_kernel.cpp ${CMAKE_CURRENT_LIST_DIR}/vp_render_l0_fc_kernel.cpp + ${CMAKE_CURRENT_LIST_DIR}/vp_render_vebox_hdr_3dlut_l0_kernel.cpp ) set(TMP_HEADERS_ @@ -55,6 +56,7 @@ set(TMP_HEADERS_ ${CMAKE_CURRENT_LIST_DIR}/vp_render_vebox_hvs_kernel.h ${CMAKE_CURRENT_LIST_DIR}/vp_render_hdr_kernel.h ${CMAKE_CURRENT_LIST_DIR}/vp_render_l0_fc_kernel.h + ${CMAKE_CURRENT_LIST_DIR}/vp_render_vebox_hdr_3dlut_l0_kernel.h ) set(SOFTLET_VP_SOURCES_ diff --git a/media_softlet/agnostic/common/vp/hal/packet/vp_render_cmd_packet.cpp b/media_softlet/agnostic/common/vp/hal/packet/vp_render_cmd_packet.cpp index 985270f2c7..5f93e4599d 100644 --- a/media_softlet/agnostic/common/vp/hal/packet/vp_render_cmd_packet.cpp +++ b/media_softlet/agnostic/common/vp/hal/packet/vp_render_cmd_packet.cpp @@ -829,6 +829,12 @@ MOS_STATUS VpRenderCmdPacket::SetupSurfaceState() } VP_RENDER_CHK_STATUS_RETURN(m_kernel->UpdateCompParams()); } + else + { + // Reset status + m_renderHal->bCmfcCoeffUpdate = false; + m_renderHal->pCmfcCoeffSurface = nullptr; + } return MOS_STATUS_SUCCESS; } @@ -2116,6 +2122,8 @@ MOS_STATUS VpRenderCmdPacket::SetHdr3DLutParams( // kernel.GetKernelArgs(). 
kernelParams.kernelThreadSpace.uWidth = params->threadWidth; kernelParams.kernelThreadSpace.uHeight = params->threadHeight; + kernelParams.kernelThreadSpace.uLocalWidth = params->localWidth; + kernelParams.kernelThreadSpace.uLocalHeight = params->localHeight; kernelParams.kernelArgs = params->kernelArgs; kernelParams.syncFlag = true; m_renderKernelParams.push_back(kernelParams); diff --git a/media_softlet/agnostic/common/vp/hal/packet/vp_render_common.h b/media_softlet/agnostic/common/vp/hal/packet/vp_render_common.h index d1159b478e..24a89aef3c 100644 --- a/media_softlet/agnostic/common/vp/hal/packet/vp_render_common.h +++ b/media_softlet/agnostic/common/vp/hal/packet/vp_render_common.h @@ -72,6 +72,14 @@ enum KRN_ARG_KIND ARG_KIND_INLINE = 0xa00 }; +enum KRN_ARG_ADDRESSMODE +{ + AddressingModeStateful = 0, + AddressingModeStateless, + AddressingModeBindless, + AddressIngModeMax +}; + struct KRN_ARG { uint32_t uIndex; @@ -80,6 +88,7 @@ struct KRN_ARG uint32_t uSize; // size of arg in byte KRN_ARG_KIND eArgKind; bool isOutput; + KRN_ARG_ADDRESSMODE addressMode; }; //for L0 use only diff --git a/media_softlet/agnostic/common/vp/hal/packet/vp_render_kernel_obj.cpp b/media_softlet/agnostic/common/vp/hal/packet/vp_render_kernel_obj.cpp index 4617c6fc17..0b41bd2a67 100644 --- a/media_softlet/agnostic/common/vp/hal/packet/vp_render_kernel_obj.cpp +++ b/media_softlet/agnostic/common/vp/hal/packet/vp_render_kernel_obj.cpp @@ -222,11 +222,43 @@ MOS_STATUS VpRenderKernelObj::CpPrepareResources() return MOS_STATUS_SUCCESS; } +MOS_STATUS VpRenderKernelObj::SetupStatelessBuffer() +{ + m_statelessArray.clear(); + VP_RENDER_NORMALMESSAGE("Not prepare stateless buffer in kernel %d.", m_kernelId); + return MOS_STATUS_SUCCESS; +} + +MOS_STATUS VpRenderKernelObj::SetupStatelessBufferResource(SurfaceType surf) +{ + VP_RENDER_CHK_NULL_RETURN(m_surfaceGroup); + VP_RENDER_CHK_NULL_RETURN(m_hwInterface); + if (surf != SurfaceTypeInvalid) + { + PMOS_INTERFACE osInterface = 
m_hwInterface->m_osInterface; + VP_RENDER_CHK_NULL_RETURN(osInterface); + auto it = m_surfaceGroup->find(surf); + VP_SURFACE *curSurf = (m_surfaceGroup->end() != it) ? it->second : nullptr; + VP_RENDER_CHK_NULL_RETURN(curSurf); + uint64_t ui64GfxAddress = osInterface->pfnGetResourceGfxAddress(osInterface, &curSurf->osSurface->OsResource); + + VP_RENDER_CHK_STATUS_RETURN(osInterface->pfnRegisterResource( + osInterface, + &curSurf->osSurface->OsResource, + false, + true)); + m_statelessArray.insert(std::make_pair(surf, ui64GfxAddress)); + } + + return MOS_STATUS_SUCCESS; +} + MOS_STATUS VpRenderKernelObj::SetProcessSurfaceGroup(VP_SURFACE_GROUP &surfaces) { m_surfaceGroup = &surfaces; VP_RENDER_CHK_STATUS_RETURN(SetupSurfaceState()); VP_RENDER_CHK_STATUS_RETURN(CpPrepareResources()); + VP_RENDER_CHK_STATUS_RETURN(SetupStatelessBuffer()); return MOS_STATUS_SUCCESS; } diff --git a/media_softlet/agnostic/common/vp/hal/packet/vp_render_kernel_obj.h b/media_softlet/agnostic/common/vp/hal/packet/vp_render_kernel_obj.h index 0cc383ff6c..41b8701952 100644 --- a/media_softlet/agnostic/common/vp/hal/packet/vp_render_kernel_obj.h +++ b/media_softlet/agnostic/common/vp/hal/packet/vp_render_kernel_obj.h @@ -92,6 +92,7 @@ using KERNEL_SAMPLER_INDEX = std::vector; using KERNEL_SURFACE_CONFIG = std::map; using KERNEL_SURFACE_BINDING_INDEX = std::map>; using KERNEL_ARG_INDEX_SURFACE_MAP = std::map; +using KERNEL_STATELESS_BUFF_CONFIG = std::map; typedef struct _KERNEL_PARAMS { @@ -545,6 +546,10 @@ class VpRenderKernelObj virtual MOS_STATUS CpPrepareResources(); + virtual MOS_STATUS SetupStatelessBuffer(); + + virtual MOS_STATUS SetupStatelessBufferResource(SurfaceType surf); + virtual MOS_STATUS GetCurbeState(void *&curbe, uint32_t &curbeLength) = 0; virtual MOS_STATUS GetAlignedLength(uint32_t &curbeLength, uint32_t &curbeLengthAligned, RENDERHAL_KERNEL_PARAM kernelParam, uint32_t dwBlockAlign) @@ -563,7 +568,7 @@ class VpRenderKernelObj KERNEL_SURFACE_BINDING_INDEX 
m_surfaceBindingIndex; // store the binding index for processed surface PVpAllocator m_allocator = nullptr; MediaUserSettingSharedPtr m_userSettingPtr = nullptr; // usersettingInstance - + KERNEL_STATELESS_BUFF_CONFIG m_statelessArray; // kernel attribute std::string m_kernelName = ""; void * m_kernelBinary = nullptr; diff --git a/media_softlet/agnostic/common/vp/hal/packet/vp_render_vebox_hdr_3dlut_l0_kernel.cpp b/media_softlet/agnostic/common/vp/hal/packet/vp_render_vebox_hdr_3dlut_l0_kernel.cpp new file mode 100644 index 0000000000..e979e6d6f9 --- /dev/null +++ b/media_softlet/agnostic/common/vp/hal/packet/vp_render_vebox_hdr_3dlut_l0_kernel.cpp @@ -0,0 +1,620 @@ +/* +* Copyright (c) 2024, Intel Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +* OTHER DEALINGS IN THE SOFTWARE. +*/ +//! +//! \file vp_render_vebox_hdr_3dlut_l0_kernel.cpp +//! \brief render packet which used in by mediapipline. +//! 
\details render packet provide the structures and generate the cmd buffer which mediapipline will used. +//! +#include "vp_render_vebox_hdr_3dlut_l0_kernel.h" +#include "vp_dumper.h" +#include "vp_kernelset.h" + +using namespace vp; + +static const float ccm_identity[12] = {1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f}; +static float color_matrix_calculation[3][4] = {0.0f}; + +template <typename T> +void mat_3by3_inv(T *m1, T *m2) +{ + double a11, a12, a13, a21, a22, a23, a31, a32, a33; + double det, det_inv; + + a11 = *m1; + a12 = *(m1 + 1); + a13 = *(m1 + 2); + a21 = *(m1 + 3); + a22 = *(m1 + 4); + a23 = *(m1 + 5); + a31 = *(m1 + 6); + a32 = *(m1 + 7); + a33 = *(m1 + 8); + + det = a11 * (a33 * a22 - a32 * a23) - a21 * (a33 * a12 - a32 * a13) + a31 * (a12 * a23 - a13 * a22); + + det_inv = 1 / det; + + *m2 = (float)(det_inv * (a33 * a22 - a32 * a23)); + *(m2 + 1) = (float)(-det_inv * (a33 * a12 - a32 * a13)); + *(m2 + 2) = (float)(det_inv * (a12 * a23 - a13 * a22)); + *(m2 + 3) = (float)(-det_inv * (a33 * a21 - a31 * a23)); + *(m2 + 4) = (float)(det_inv * (a33 * a11 - a31 * a13)); + *(m2 + 5) = (float)(-det_inv * (a23 * a11 - a21 * a13)); + *(m2 + 6) = (float)(det_inv * (a32 * a21 - a31 * a22)); + *(m2 + 7) = (float)(-det_inv * (a32 * a11 - a31 * a12)); + *(m2 + 8) = (float)(det_inv * (a22 * a11 - a21 * a12)); +} + +template <typename T> +void mat_mul_float(T *m1, T *m2, T *m3, short m_size, short n_size, short l_size) +{ + int i, j, k, Tjn, Tjl, off3; + float *m1_offs, *m2_offs, *m2_k_offs, *m3_offs; + + for (j = 0; j < m_size; j++) + { + Tjn = j * n_size; + Tjl = j * l_size; + for (k = 0; k < l_size; k++) + { + off3 = Tjl + k; + m3_offs = m3 + off3; + *m3_offs = 0; + m2_k_offs = m2 + k; + m1_offs = m1 + Tjn; + for (i = 0; i < n_size; i++) + { + m2_offs = m2_k_offs + i * l_size; + *m3_offs += *m1_offs * *m2_offs; + m1_offs++; + } + } + } +} +static void CalcCCMMatrix() +{ + float mat_rgb2xyz_src[3][3], mat_rgb2xyz_dst[3][3]; + float mat_xyz[3][3], 
mat_invxyz[3][3]; + float matW[3]; + float matE[3] = {}; + + float fccmSrcZR, fccmSrcZG, fccmSrcZB, fccmSrcZW; + float fccmDstZR, fccmDstZG, fccmDstZB, fccmDstZW; + + const float fccmSrcXR = 0.708000000000000f; + const float fccmSrcYR = 0.292000000000000f; + const float fccmSrcXG = 0.170000000000000f; + const float fccmSrcYG = 0.797000000000000f; + const float fccmSrcXB = 0.131000000000000f; + const float fccmSrcYB = 0.046000000000000f; + const float fccmSrcXW = 0.312700000000000f; + const float fccmSrcYW = 0.329000000000000f; + const float fccmDstXR = 0.640000000000000f; + const float fccmDstYR = 0.330000000000000f; + const float fccmDstXG = 0.300000000000000f; + const float fccmDstYG = 0.600000000000000f; + const float fccmDstXB = 0.150000000000000f; + const float fccmDstYB = 0.060000000000000f; + const float fccmDstXW = 0.312700000000000f; + const float FccmDstYW = 0.329000000000000f; + + fccmSrcZR = 1 - fccmSrcXR - fccmSrcYR; + fccmSrcZG = 1 - fccmSrcXG - fccmSrcYG; + fccmSrcZB = 1 - fccmSrcXB - fccmSrcYB; + fccmSrcZW = 1 - fccmSrcXW - fccmSrcYW; + + mat_xyz[0][0] = fccmSrcXR; + mat_xyz[0][1] = fccmSrcXG; + mat_xyz[0][2] = fccmSrcXB; + mat_xyz[1][0] = fccmSrcYR; + mat_xyz[1][1] = fccmSrcYG; + mat_xyz[1][2] = fccmSrcYB; + mat_xyz[2][0] = fccmSrcZR; + mat_xyz[2][1] = fccmSrcZG; + mat_xyz[2][2] = fccmSrcZB; + + mat_3by3_inv(mat_xyz[0], mat_invxyz[0]); + + matW[0] = fccmSrcXW / fccmSrcYW; + matW[1] = 1; + matW[2] = fccmSrcZW / fccmSrcYW; + + mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1); + + mat_rgb2xyz_src[0][0] = matE[0] * fccmSrcXR; + mat_rgb2xyz_src[1][0] = matE[0] * fccmSrcYR; + mat_rgb2xyz_src[2][0] = matE[0] * fccmSrcZR; + mat_rgb2xyz_src[0][1] = matE[1] * fccmSrcXG; + mat_rgb2xyz_src[1][1] = matE[1] * fccmSrcYG; + mat_rgb2xyz_src[2][1] = matE[1] * fccmSrcZG; + mat_rgb2xyz_src[0][2] = matE[2] * fccmSrcXB; + mat_rgb2xyz_src[1][2] = matE[2] * fccmSrcYB; + mat_rgb2xyz_src[2][2] = matE[2] * fccmSrcZB; + + fccmDstZR = 1 - fccmDstXR - fccmDstYR; + fccmDstZG = 
1 - fccmDstXG - fccmDstYG; + fccmDstZB = 1 - fccmDstXB - fccmDstYB; + fccmDstZW = 1 - fccmDstXW - FccmDstYW; + + mat_xyz[0][0] = fccmDstXR; + mat_xyz[0][1] = fccmDstXG; + mat_xyz[0][2] = fccmDstXB; + mat_xyz[1][0] = fccmDstYR; + mat_xyz[1][1] = fccmDstYG; + mat_xyz[1][2] = fccmDstYB; + mat_xyz[2][0] = fccmDstZR; + mat_xyz[2][1] = fccmDstZG; + mat_xyz[2][2] = fccmDstZB; + + mat_3by3_inv(mat_xyz[0], mat_invxyz[0]); + + matW[0] = fccmDstXW / FccmDstYW; + matW[1] = 1; + matW[2] = fccmDstZW / FccmDstYW; + + mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1); + + mat_rgb2xyz_dst[0][0] = matE[0] * fccmDstXR; + mat_rgb2xyz_dst[1][0] = matE[0] * fccmDstYR; + mat_rgb2xyz_dst[2][0] = matE[0] * fccmDstZR; + mat_rgb2xyz_dst[0][1] = matE[1] * fccmDstXG; + mat_rgb2xyz_dst[1][1] = matE[1] * fccmDstYG; + mat_rgb2xyz_dst[2][1] = matE[1] * fccmDstZG; + mat_rgb2xyz_dst[0][2] = matE[2] * fccmDstXB; + mat_rgb2xyz_dst[1][2] = matE[2] * fccmDstYB; + mat_rgb2xyz_dst[2][2] = matE[2] * fccmDstZB; + + float mat_invrgb2xyz_dst[3][3]; + mat_3by3_inv(mat_rgb2xyz_dst[0], mat_invrgb2xyz_dst[0]); + + float CCMmat[3][3]; + mat_mul_float(mat_invrgb2xyz_dst[0], mat_rgb2xyz_src[0], CCMmat[0], 3, 3, 3); + + color_matrix_calculation[0][0] = CCMmat[0][0]; + color_matrix_calculation[0][1] = CCMmat[0][1]; + color_matrix_calculation[0][2] = CCMmat[0][2]; + color_matrix_calculation[0][3] = 0.0f; + color_matrix_calculation[1][0] = CCMmat[1][0]; + color_matrix_calculation[1][1] = CCMmat[1][1]; + color_matrix_calculation[1][2] = CCMmat[1][2]; + color_matrix_calculation[1][3] = 0.0f; + color_matrix_calculation[2][0] = CCMmat[2][0]; + color_matrix_calculation[2][1] = CCMmat[2][1]; + color_matrix_calculation[2][2] = CCMmat[2][2]; + color_matrix_calculation[2][3] = 0.0f; +} + +// Used by L0 kernel +VpRenderHdr3DLutL0Kernel::VpRenderHdr3DLutL0Kernel(PVP_MHWINTERFACE hwInterface, PVpAllocator allocator) : + VpRenderKernelObj(hwInterface, (VpKernelID)kernelHdr3DLutCalcL0, 0, VP_HDR_KERNEL_NAME_L0_3DLUT, allocator) +{ + 
VP_FUNC_CALL(); + m_kernelBinaryID = VP_ADV_KERNEL_BINARY_ID(kernelHdr3DLutCalcL0); + m_isAdvKernel = true; +} + +VpRenderHdr3DLutL0Kernel::~VpRenderHdr3DLutL0Kernel() +{ + MOS_SafeFreeMemory(m_curbe); + m_curbe = nullptr; +} + +MOS_STATUS VpRenderHdr3DLutL0Kernel::Init(VpRenderKernel &kernel) +{ + VP_FUNC_CALL(); + + VP_RENDER_NORMALMESSAGE("Initializing SR krn %s", kernel.GetKernelName().c_str()); + + m_kernelSize = kernel.GetKernelSize(); + + uint8_t *pKernelBin = (uint8_t *)kernel.GetKernelBinPointer(); + VP_RENDER_CHK_NULL_RETURN(pKernelBin); + + m_kernelBinary = pKernelBin + kernel.GetKernelBinOffset(); + + m_kernelArgs = kernel.GetKernelArgs(); + + for (auto arg : m_kernelArgs) + { + arg.pData = nullptr; + } + + m_kernelBtis = kernel.GetKernelBtis(); + + m_kernelEnv = kernel.GetKernelExeEnv(); + + m_curbeSize = kernel.GetCurbeSize(); + + return MOS_STATUS_SUCCESS; +} + + +MOS_STATUS VpRenderHdr3DLutL0Kernel::SetupSurfaceState() +{ + VP_FUNC_CALL(); + VP_RENDER_CHK_STATUS_RETURN(InitCoefSurface(m_maxDisplayLum, m_maxContentLevelLum, m_hdrMode)); + + return MOS_STATUS_SUCCESS; +} + +MOS_STATUS VpRenderHdr3DLutL0Kernel::CpPrepareResources() +{ + VP_FUNC_CALL(); + + PMOS_RESOURCE source[VPHAL_MAX_SOURCES] = {nullptr}; + PMOS_RESOURCE target[VPHAL_MAX_TARGETS] = {nullptr}; + + if ((nullptr != m_hwInterface->m_osInterface) && + (nullptr != m_hwInterface->m_osInterface->osCpInterface)) + { + auto it = m_surfaceGroup->find(SurfaceType3DLutCoef); + VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr; + VP_RENDER_CHK_NULL_RETURN(surf); + source[0] = &(surf->osSurface->OsResource); + + it = m_surfaceGroup->find(SurfaceType3DLut); + surf = (m_surfaceGroup->end() != it) ? 
it->second : nullptr; + VP_RENDER_CHK_NULL_RETURN(surf); + + target[0] = &(surf->osSurface->OsResource); + m_hwInterface->m_osInterface->osCpInterface->PrepareResources((void **)source, 1, (void **)target, 1); + } + return MOS_STATUS_SUCCESS; +} + +MOS_STATUS VpRenderHdr3DLutL0Kernel::SetupStatelessBuffer() +{ + VP_FUNC_CALL(); + m_statelessArray.clear(); + VP_RENDER_CHK_STATUS_RETURN(SetupStatelessBufferResource(SurfaceType3DLutCoef)); + VP_RENDER_CHK_STATUS_RETURN(SetupStatelessBufferResource(SurfaceType3DLut)); + return MOS_STATUS_SUCCESS; +} + +MOS_STATUS VpRenderHdr3DLutL0Kernel::GetCurbeState(void *&curbe, uint32_t &curbeLength) +{ + VP_FUNC_CALL(); + curbeLength = m_curbeSize; + + VP_RENDER_NORMALMESSAGE("KernelID %d, Curbe Size %d\n", m_kernelId, curbeLength); + if (curbeLength == 0) + { + return MOS_STATUS_INVALID_PARAMETER; + } + + uint8_t *pCurbe = (uint8_t *)MOS_AllocAndZeroMemory(curbeLength); + VP_RENDER_CHK_NULL_RETURN(pCurbe); + MOS_FreeMemAndSetNull(m_curbe); + m_curbe = pCurbe; + + for (auto &arg : m_kernelArgs) + { + if (arg.eArgKind == ARG_KIND_GENERAL) + { + if (arg.pData != nullptr) + { + MOS_SecureMemcpy(pCurbe + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize); + VP_RENDER_NORMALMESSAGE("Setting Curbe State KernelID %d, index %d , value %d, argKind %d", m_kernelId, arg.uIndex, *(uint32_t *)arg.pData, arg.eArgKind); + } + else + { + VP_RENDER_NORMALMESSAGE("KernelID %d, index %d, argKind %d is empty", m_kernelId, arg.uIndex, arg.eArgKind); + } + } + else if (arg.eArgKind == ARG_KIND_SURFACE) + { + if (arg.addressMode == AddressingModeStateless && arg.pData != nullptr) + { + for (uint32_t idx = 0; idx < arg.uSize / sizeof(SurfaceType); idx++) + { + uint32_t *pSurfaceindex = (uint32_t *)(arg.pData) + idx; + SurfaceType surf = (SurfaceType)*pSurfaceindex; + + if (surf != SurfaceTypeInvalid) + { + auto it = m_statelessArray.find(surf); + uint64_t ui64GfxAddress = (m_statelessArray.end() != it) ? 
it->second : 0xFFFF; + *((uint64_t *)(pCurbe + arg.uOffsetInPayload) + idx) = ui64GfxAddress; + break; + } + else + { + *((uint64_t *)(pCurbe + arg.uOffsetInPayload) + idx) = 0xFFFF; + } + } + } + } + else if (arg.eArgKind == ARG_KIND_INLINE) + { + VP_RENDER_NORMALMESSAGE("Skip inline data here"); + } + else + { + return MOS_STATUS_UNIMPLEMENTED; + } + } + + curbe = pCurbe; + + return MOS_STATUS_SUCCESS; +} + + +MOS_STATUS VpRenderHdr3DLutL0Kernel::GetWalkerSetting(KERNEL_WALKER_PARAMS &walkerParam, KERNEL_PACKET_RENDER_DATA &renderData) +{ + + VP_FUNC_CALL(); + + walkerParam = m_walkerParam; + walkerParam.iBindingTable = renderData.bindingTable; + walkerParam.iMediaID = renderData.mediaID; + walkerParam.iCurbeOffset = renderData.iCurbeOffset; + // Should use renderData.iCurbeLength instead of kernelSettings.CURBE_Length. + // kernelSettings.CURBE_Length is 32 aligned with 5 bits shift. + // renderData.iCurbeLength is RENDERHAL_CURBE_BLOCK_ALIGN(64) aligned. + walkerParam.iCurbeLength = renderData.iCurbeLength; + return MOS_STATUS_SUCCESS; +} + +// Only for Adv kernels. 
+MOS_STATUS VpRenderHdr3DLutL0Kernel::SetWalkerSetting(KERNEL_THREAD_SPACE &threadSpace, bool bSyncFlag, bool flushL1) +{ + VP_FUNC_CALL(); + MOS_ZeroMemory(&m_walkerParam, sizeof(KERNEL_WALKER_PARAMS)); + + m_walkerParam.iBlocksX = threadSpace.uWidth; + m_walkerParam.iBlocksY = threadSpace.uHeight; + m_walkerParam.threadWidth = threadSpace.uLocalWidth; + m_walkerParam.threadHeight = threadSpace.uLocalHeight; + m_walkerParam.threadDepth = 1; + m_walkerParam.isVerticalPattern = false; + m_walkerParam.bSyncFlag = bSyncFlag; + + m_walkerParam.pipeControlParams.bUpdateNeeded = true; + m_walkerParam.pipeControlParams.bEnableDataPortFlush = true; + m_walkerParam.pipeControlParams.bUnTypedDataPortCacheFlush = true; + m_walkerParam.pipeControlParams.bFlushRenderTargetCache = false; + m_walkerParam.pipeControlParams.bInvalidateTextureCache = false; + + for (auto &arg : m_kernelArgs) + { + if (arg.eArgKind == ARG_KIND_INLINE) + { + if (arg.pData != nullptr) + { + MOS_SecureMemcpy(m_inlineData + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize); + VP_RENDER_NORMALMESSAGE("Setting Inline Data KernelID %d, index %d , value %d, argKind %d", m_kernelId, arg.uIndex, *(uint32_t *)arg.pData, arg.eArgKind); + } + else + { + VP_RENDER_NORMALMESSAGE("KernelID %d, index %d, argKind %d is empty", m_kernelId, arg.uIndex, arg.eArgKind); + } + } + } + m_walkerParam.inlineDataLength = sizeof(m_inlineData); + m_walkerParam.inlineData = m_inlineData; + + if (m_kernelEnv.uSimdSize != 1 && + (m_kernelEnv.uiWorkGroupWalkOrderDimensions[0] != 0 || + m_kernelEnv.uiWorkGroupWalkOrderDimensions[1] != 0 || + m_kernelEnv.uiWorkGroupWalkOrderDimensions[2] != 0)) + { + m_walkerParam.isEmitInlineParameter = true; + m_walkerParam.isGenerateLocalID = true; + m_walkerParam.emitLocal = MHW_EMIT_LOCAL_XYZ; + } + + return MOS_STATUS_SUCCESS; +} + +MOS_STATUS VpRenderHdr3DLutL0Kernel::InitCoefSurface(const uint32_t maxDLL, const uint32_t maxCLL, const VPHAL_HDR_MODE hdrMode) +{ + VP_FUNC_CALL(); + float 
*hdrcoefBuffer = nullptr; + int32_t oetfCurve = 0, tmMode = 0, tmSrcType = 0; + float *ccmMatrix = m_ccmMatrix; + float tmMaxCLL = 0.0f, tmMaxDLL = 0.0f; + + MOS_ZeroMemory(m_ccmMatrix, sizeof(m_ccmMatrix)); + + // Get surface addr + auto it = m_surfaceGroup->find(SurfaceType3DLutCoef); + VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr; + VP_RENDER_CHK_NULL_RETURN(surf); + + tmMaxCLL = (float)maxCLL; + tmMaxDLL = (float)maxDLL; + + // Lock surface + uint8_t *lockedAddr = (uint8_t *)m_allocator->LockResourceForWrite(&surf->osSurface->OsResource); + + VP_RENDER_CHK_NULL_RETURN(lockedAddr); + + hdrcoefBuffer = (float *)lockedAddr; + + if (hdrMode == VPHAL_HDR_MODE_TONE_MAPPING) // H2S + { + CalcCCMMatrix(); + MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, color_matrix_calculation, sizeof(float) * 12); + + tmMode = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2S; + oetfCurve = (OETF_CURVE_TYPE)OETF_SRGB; + tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709; + } + else // H2H + { + MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, ccm_identity, sizeof(float) * 12); + + tmMode = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2H; + oetfCurve = (OETF_CURVE_TYPE)OETF_CURVE_HDR_2084; + tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709; + } + + // Fill Coefficient Surface: Media kernel define the layout of coefficients. Please don't change it. 
+ const uint32_t pos_coef[17] = {7, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 54, 55, 62, 63}; + + // OETF curve + ((int *)hdrcoefBuffer)[pos_coef[0]] = oetfCurve; + // CCM + for (uint32_t i = 0; i < VP_CCM_MATRIX_SIZE; ++i) + { + hdrcoefBuffer[pos_coef[i + 1]] = ccmMatrix[i]; + } + // TM Source Type + ((int *)hdrcoefBuffer)[pos_coef[13]] = tmSrcType; + // TM Mode + ((int *)hdrcoefBuffer)[pos_coef[14]] = tmMode; + // Max CLL and DLL + hdrcoefBuffer[pos_coef[15]] = tmMaxCLL; + hdrcoefBuffer[pos_coef[16]] = tmMaxDLL; + + //Unlock + VP_RENDER_CHK_STATUS_RETURN(m_allocator->UnLock(&surf->osSurface->OsResource)); + + return MOS_STATUS_SUCCESS; +} + +MOS_STATUS VpRenderHdr3DLutL0Kernel::SetKernelConfigs(KERNEL_CONFIGS &kernelConfigs) +{ + VP_FUNC_CALL(); + auto it = kernelConfigs.find((VpKernelID)kernelHdr3DLutCalcL0); + + if (kernelConfigs.end() == it || nullptr == it->second) + { + VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER); + } + + PRENDER_HDR_3DLUT_CAL_PARAMS params = (PRENDER_HDR_3DLUT_CAL_PARAMS)it->second; + + if (m_maxDisplayLum == params->maxDisplayLum && m_maxContentLevelLum == params->maxContentLevelLum && + m_hdrMode == params->hdrMode && m_hdrLutSize == params->threadWidth) + { + // For such case, 3DLut calculation should be skipped in Policy::GetHdrExecutionCaps. 
+ VP_RENDER_ASSERTMESSAGE("No change in 3D Lut parameters!"); + } + else + { + m_maxDisplayLum = params->maxDisplayLum; + m_maxContentLevelLum = params->maxContentLevelLum; + m_hdrMode = params->hdrMode; + m_hdrLutSize = params->threadWidth; + VP_RENDER_NORMALMESSAGE("Maximum Display Luminance %d, Maximum Content Level Luminance %d, HDR mode %d, Lut size %d", + m_maxDisplayLum, + m_maxContentLevelLum, + m_hdrMode, + m_hdrLutSize); + } + + return MOS_STATUS_SUCCESS; +} + + + +MOS_STATUS VpRenderHdr3DLutL0Kernel::SetKernelArgs(KERNEL_ARGS &kernelArgs, VP_PACKET_SHARED_CONTEXT *sharedContext) +{ + VP_FUNC_CALL(); + + //All pData will be free in VpSrFilter::Destroy so no need to free here + for (KRN_ARG &srcArg : kernelArgs) + { + for (KRN_ARG &dstArg : m_kernelArgs) + { + if (srcArg.uIndex == dstArg.uIndex) + { + if (dstArg.eArgKind == ARG_KIND_GENERAL || dstArg.eArgKind == ARG_KIND_INLINE || srcArg.eArgKind == ARG_KIND_SURFACE) + { + if (srcArg.pData == nullptr) + { + VP_RENDER_ASSERTMESSAGE("The Kernel Argument General Data is null! KernelID %d, argIndex %d", m_kernelId, dstArg.uIndex); + return MOS_STATUS_INVALID_PARAMETER; + } + else + { + dstArg.eArgKind = srcArg.eArgKind; + dstArg.pData = srcArg.pData; + srcArg.pData = nullptr; + } + } + } + } + + if (srcArg.pData != nullptr) + { + srcArg.pData = nullptr; + VP_RENDER_ASSERTMESSAGE("The Kernel Argument is set but not used. 
KernelID %d, argIndex %d", m_kernelId, srcArg.uIndex); + } + } + + return MOS_STATUS_SUCCESS; +} + +void VpRenderHdr3DLutL0Kernel::DumpSurfaces() +{ + VP_FUNC_CALL(); + for (auto &arg : m_kernelArgs) + { + if (arg.eArgKind == ARG_KIND_SURFACE) + { + for (uint32_t idx = 0; idx < arg.uSize / (sizeof(SurfaceType) * 2); idx++) + { + uint32_t *pSurfaceindex = (uint32_t *)(arg.pData) + idx; + SurfaceType surfType = (SurfaceType)*pSurfaceindex; + if (surfType == SurfaceTypeInvalid) + { + VP_RENDER_ASSERTMESSAGE("Surf type was invalid"); + return; + } + auto surf = m_surfaceGroup->find(surfType); + if (m_surfaceGroup->end() == surf) + { + VP_RENDER_ASSERTMESSAGE("Surf was not found"); + return; + } + + char bufName[MAX_PATH] = {}; + + MOS_SecureStringPrint( + bufName, + MAX_PATH, + sizeof(bufName), + "k_%d_%s_argi_%d", + m_kernelIndex, + m_kernelName.c_str(), + idx); + + if (surf->second == nullptr) + { + return; + } + if (surf->second->osSurface == nullptr) + { + return; + } + + DumpSurface(surf->second, bufName); + + } + } + } + + return; +} \ No newline at end of file diff --git a/media_softlet/agnostic/common/vp/hal/packet/vp_render_vebox_hdr_3dlut_l0_kernel.h b/media_softlet/agnostic/common/vp/hal/packet/vp_render_vebox_hdr_3dlut_l0_kernel.h new file mode 100644 index 0000000000..69f005617c --- /dev/null +++ b/media_softlet/agnostic/common/vp/hal/packet/vp_render_vebox_hdr_3dlut_l0_kernel.h @@ -0,0 +1,91 @@ +/* +* Copyright (c) 2024, Intel Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be 
included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +* OTHER DEALINGS IN THE SOFTWARE. +*/ +#ifndef __VP_RENDER_VEBOX_HDR_3DLUT_L0_KERNEL_H__ +#define __VP_RENDER_VEBOX_HDR_3DLUT_L0_KERNEL_H__ + +#include "vp_render_cmd_packet.h" +#include "vp_render_vebox_hdr_3dlut_kernel.h" +#define VP_HDR_KERNEL_NAME_L0_3DLUT "fillLutTable_3dlut" +namespace vp +{ +//! +//! \brief Render kernel object for the L0 HDR 3DLUT calculation kernel. +//! \details Fills the 3DLUT coefficient surface and sets up the kernel arguments and walker settings. +//! + +class VpRenderHdr3DLutL0Kernel : public VpRenderKernelObj +{ +public: + VpRenderHdr3DLutL0Kernel(PVP_MHWINTERFACE hwInterface, PVpAllocator allocator); + virtual ~VpRenderHdr3DLutL0Kernel(); + + virtual MOS_STATUS Init(VpRenderKernel& kernel); + virtual MOS_STATUS GetCurbeState(void *&curbe, uint32_t &curbeLength) override; + + virtual MOS_STATUS FreeCurbe(void*& curbe) override + { + return MOS_STATUS_SUCCESS; + } + + virtual uint32_t GetInlineDataSize() override + { + return 0; + } + + virtual bool IsKernelCached() override + { + return true; + } + virtual MOS_STATUS GetWalkerSetting(KERNEL_WALKER_PARAMS &walkerParam, KERNEL_PACKET_RENDER_DATA &renderData) override; + virtual MOS_STATUS InitCoefSurface(const uint32_t maxDLL, const uint32_t maxCLL, const VPHAL_HDR_MODE hdrMode); + +protected: + virtual MOS_STATUS SetupSurfaceState() override; + virtual MOS_STATUS CpPrepareResources() override; + virtual MOS_STATUS SetupStatelessBuffer() override; + virtual MOS_STATUS 
SetWalkerSetting(KERNEL_THREAD_SPACE &threadSpace, bool bSyncFlag, bool flushL1 = false); + virtual MOS_STATUS SetKernelArgs(KERNEL_ARGS &kernelArgs, VP_PACKET_SHARED_CONTEXT *sharedContext); + virtual MOS_STATUS SetKernelConfigs(KERNEL_CONFIGS &kernelConfigs) override; + virtual void DumpSurfaces(); + + //kernel Arguments + KERNEL_ARGS m_kernelArgs = {}; + KERNEL_WALKER_PARAMS m_walkerParam = {}; + uint8_t m_inlineData[32] = {}; + + float m_ccmMatrix[VP_CCM_MATRIX_SIZE] = {0.0}; + uint32_t m_maxDisplayLum = 1000; //!< Maximum Display Luminance + uint32_t m_maxContentLevelLum = 4000; //!< Maximum Content Level Luminance + VPHAL_HDR_MODE m_hdrMode = VPHAL_HDR_MODE_NONE; + uint32_t m_hdrLutSize = LUT65_SEG_SIZE; + void* m_curbe = nullptr; + uint32_t m_curbeSize = 0; + KERNEL_BTIS m_kernelBtis = {}; + KRN_EXECUTE_ENV m_kernelEnv = {}; + KERNEL_ARG_INDEX_SURFACE_MAP m_argIndexSurfMap = {}; + + MEDIA_CLASS_DEFINE_END(vp__VpRenderHdr3DLutL0Kernel) +}; + +} // namespace vp + +#endif // __VP_RENDER_VEBOX_HDR_3DLUT_L0_KERNEL_H__ \ No newline at end of file diff --git a/media_softlet/agnostic/common/vp/hal/pipeline/vp_feature_report.h b/media_softlet/agnostic/common/vp/hal/pipeline/vp_feature_report.h index a727141246..e37ec770ef 100644 --- a/media_softlet/agnostic/common/vp/hal/pipeline/vp_feature_report.h +++ b/media_softlet/agnostic/common/vp/hal/pipeline/vp_feature_report.h @@ -72,6 +72,7 @@ class VpFeatureReport uint8_t rtCacheSetting = 0; //!< Render Target cache usage #if (_DEBUG || _RELEASE_INTERNAL) uint8_t rtOldCacheSetting = 0; //!< Render Target old cache usage + bool isL03DLut = false; #endif bool VeboxScalability = false; //!< Vebox Scalability flag bool VPApogeios = false; //!< VP Apogeios flag diff --git a/media_softlet/agnostic/common/vp/hal/pipeline/vp_pipeline.cpp b/media_softlet/agnostic/common/vp/hal/pipeline/vp_pipeline.cpp index cc1bf8923d..fabccbfdcd 100644 --- a/media_softlet/agnostic/common/vp/hal/pipeline/vp_pipeline.cpp +++ 
b/media_softlet/agnostic/common/vp/hal/pipeline/vp_pipeline.cpp @@ -218,6 +218,15 @@ MOS_STATUS VpPipeline::UserFeatureReport() { m_reporting->GetFeatures().rtOldCacheSetting = (uint8_t)(m_vpMhwInterface.m_renderHal->oldCacheSettingForTargetSurface); } + if (m_reporting->GetFeatures().isL03DLut) + { + VP_PUBLIC_NORMALMESSAGE("VP L0 3DLut Enabled"); + ReportUserSettingForDebug( + m_userSettingPtr, + __MEDIA_USER_FEATURE_VALUE_VP_L0_3DLUT_ENABLED, + 1, + MediaUserSetting::Group::Sequence); + } #endif } diff --git a/media_softlet/agnostic/common/vp/hal/utils/hal_ddi_share/vp_user_setting.cpp b/media_softlet/agnostic/common/vp/hal/utils/hal_ddi_share/vp_user_setting.cpp index d561771ac7..8116bb59ad 100644 --- a/media_softlet/agnostic/common/vp/hal/utils/hal_ddi_share/vp_user_setting.cpp +++ b/media_softlet/agnostic/common/vp/hal/utils/hal_ddi_share/vp_user_setting.cpp @@ -369,6 +369,20 @@ MOS_STATUS VpUserSetting::InitVpUserSetting(MediaUserSettingSharedPtr userSettin 0, true); + DeclareUserSettingKeyForDebug( + userSettingPtr, + __MEDIA_USER_FEATURE_VALUE_ENABLE_VP_L0_3DLUT, + MediaUserSetting::Group::Sequence, + 0, + true); + + DeclareUserSettingKeyForDebug( + userSettingPtr, + __MEDIA_USER_FEATURE_VALUE_VP_L0_3DLUT_ENABLED, + MediaUserSetting::Group::Sequence, + 0, + true); + #endif return MOS_STATUS_SUCCESS; diff --git a/media_softlet/agnostic/common/vp/hal/utils/vp_user_feature_control.cpp b/media_softlet/agnostic/common/vp/hal/utils/vp_user_feature_control.cpp index ee87ed6b35..b8133fd15b 100644 --- a/media_softlet/agnostic/common/vp/hal/utils/vp_user_feature_control.cpp +++ b/media_softlet/agnostic/common/vp/hal/utils/vp_user_feature_control.cpp @@ -391,6 +391,22 @@ MOS_STATUS VpUserFeatureControl::CreateUserSettingForDebug() // Default value m_ctrlValDefault.enableIFNCC = false; } + + bool bEnableL03DLut = false; + eRegKeyReadStatus = ReadUserSettingForDebug( + m_userSettingPtr, + bEnableL03DLut, + __MEDIA_USER_FEATURE_VALUE_ENABLE_VP_L0_3DLUT, + 
MediaUserSetting::Group::Sequence); + if (MOS_SUCCEEDED(eRegKeyReadStatus)) + { + m_ctrlValDefault.bEnableL03DLut = bEnableL03DLut; + } + else + { + // Default value + m_ctrlValDefault.bEnableL03DLut = false; + } #endif return MOS_STATUS_SUCCESS; } diff --git a/media_softlet/agnostic/common/vp/hal/utils/vp_user_feature_control.h b/media_softlet/agnostic/common/vp/hal/utils/vp_user_feature_control.h index 9bbfc1e538..be38147829 100644 --- a/media_softlet/agnostic/common/vp/hal/utils/vp_user_feature_control.h +++ b/media_softlet/agnostic/common/vp/hal/utils/vp_user_feature_control.h @@ -59,6 +59,7 @@ class VpUserFeatureControl uint32_t enabledSFCNv12P010LinearOutput = 0; uint32_t enabledSFCRGBPRGB24Output = 0; bool enableIFNCC = false; + bool bEnableL03DLut = false; #endif bool disablePacketReuse = false; bool enablePacketReuseTeamsAlways = false; @@ -97,6 +98,11 @@ class VpUserFeatureControl { return m_ctrlVal.enableIFNCC; } + + bool EnableL03DLut() + { + return m_ctrlVal.bEnableL03DLut; + } #endif virtual MOS_STATUS CreateUserSettingForDebug(); diff --git a/media_softlet/agnostic/common/vp/hal/utils/vp_utils.h b/media_softlet/agnostic/common/vp/hal/utils/vp_utils.h index 893e48a853..ab4747d4ed 100644 --- a/media_softlet/agnostic/common/vp/hal/utils/vp_utils.h +++ b/media_softlet/agnostic/common/vp/hal/utils/vp_utils.h @@ -327,6 +327,9 @@ class Trace #define __MEDIA_USER_FEATURE_VALUE_INTER_FRAME_MEMORY_NINJA_START_COUNTER "InterFrameNinjaStartCounter" #define __MEDIA_USER_FEATURE_VALUE_INTER_FRAME_MEMORY_NINJA_END_COUNTER "InterFrameNinjaEndCounter" #define __MEDIA_USER_FEATURE_VALUE_ENABLE_IFNCC "EnableIFNCC" +// For L0 3DLut +#define __MEDIA_USER_FEATURE_VALUE_ENABLE_VP_L0_3DLUT "Enable L0 3DLUT" +#define __MEDIA_USER_FEATURE_VALUE_VP_L0_3DLUT_ENABLED "L0 3DLUT Enabled" #endif //(_DEBUG || _RELEASE_INTERNAL) class VpUtils