Skip to content

Commit

Permalink
[Encode] Cpublt to gen11/12
Browse files Browse the repository at this point in the history
* [Encode] Cpublt to gen11/12

Back port cpublt to g11/g12 vp9
  • Loading branch information
leyu-yao authored and intel-mediadev committed Sep 20, 2023
1 parent c7c39b2 commit 0a9bd3d
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 149 deletions.
139 changes: 139 additions & 0 deletions media_driver/agnostic/common/codec/hal/codechal_vdenc_vp9_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7967,3 +7967,142 @@ MOS_STATUS CodechalVdencVp9State::DumpSegmentParams(
return MOS_STATUS_SUCCESS;
}
#endif

void CodechalVdencVp9State::fill_pad_with_value(PMOS_SURFACE psSurface, uint32_t real_height, uint32_t aligned_height)
{
CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(psSurface);

// unaligned surfaces only
if (aligned_height <= real_height || aligned_height > psSurface->dwHeight)
{
return;
}

// avoid DYS frames cases
if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
{
return;
}

if (psSurface->OsResource.TileType == MOS_TILE_INVALID)
{
return;
}

if (psSurface->Format == Format_NV12 || psSurface->Format == Format_P010)
{
uint32_t pitch = psSurface->dwPitch;
uint32_t UVPlaneOffset = psSurface->UPlaneOffset.iSurfaceOffset;
uint32_t YPlaneOffset = psSurface->dwOffset;
uint32_t pad_rows = aligned_height - real_height;
uint32_t y_plane_size = pitch * real_height;
uint32_t uv_plane_size = pitch * real_height / 2;

MOS_LOCK_PARAMS lockFlags;
MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
lockFlags.WriteOnly = 1;

// padding for the linear format buffer.
if (psSurface->OsResource.TileType == MOS_TILE_LINEAR)
{
uint8_t *src_data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(psSurface->OsResource), &lockFlags);
CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(src_data);

uint8_t *src_data_y = src_data + YPlaneOffset;
uint8_t *src_data_y_end = src_data_y + y_plane_size;
for (uint32_t i = 0; i < pad_rows; i++)
{
MOS_SecureMemcpy(src_data_y_end + i * pitch, pitch, src_data_y_end - pitch, pitch);
}

uint8_t *src_data_uv = src_data + UVPlaneOffset;
uint8_t *src_data_uv_end = src_data_uv + uv_plane_size;
for (uint32_t i = 0; i < pad_rows / 2; i++)
{
MOS_SecureMemcpy(src_data_uv_end + i * pitch, pitch, src_data_uv_end - pitch, pitch);
}

m_osInterface->pfnUnlockResource(m_osInterface, &(psSurface->OsResource));
}
else
{
// we don't copy out the whole tiled buffer to linear and padding on the tiled buffer directly.
lockFlags.TiledAsTiled = 1;

uint8_t *src_data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(psSurface->OsResource), &lockFlags);
CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(src_data);

uint8_t* padding_data = (uint8_t *)MOS_AllocMemory(pitch * pad_rows);
CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(padding_data);

// Copy last Y row data to linear padding data.
GMM_RES_COPY_BLT gmmResCopyBlt = {0};
gmmResCopyBlt.Gpu.pData = src_data;
gmmResCopyBlt.Gpu.OffsetX = 0;
gmmResCopyBlt.Gpu.OffsetY = (YPlaneOffset + y_plane_size - pitch) / pitch;
gmmResCopyBlt.Sys.pData = padding_data;
gmmResCopyBlt.Sys.RowPitch = pitch;
gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows;
gmmResCopyBlt.Sys.SlicePitch = pitch;
gmmResCopyBlt.Blt.Slices = 1;
gmmResCopyBlt.Blt.Upload = false;
gmmResCopyBlt.Blt.Width = psSurface->dwWidth;
gmmResCopyBlt.Blt.Height = 1;
psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt);
// Fill the remain padding lines with last Y row data.
for (uint32_t i = 1; i < pad_rows; i++)
{
MOS_SecureMemcpy(padding_data + i * pitch, pitch, padding_data, pitch);
}
// Filling the padding for Y.
gmmResCopyBlt.Gpu.pData = src_data;
gmmResCopyBlt.Gpu.OffsetX = 0;
gmmResCopyBlt.Gpu.OffsetY = (YPlaneOffset + y_plane_size) / pitch;
gmmResCopyBlt.Sys.pData = padding_data;
gmmResCopyBlt.Sys.RowPitch = pitch;
gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows;
gmmResCopyBlt.Sys.SlicePitch = pitch;
gmmResCopyBlt.Blt.Slices = 1;
gmmResCopyBlt.Blt.Upload = true;
gmmResCopyBlt.Blt.Width = psSurface->dwWidth;
gmmResCopyBlt.Blt.Height = pad_rows;
psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt);

// Copy last UV row data to linear padding data.
gmmResCopyBlt.Gpu.pData = src_data;
gmmResCopyBlt.Gpu.OffsetX = 0;
gmmResCopyBlt.Gpu.OffsetY = (UVPlaneOffset + uv_plane_size - pitch) / pitch;
gmmResCopyBlt.Sys.pData = padding_data;
gmmResCopyBlt.Sys.RowPitch = pitch;
gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows / 2;
gmmResCopyBlt.Sys.SlicePitch = pitch;
gmmResCopyBlt.Blt.Slices = 1;
gmmResCopyBlt.Blt.Upload = false;
gmmResCopyBlt.Blt.Width = psSurface->dwWidth;
gmmResCopyBlt.Blt.Height = 1;
psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt);
// Fill the remain padding lines with last UV row data.
for (uint32_t i = 1; i < pad_rows / 2; i++)
{
MOS_SecureMemcpy(padding_data + i * pitch, pitch, padding_data, pitch);
}
// Filling the padding for UV.
gmmResCopyBlt.Gpu.pData = src_data;
gmmResCopyBlt.Gpu.OffsetX = 0;
gmmResCopyBlt.Gpu.OffsetY = (UVPlaneOffset + uv_plane_size) / pitch;
gmmResCopyBlt.Sys.pData = padding_data;
gmmResCopyBlt.Sys.RowPitch = pitch;
gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows / 2;
gmmResCopyBlt.Sys.SlicePitch = pitch;
gmmResCopyBlt.Blt.Slices = 1;
gmmResCopyBlt.Blt.Upload = true;
gmmResCopyBlt.Blt.Width = psSurface->dwWidth;
gmmResCopyBlt.Blt.Height = pad_rows / 2;
psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt);

MOS_FreeMemory(padding_data);
padding_data = nullptr;
m_osInterface->pfnUnlockResource(m_osInterface, &(psSurface->OsResource));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2415,6 +2415,8 @@ class CodechalVdencVp9State : public CodechalEncoderState
MOS_STATUS DumpPicParams(
PCODEC_VP9_ENCODE_PIC_PARAMS picParams);
#endif

void fill_pad_with_value(PMOS_SURFACE psSurface, uint32_t real_height, uint32_t aligned_height);
};

#endif // __CODECHAL_VDENC_VP9_BASE_H__
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4387,6 +4387,14 @@ MOS_STATUS CodechalVdencVp9StateG11::ExecutePictureLevel()
CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]), delete_func);
}

if (MEDIA_IS_WA(m_waTable, Wa_Vp9UnalignedHeight))
{
uint32_t real_height = m_oriFrameHeight;
uint32_t aligned_height = MOS_ALIGN_CEIL(real_height, CODEC_VP9_MIN_BLOCK_HEIGHT);

fill_pad_with_value(m_rawSurfaceToPak, real_height, aligned_height);
}

// Golden reference picture
if (refSurface[1])
{
Expand Down Expand Up @@ -5330,4 +5338,4 @@ MOS_STATUS CodechalVdencVp9StateG11::UpdateCmdBufAttribute(
}

return eStatus;
}
}
147 changes: 0 additions & 147 deletions media_driver/agnostic/gen12/codec/hal/codechal_vdenc_vp9_g12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3461,153 +3461,6 @@ MOS_STATUS CodechalVdencVp9StateG12::SetPictureStructs()
return eStatus;
}

void CodechalVdencVp9StateG12::fill_pad_with_value(PMOS_SURFACE psSurface, uint32_t real_height, uint32_t aligned_height)
{
CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(psSurface);

// unaligned surfaces only
if (aligned_height <= real_height || aligned_height > psSurface->dwHeight)
{
return;
}

// avoid DYS frames cases
if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
{
return;
}

if (psSurface->Format == Format_NV12 || psSurface->Format == Format_P010)
{
uint32_t pitch = psSurface->dwPitch;
uint32_t UVPlaneOffset = psSurface->UPlaneOffset.iSurfaceOffset;
uint32_t YPlaneOffset = psSurface->dwOffset;
uint32_t pad_rows = aligned_height - real_height;
uint32_t y_plane_size = pitch * real_height;
uint32_t uv_plane_size = pitch * real_height / 2;

MOS_LOCK_PARAMS lockFlags;
MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
lockFlags.WriteOnly = 1;

// padding for the linear format buffer.
if (psSurface->OsResource.TileType == MOS_TILE_LINEAR)
{
uint8_t *src_data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(psSurface->OsResource), &lockFlags);

if (!src_data)
return;

uint8_t *src_data_y = src_data + YPlaneOffset;
uint8_t *src_data_y_end = src_data_y + y_plane_size;
for (uint32_t i = 0; i < pad_rows; i++)
{
MOS_SecureMemcpy(src_data_y_end + i * pitch, pitch, src_data_y_end - pitch, pitch);
}

uint8_t *src_data_uv = src_data + UVPlaneOffset;
uint8_t *src_data_uv_end = src_data_uv + uv_plane_size;
for (uint32_t i = 0; i < pad_rows / 2; i++)
{
MOS_SecureMemcpy(src_data_uv_end + i * pitch, pitch, src_data_uv_end - pitch, pitch);
}

m_osInterface->pfnUnlockResource(m_osInterface, &(psSurface->OsResource));
}
else if (psSurface->OsResource.TileType == MOS_TILE_Y)
{
// we don't copy out the tiled buffer to linear and padding on the tiled buffer directly.
lockFlags.TiledAsTiled = 1;

uint8_t *src_data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(psSurface->OsResource), &lockFlags);
if (!src_data)
return;

uint8_t* padding_data = (uint8_t *)MOS_AllocMemory(pitch);
CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(padding_data);

int32_t LinearOffset;
int32_t TileOffset;
int32_t x;
int32_t y;

int32_t swizzleflags = 0; // 0 for MOS_TILE_Y

// copy out the last Y row data.
y = (YPlaneOffset + y_plane_size - pitch) / pitch;
for (x = 0, LinearOffset = 0; x < static_cast<int32_t>(pitch); x++, LinearOffset++)
{
TileOffset = Mos_SwizzleOffsetWrapper(
x,
y,
pitch,
MOS_TILE_Y,
false,
swizzleflags);
if (TileOffset < psSurface->OsResource.iSize)
*(padding_data + LinearOffset) = *(src_data + TileOffset);
}

// padding the unaligned region for Y.
y = (YPlaneOffset + y_plane_size) / pitch;
for (uint32_t i = 0; i < pad_rows; y++, i++)
{
LinearOffset = 0;
for (x = 0; x < static_cast<int32_t>(pitch); x++, LinearOffset++)
{
TileOffset = Mos_SwizzleOffsetWrapper(
x,
y,
pitch,
MOS_TILE_Y,
false,
swizzleflags);
if (TileOffset < psSurface->OsResource.iSize)
*(src_data + TileOffset) = *(padding_data + LinearOffset);
}
}

// copy out the last UV row data.
y = (UVPlaneOffset + uv_plane_size - pitch) / pitch;
for (x = 0, LinearOffset = 0; x < static_cast<int32_t>(pitch); x++, LinearOffset++)
{
TileOffset = Mos_SwizzleOffsetWrapper(
x,
y,
pitch,
MOS_TILE_Y,
false,
swizzleflags);
if (TileOffset < psSurface->OsResource.iSize)
*(padding_data + LinearOffset) = *(src_data + TileOffset);
}

// padding the unaligned region for UV.
y = (UVPlaneOffset + uv_plane_size) / pitch;
for (uint32_t i = 0; i < pad_rows / 2; y++, i++)
{
LinearOffset = 0;
for (x = 0; x < static_cast<int32_t>(pitch); x++, LinearOffset++)
{
TileOffset = Mos_SwizzleOffsetWrapper(
x,
y,
pitch,
MOS_TILE_Y,
false,
swizzleflags);
if (TileOffset < psSurface->OsResource.iSize)
*(src_data + TileOffset) = *(padding_data + LinearOffset);
}
}

MOS_FreeMemory(padding_data);
padding_data = nullptr;
m_osInterface->pfnUnlockResource(m_osInterface, &(psSurface->OsResource));
}
}
}

MOS_STATUS CodechalVdencVp9StateG12::ExecutePictureLevel()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1314,6 +1314,5 @@ class CodechalVdencVp9StateG12 : public CodechalVdencVp9State
PMOS_COMMAND_BUFFER cmdBuffer,
uint32_t currPass);

void fill_pad_with_value(PMOS_SURFACE psSurface, uint32_t real_height, uint32_t aligned_height);
};
#endif // __CODECHAL_VDENC_VP9_G12_H__
2 changes: 2 additions & 0 deletions media_driver/linux/gen11/ddi/media_sku_wa_g11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ static bool InitIclMediaWa(struct GfxDeviceInfo *devInfo,

MEDIA_WR_WA(waTable, WaDisableSetObjectCapture, 0);

MEDIA_WR_WA(waTable, Wa_Vp9UnalignedHeight, 1);

return true;
}

Expand Down

0 comments on commit 0a9bd3d

Please sign in to comment.