Skip to content

Commit

Permalink
GS/HW: Attempt to reduce the load of copies for offset Z
Browse files Browse the repository at this point in the history
  • Loading branch information
refractionpcsx2 committed Feb 23, 2025
1 parent fd29319 commit 1a1401d
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 20 deletions.
2 changes: 2 additions & 0 deletions pcsx2/GS/GSState.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,8 @@ class GSState : public GSAlignedClass<32>
bool m_texflush_flag = false;
bool m_isPackedUV_HackFlag = false;
bool m_channel_shuffle = false;
bool m_using_temp_z = false;
bool m_temp_z_full_copy = false;
bool m_in_target_draw = false;
bool m_channel_shuffle_abort = false;

Expand Down
99 changes: 80 additions & 19 deletions pcsx2/GS/Renderers/HW/GSRendererHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2477,6 +2477,7 @@ void GSRendererHW::Draw()
m_texture_shuffle = false;
m_copy_16bit_to_target_shuffle = false;
m_same_group_texture_shuffle = false;
m_using_temp_z = false;

const bool is_split_texture_shuffle = (m_split_texture_shuffle_pages > 0);
if (is_split_texture_shuffle)
Expand Down Expand Up @@ -2589,6 +2590,12 @@ void GSRendererHW::Draw()
GSTextureCache::DepthStencil, ds_end_bp)) == nullptr ||
m_r.rintersect(tgt->m_valid).eq(tgt->m_valid));

if (g_texture_cache->GetTemporaryZ() != nullptr && (m_cached_ctx.FRAME.Block() == g_texture_cache->GetTemporaryZInfo().ZBP || m_cached_ctx.ZBUF.Block() == g_texture_cache->GetTemporaryZInfo().ZBP))
{
g_texture_cache->InvalidateTemporaryZ();
}


if (overwriting_whole_rt && overwriting_whole_ds &&
TryGSMemClear(no_rt, preserve_rt_color, is_zero_color_clear, rt_end_bp,
no_ds, preserve_depth, is_zero_depth_clear, ds_end_bp))
Expand Down Expand Up @@ -3174,15 +3181,29 @@ void GSRendererHW::Draw()
// Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right??
if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0))
{

const int z_vertical_offset = ((static_cast<int>(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y;
GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset);
GSVector4i dRect = GSVector4i(0, vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(vertical_offset + m_r.w + 1, vertical_offset + ds->m_unscaled_size.y) * ds->m_scale);
const int new_height = std::max(static_cast<int>(ds->m_unscaled_size.y * ds->m_scale), dRect.w);
GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true);
g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast<float>(ds->m_unscaled_size.y), 1.0f, std::min(z_vertical_offset + m_r.w + 1, ds->m_unscaled_size.y) / static_cast<float>(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
if (g_texture_cache->GetTemporaryZ() != nullptr)
{
GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo();

if (ds->m_TEX0.TBP0 != z_address_info.ZBP || z_address_info.offset != (vertical_offset - z_vertical_offset))
g_texture_cache->InvalidateTemporaryZ();
}

g_texture_cache->SetTemporaryZ(tex);
if (g_texture_cache->GetTemporaryZ() == nullptr)
{
m_temp_z_full_copy = false;
u32 vertical_size = std::max(rt->m_unscaled_size.y, ds->m_unscaled_size.y);
GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset);
GSVector4i dRect = GSVector4i(0, vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, (vertical_offset + ds->m_unscaled_size.y - z_vertical_offset) * ds->m_scale);
const int new_height = std::max(static_cast<int>(vertical_size * ds->m_scale), dRect.w);
GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true);
g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast<float>(ds->m_unscaled_size.y), 1.0f, (ds->m_unscaled_size.y - z_vertical_offset) / static_cast<float>(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
g_texture_cache->SetTemporaryZ(tex);
g_texture_cache->SetTemporaryZInfo(ds->m_TEX0.TBP0, vertical_offset - z_vertical_offset);
}
m_using_temp_z = true;
}

GSVertex* v = &m_vertex.buff[0];
Expand Down Expand Up @@ -3932,7 +3953,7 @@ void GSRendererHW::Draw()
{
s = GetDrawDumpPath("%05d_f%05lld_rz0_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), ds->m_TEX0.TBP0, psm_str(m_cached_ctx.ZBUF.PSM));

if (g_texture_cache->GetTemporaryZ())
if (m_using_temp_z)
g_texture_cache->GetTemporaryZ()->Save(s);
else if (ds->m_texture)
ds->m_texture->Save(s);
Expand Down Expand Up @@ -4032,6 +4053,13 @@ void GSRendererHW::Draw()
// Remove overwritten Zs at the FBP.
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, m_cached_ctx.FRAME.Block(),
m_cached_ctx.FRAME.PSM, m_texture_shuffle ? GetEffectiveTextureShuffleFbmsk() : fm);

if (!m_using_temp_z && g_texture_cache->GetTemporaryZ() != nullptr)
{
GSTextureCache::TempZAddress temp_z_info = g_texture_cache->GetTemporaryZInfo();
if (GSLocalMemory::GetStartBlockAddress(rt->m_TEX0.TBP0, rt->m_TEX0.TBW, rt->m_TEX0.PSM, real_rect) <= temp_z_info.ZBP && GSLocalMemory::GetEndBlockAddress(rt->m_TEX0.TBP0, rt->m_TEX0.TBW, rt->m_TEX0.PSM, real_rect) > temp_z_info.ZBP)
g_texture_cache->InvalidateTemporaryZ();
}
}

if (zm != 0xffffffff && ds)
Expand All @@ -4048,18 +4076,53 @@ void GSRendererHW::Draw()
g_texture_cache->InvalidateVideoMemType(
GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm);


if (ds && g_texture_cache->GetTemporaryZ())
if (m_using_temp_z)
{
if (m_cached_ctx.DepthWrite())
{
const int vertical_offset = ((static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast<int>(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y;
const int z_vertical_offset = ((static_cast<int>(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y;
const GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale);
const int get_next_ctx = m_env.PRIM.CTXT;
const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx];
if ((m_state_flush_reason != CONTEXTCHANGE) || next_ctx.ZBUF.ZBP == m_context->ZBUF.ZBP && next_ctx.FRAME.FBP == m_context->FRAME.FBP)
{
m_temp_z_full_copy = true;
}
else
{
const int vertical_offset = ((static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast<int>(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y;
const int z_vertical_offset = ((static_cast<int>(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y;

if (!m_temp_z_full_copy)
{
const GSVector4i dRect = GSVector4i(real_rect.x * ds->m_scale, (z_vertical_offset + (real_rect.y - vertical_offset)) * ds->m_scale, (real_rect.z + (1.0f / ds->m_scale)) * ds->m_scale, (z_vertical_offset + (real_rect.w + (1.0f / ds->m_scale) - vertical_offset)) * ds->m_scale);
const GSVector4 sRect = GSVector4((real_rect.x * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetWidth()), static_cast<float>(real_rect.y * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetHeight()), ((real_rect.z + (1.0f / ds->m_scale)) * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetWidth()),
static_cast<float>((real_rect.w + (1.0f / ds->m_scale)) * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetHeight()));
GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset);
g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), sRect, ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
}
else
{
const GSVector4i dRect = GSVector4i(0, ds->m_valid.y * ds->m_scale, ds->m_valid.z * ds->m_scale, ds->m_valid.w * ds->m_scale);
const GSVector4 sRect = GSVector4((ds->m_valid.x * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetWidth()), static_cast<float>((ds->m_valid.y + vertical_offset) * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetHeight()), ((ds->m_valid.z + (1.0f / ds->m_scale)) * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetWidth()),
static_cast<float>(((ds->m_valid.w + vertical_offset) + (1.0f / ds->m_scale)) * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetHeight()));
GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset);
g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), sRect, ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
}

GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset);
g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, (static_cast<float>(vertical_offset) * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f,
floor(static_cast<float>(std::min(real_rect.w + 1, ds->m_unscaled_size.y + vertical_offset)) * ds->m_scale) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
m_temp_z_full_copy = false;
}
}
}
else if (m_cached_ctx.DepthWrite() && g_texture_cache->GetTemporaryZ() != nullptr)
{
GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo();
if (ds->m_TEX0.TBP0 == z_address_info.ZBP)
{
GL_CACHE("RT in RT Updating Z copy on draw %d z_offset %d", s_n, z_address_info.offset);
GSVector4i dRect = GSVector4i(real_rect.x * ds->m_scale, (z_address_info.offset + real_rect.y) * ds->m_scale, (real_rect.z + (1.0f / ds->m_scale)) * ds->m_scale, (z_address_info.offset + real_rect.w + (1.0f / ds->m_scale)) * ds->m_scale);
g_gs_device->StretchRect(ds->m_texture, GSVector4(real_rect.x / static_cast<float>(ds->m_unscaled_size.x), real_rect.y / static_cast<float>(ds->m_unscaled_size.y), (real_rect.z + (1.0f / ds->m_scale)) / static_cast<float>(ds->m_unscaled_size.x), (real_rect.w + (1.0f / ds->m_scale)) / static_cast<float>(ds->m_unscaled_size.y)), g_texture_cache->GetTemporaryZ(), GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
}
}
}
Expand Down Expand Up @@ -6412,8 +6475,6 @@ void GSRendererHW::CleanupDraw(bool invalidate_temp_src)
// Remove any RT source.
if (invalidate_temp_src)
g_texture_cache->InvalidateTemporarySource();

g_texture_cache->InvalidateTemporaryZ();
// Restore Scissor.
m_context->UpdateScissor();

Expand Down Expand Up @@ -6453,7 +6514,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.cb_vs.texture_offset = {};
m_conf.ps.scanmsk = env.SCANMSK.MSK;
m_conf.rt = rt ? rt->m_texture : nullptr;
m_conf.ds = ds ? (g_texture_cache->GetTemporaryZ() ? g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr;
m_conf.ds = ds ? (m_using_temp_z ? g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr;

pxAssert(!ds || !rt || (ds->m_texture->GetSize().x == rt->m_texture->GetSize().x && ds->m_texture->GetSize().y == rt->m_texture->GetSize().y));

Expand Down
33 changes: 33 additions & 0 deletions pcsx2/GS/Renderers/HW/GSTextureCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3893,6 +3893,12 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
GL_CACHE("TC: Dirty Target(%s) (0x%x) r(%d,%d,%d,%d)", to_string(type),
t->m_TEX0.TBP0, r.x, r.y, r.z, r.w);

if (t->m_type == DepthStencil && GetTemporaryZ() != nullptr)
{
if (GetTemporaryZInfo().ZBP == t->m_TEX0.TBP0)
InvalidateTemporaryZ();
}

if (GSLocalMemory::m_psm[psm].depth)
DirtyRectByPage(bp, psm, bw, t, r);
else
Expand Down Expand Up @@ -7004,6 +7010,22 @@ void GSTextureCache::Target::Update(bool cannot_scale)
m_alpha_range |= alpha_minmax.first != alpha_minmax.second;
}
g_gs_device->Recycle(t);

if (m_type == DepthStencil && g_texture_cache->GetTemporaryZ() != nullptr)
{
if (g_texture_cache->GetTemporaryZInfo().ZBP == m_TEX0.TBP0)
{
GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo();
if (m_TEX0.TBP0 == z_address_info.ZBP)
{
//GL_CACHE("RT in RT Updating Z copy on draw %d z_offset %d", s_n, z_address_info.offset);
GSVector4i dRect = GSVector4i(total_rect.x * m_scale, (z_address_info.offset + total_rect.y) * m_scale, (total_rect.z + (1.0f / m_scale)) * m_scale, (z_address_info.offset + total_rect.w + (1.0f / m_scale)) * m_scale);
g_gs_device->StretchRect(m_texture, GSVector4(total_rect.x / static_cast<float>(m_unscaled_size.x), total_rect.y / static_cast<float>(m_unscaled_size.y), (total_rect.z + (1.0f / m_scale)) / static_cast<float>(m_unscaled_size.x), (total_rect.w + (1.0f / m_scale)) / static_cast<float>(m_unscaled_size.y)), g_texture_cache->GetTemporaryZ(), GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
}
}
}

m_dirty.clear();
}

Expand Down Expand Up @@ -7513,6 +7535,17 @@ void GSTextureCache::InvalidateTemporarySource()
m_temporary_source = nullptr;
}

/// Returns, by value, the address/offset record currently stored alongside the temporary Z
/// (the values last written through SetTemporaryZInfo()).
GSTextureCache::TempZAddress GSTextureCache::GetTemporaryZInfo()
{
	// Hand back a snapshot; callers cannot modify the cached record through it.
	const TempZAddress snapshot = m_temporary_z_info;
	return snapshot;
}

/// Stores the record that identifies what the temporary Z was built from.
/// @param address Stored as TempZAddress::ZBP.
/// @param offset  Stored as TempZAddress::offset.
void GSTextureCache::SetTemporaryZInfo(u32 address, u32 offset)
{
	// Both fields are replaced together; aggregate order is {ZBP, offset}.
	m_temporary_z_info = TempZAddress{address, offset};
}

void GSTextureCache::SetTemporaryZ(GSTexture* temp_z)
{
m_temporary_z = temp_z;
Expand Down
10 changes: 9 additions & 1 deletion pcsx2/GS/Renderers/HW/GSTextureCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,12 @@ class GSTextureCache
bool operator()(const PaletteKey& lhs, const PaletteKey& rhs) const;
};

/// Bookkeeping for the cached temporary Z texture.
/// Members are zero-initialized so the record never holds indeterminate values
/// before the first SetTemporaryZInfo() call.
struct TempZAddress
{
	/// Address compared against target TBP0 / FRAME and ZBUF block addresses to decide
	/// whether the temporary Z is still valid.
	u32 ZBP = 0;
	/// Vertical offset applied when copying between the depth target and the temporary Z.
	/// NOTE(review): appears to be in unscaled rows (callers multiply by the target scale) - confirm.
	u32 offset = 0;
};

class Target : public Surface
{
public:
Expand Down Expand Up @@ -428,6 +434,7 @@ class GSTextureCache

Source* m_temporary_source = nullptr; // invalidated after the draw
GSTexture* m_temporary_z = nullptr; // invalidated after the draw
TempZAddress m_temporary_z_info;

std::unique_ptr<GSDownloadTexture> m_color_download_texture;
std::unique_ptr<GSDownloadTexture> m_uint16_download_texture;
Expand Down Expand Up @@ -554,7 +561,8 @@ class GSTextureCache
void InvalidateTemporarySource();
void SetTemporaryZ(GSTexture* temp_z);
GSTexture* GetTemporaryZ();

TempZAddress GetTemporaryZInfo();
void SetTemporaryZInfo(u32 address, u32 offset);
/// Invalidates the temporary Z: a (possibly partial) copy of the current DS, created when Z is not offset but RT is. It is kept across draws until invalidated.
void InvalidateTemporaryZ();

Expand Down

0 comments on commit 1a1401d

Please sign in to comment.