From 72ce4838ea79252f9ec0df3f3eeb5959ca6616e6 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:07:03 +0100 Subject: [PATCH] Latte: Optimize uniform register array size for known shaders --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 2 +- .../LatteDecompilerAnalyzer.cpp | 2 +- .../LatteDecompilerEmitGLSLHeader.hpp | 4 ++-- .../LatteDecompilerInternal.h | 18 +++++++++++------- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 503fb664..b59702cd 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -652,7 +652,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi } else { - shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsVK.count_uniformRegister; + shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsGL.count_uniformRegister; } // calculate aux hash if (calculateAuxHash) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index 2e837198..cf22f05d 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -787,7 +787,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD continue; LatteDecompilerShader::QuickBufferEntry entry; entry.index = i; - entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16; + entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(shaderContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16; shader->list_quickBufferList.push_back(entry); } // get dimension of each used texture diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp index 21cae093..428f8647 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp @@ -37,7 +37,7 @@ namespace LatteDecompiler } else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE) { - uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(256); + uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256); // full or partial uniform register file has to be present if (shaderType == LatteConst::ShaderType::Vertex) shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize); @@ -156,7 +156,7 @@ namespace LatteDecompiler shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i); shaderSrc->add("{" _CRLF); - shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE)); + shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE)); shaderSrc->add("};" _CRLF _CRLF); shaderSrc->add(_CRLF); } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h index ac2a1fe1..ed1858ba 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h @@ -157,19 +157,23 @@ struct LatteDecompilerBufferAccessTracker } } - sint32 DetermineSize(sint32 maximumSize) const + sint32 DetermineSize(uint64 shaderBaseHash, sint32 maximumSize) const { - // here we try to predict the accessed range so we dont have to upload the whole buffer - // potential risky optimization: assume that if there is a fixed-index access on an index higher than any other non-zero relative accesses, it bounds the prior relative access + // here we try to predict the accessed byte range so we dont have to upload the whole buffer + // if no bound can be determined then return maximumSize + // for some known shaders we use hand-tuned values instead of the maximumSize fallback value that those shaders would normally use + if(shaderBaseHash == 0x8ff56afdf1a2f837) // XCX text rendering + return 24; + if(shaderBaseHash == 0x37b9100c1310d3bb) // BotW UI backdrops 1 + return 24; + if(shaderBaseHash == 0xf7ba548c1fefe24a) // BotW UI backdrops 2 + return 30; + sint32 highestAccessIndex = -1; if(hasStaticIndexAccess) - { highestAccessIndex = highestAccessStaticIndex; - } if(hasDynamicIndexAccess) - { return maximumSize; // dynamic index exists and no bound can be determined - } if (highestAccessIndex < 0) return 1; // no access at all? But avoid zero as a size return highestAccessIndex + 1;