mirror of https://github.com/cemu-project/Cemu.git
Latte: Optimize uniform register array size for known shaders
This commit is contained in:
parent
96bbd3bd25
commit
72ce4838ea
|
@ -652,7 +652,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsVK.count_uniformRegister;
|
shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsGL.count_uniformRegister;
|
||||||
}
|
}
|
||||||
// calculate aux hash
|
// calculate aux hash
|
||||||
if (calculateAuxHash)
|
if (calculateAuxHash)
|
||||||
|
|
|
@ -787,7 +787,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
||||||
continue;
|
continue;
|
||||||
LatteDecompilerShader::QuickBufferEntry entry;
|
LatteDecompilerShader::QuickBufferEntry entry;
|
||||||
entry.index = i;
|
entry.index = i;
|
||||||
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
|
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(shaderContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
|
||||||
shader->list_quickBufferList.push_back(entry);
|
shader->list_quickBufferList.push_back(entry);
|
||||||
}
|
}
|
||||||
// get dimension of each used texture
|
// get dimension of each used texture
|
||||||
|
|
|
@ -37,7 +37,7 @@ namespace LatteDecompiler
|
||||||
}
|
}
|
||||||
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
|
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
|
||||||
{
|
{
|
||||||
uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(256);
|
uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256);
|
||||||
// full or partial uniform register file has to be present
|
// full or partial uniform register file has to be present
|
||||||
if (shaderType == LatteConst::ShaderType::Vertex)
|
if (shaderType == LatteConst::ShaderType::Vertex)
|
||||||
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize);
|
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize);
|
||||||
|
@ -156,7 +156,7 @@ namespace LatteDecompiler
|
||||||
|
|
||||||
shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i);
|
shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i);
|
||||||
shaderSrc->add("{" _CRLF);
|
shaderSrc->add("{" _CRLF);
|
||||||
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
|
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
|
||||||
shaderSrc->add("};" _CRLF _CRLF);
|
shaderSrc->add("};" _CRLF _CRLF);
|
||||||
shaderSrc->add(_CRLF);
|
shaderSrc->add(_CRLF);
|
||||||
}
|
}
|
||||||
|
|
|
@ -157,19 +157,23 @@ struct LatteDecompilerBufferAccessTracker
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sint32 DetermineSize(sint32 maximumSize) const
|
sint32 DetermineSize(uint64 shaderBaseHash, sint32 maximumSize) const
|
||||||
{
|
{
|
||||||
// here we try to predict the accessed range so we dont have to upload the whole buffer
|
// here we try to predict the accessed range so we don't have to upload the whole buffer; the result is a count of 16-byte (vec4) elements, not bytes
|
||||||
// potential risky optimization: assume that if there is a fixed-index access on an index higher than any other non-zero relative accesses, it bounds the prior relative access
|
// if no bound can be determined then return maximumSize
|
||||||
|
// for some known shaders we use hand-tuned values instead of the maximumSize fallback value that those shaders would normally use
|
||||||
|
if(shaderBaseHash == 0x8ff56afdf1a2f837) // XCX text rendering
|
||||||
|
return 24;
|
||||||
|
if(shaderBaseHash == 0x37b9100c1310d3bb) // BotW UI backdrops 1
|
||||||
|
return 24;
|
||||||
|
if(shaderBaseHash == 0xf7ba548c1fefe24a) // BotW UI backdrops 2
|
||||||
|
return 30;
|
||||||
|
|
||||||
sint32 highestAccessIndex = -1;
|
sint32 highestAccessIndex = -1;
|
||||||
if(hasStaticIndexAccess)
|
if(hasStaticIndexAccess)
|
||||||
{
|
|
||||||
highestAccessIndex = highestAccessStaticIndex;
|
highestAccessIndex = highestAccessStaticIndex;
|
||||||
}
|
|
||||||
if(hasDynamicIndexAccess)
|
if(hasDynamicIndexAccess)
|
||||||
{
|
|
||||||
return maximumSize; // dynamic index exists and no bound can be determined
|
return maximumSize; // dynamic index exists and no bound can be determined
|
||||||
}
|
|
||||||
if (highestAccessIndex < 0)
|
if (highestAccessIndex < 0)
|
||||||
return 1; // no access at all? But avoid zero as a size
|
return 1; // no access at all? But avoid zero as a size
|
||||||
return highestAccessIndex + 1;
|
return highestAccessIndex + 1;
|
||||||
|
|
Loading…
Reference in New Issue