Misc. Linux improvements and bug fixes. (#121)

Co-authored-by: Tom Lally <tomlally@protonmail.com>
This commit is contained in:
Tom Lally 2022-09-01 19:46:20 +01:00 committed by GitHub
parent b8d4cf5b29
commit d3a7b3b5a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 101 additions and 104 deletions

View File

@ -28,15 +28,13 @@ elseif(UNIX)
add_definitions(-DVK_USE_PLATFORM_XLIB_KHR) # legacy. Do we need to support XLIB surfaces?
add_definitions(-DVK_USE_PLATFORM_XCB_KHR)
endif()
add_definitions(-fms-extensions)
add_definitions(-fpermissive)
add_definitions(-maes)
# warnings
if(CMAKE_C_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wno-ambiguous-reversed-operator)
endif()
add_compile_options(-Wno-switch -Wno-ignored-attributes -Wno-deprecated-enum-enum-conversion)
add_compile_options(-Wno-multichar -Wno-invalid-offsetof -Wno-switch -Wno-ignored-attributes -Wno-deprecated-enum-enum-conversion)
endif()
add_definitions(-DVK_NO_PROTOTYPES)

View File

@ -1,6 +1,6 @@
project(CemuCafe)
if((CMAKE_C_COMPILER_ID STREQUAL "GNU") OR (CMAKE_C_COMPILER_ID STREQUAL "Clang"))
if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
add_compile_options(-mssse3 -mavx2)
endif()

View File

@ -40,5 +40,4 @@ namespace CafeSystem
extern RPLModule* applicationRPX;
extern std::atomic_bool g_isGameRunning;
extern std::atomic_bool g_isGPUInitFinished;

View File

@ -11,7 +11,7 @@ static uint32 ppc_cmp_and_mask[8] = {
};
#define ppc_word_rotl(_data, _n) (_rotl(_data,(_n)&0x1F))
#define ppc_word_rotl(_data, _n) (std::rotl<uint32>(_data,(_n)&0x1F))
static inline uint32 ppc_mask(int MB, int ME)
{

View File

@ -32,7 +32,7 @@ uint64 muldiv64(uint64 a, uint64 b, uint64 d)
bool PPCTimer_hasInvariantRDTSCSupport()
{
uint32 cpuv[4];
__cpuid((int*)cpuv, 0x80000007);
cpuid((int*)cpuv, 0x80000007);
return ((cpuv[3] >> 8) & 1);
}
@ -106,11 +106,7 @@ uint64 PPCTimer_microsecondsToTsc(uint64 us)
uint64 PPCTimer_tscToMicroseconds(uint64 us)
{
uint128_t r{};
#if BOOST_OS_WINDOWS
r.low = _umul128(us, 1000000ULL, &r.high);
#else
r.low = _umul128(us, 1000000ULL, (unsigned long long*)&r.high);
#endif
uint64 remainder;
@ -146,11 +142,7 @@ uint64 PPCTimer_getFromRDTSC()
rdtscDif = rdtscDif & ~(uint64)((sint64)rdtscDif >> 63);
uint128_t diff{};
#if BOOST_OS_WINDOWS
diff.low = _umul128(rdtscDif, Espresso::CORE_CLOCK, &diff.high);
#else
diff.low = _umul128(rdtscDif, Espresso::CORE_CLOCK, (unsigned long long*)&diff.high);
#endif
if(rdtscCurrentMeasure > _rdtscLastMeasure)
_rdtscLastMeasure = rdtscCurrentMeasure; // only travel forward in time

View File

@ -575,12 +575,12 @@ void PPCRecompiler_init()
// query processor extensions
int cpuInfo[4];
__cpuid(cpuInfo, 0x80000001);
cpuid(cpuInfo, 0x80000001);
hasLZCNTSupport = ((cpuInfo[2] >> 5) & 1) != 0;
__cpuid(cpuInfo, 0x1);
cpuid(cpuInfo, 0x1);
hasMOVBESupport = ((cpuInfo[2] >> 22) & 1) != 0;
hasAVXSupport = ((cpuInfo[2] >> 28) & 1) != 0;
__cpuidex(cpuInfo, 0x7, 0);
cpuidex(cpuInfo, 0x7, 0);
hasBMI2Support = ((cpuInfo[1] >> 8) & 1) != 0;
forceLog_printf("Recompiler initialized. CPU extensions: %s%s%s", hasLZCNTSupport ? "LZCNT " : "", hasMOVBESupport ? "MOVBE " : "", hasAVXSupport ? "AVX " : "");

View File

@ -81,31 +81,31 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
{
LatteParsedFetchShaderAttribute_t* attrib = group.attrib + f;
key += (uint64)attrib->endianSwap;
key = std::rotl(key, 3);
key = std::rotl<uint64>(key, 3);
key += (uint64)attrib->nfa;
key = std::rotl(key, 3);
key = std::rotl<uint64>(key, 3);
key += (uint64)(attrib->isSigned?1:0);
key = std::rotl(key, 1);
key = std::rotl<uint64>(key, 1);
key += (uint64)attrib->format;
key = std::rotl(key, 7);
key = std::rotl<uint64>(key, 7);
key += (uint64)attrib->fetchType;
key = std::rotl(key, 8);
key = std::rotl<uint64>(key, 8);
key += (uint64)attrib->ds[0];
key = std::rotl(key, 2);
key = std::rotl<uint64>(key, 2);
key += (uint64)attrib->ds[1];
key = std::rotl(key, 2);
key = std::rotl<uint64>(key, 2);
key += (uint64)attrib->ds[2];
key = std::rotl(key, 2);
key = std::rotl<uint64>(key, 2);
key += (uint64)attrib->ds[3];
key = std::rotl(key, 2);
key = std::rotl<uint64>(key, 2);
key += (uint64)(attrib->aluDivisor+1);
key = std::rotl(key, 2);
key = std::rotl<uint64>(key, 2);
key += (uint64)attrib->attributeBufferIndex;
key = std::rotl(key, 8);
key = std::rotl<uint64>(key, 8);
key += (uint64)attrib->semanticId;
key = std::rotl(key, 8);
key = std::rotl<uint64>(key, 8);
key += (uint64)(attrib->offset & 3);
key = std::rotl(key, 2);
key = std::rotl<uint64>(key, 2);
}
}
// todo - also hash invalid buffer groups?

View File

@ -743,7 +743,7 @@ private:
//h ^= *memU64;
//memU64++;
h = std::rotr(h, 7);
h = std::rotr<uint64>(h, 7);
h += (*memU64 + (uint64)i);
memU64++;
}

View File

@ -158,15 +158,15 @@ void LatteMRT::ApplyCurrentState()
if (colorView)
{
key += ((uint64)colorView);
key = std::rotl(key, 5);
key = std::rotl<uint64>(key, 5);
fboLookupView = colorView;
}
key = std::rotl(key, 7);
key = std::rotl<uint64>(key, 7);
}
if (sLatteCurrentRendertargets.depthBuffer.view)
{
key += ((uint64)sLatteCurrentRendertargets.depthBuffer.view);
key = std::rotl(key, 5);
key = std::rotl<uint64>(key, 5);
key += (sLatteCurrentRendertargets.depthBuffer.hasStencil);
if (fboLookupView == NULL)
{

View File

@ -263,7 +263,7 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
cemu_assert_debug(psSemanticId != 0xFF);
key += (uint64)psInputControl;
key = std::rotl(key, 7);
key = std::rotl<uint64>(key, 7);
if (spi0_positionEnable && f == spi0_positionAddr)
{
_activePSImportTable.import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION;
@ -546,7 +546,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
if(!vertexShader->streamoutBufferWriteMask2[i])
continue;
uint32 bufferStride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + i * 4];
auxHash = std::rotl(auxHash, 7);
auxHash = std::rotl<uint64>(auxHash, 7);
auxHash += (uint64)bufferStride;
}
}
@ -559,7 +559,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
if ((word4 & 0x300) == 0x100)
{
// integer format
auxHashTex = std::rotl(auxHashTex, 7);
auxHashTex = std::rotl<uint64>(auxHashTex, 7);
auxHashTex += 0x333;
}
}

View File

@ -984,7 +984,7 @@ float get_##__regname() const \
SRF_MODE_NO_ZERO = 1,
};
using E_ENDIAN_SWAP = E_ENDIAN_SWAP;
// using E_ENDIAN_SWAP = E_ENDIAN_SWAP;
enum class E_SEL
{

View File

@ -44,9 +44,9 @@ void OpenGLRenderer::uniformData_update()
for (sint32 f = 0; f < remappedArraySize; f++)
{
uniformDataHash[0] ^= remappedUniformData64[0];
uniformDataHash[0] = std::rotl(uniformDataHash[0], 11);
uniformDataHash[0] = std::rotl<uint64>(uniformDataHash[0], 11);
uniformDataHash[1] ^= remappedUniformData64[1];
uniformDataHash[1] = std::rotl(uniformDataHash[1], 11);
uniformDataHash[1] = std::rotl<uint64>(uniformDataHash[1], 11);
remappedUniformData64 += 2;
}
if (shader->uniformDataHash64[0] != uniformDataHash[0] || shader->uniformDataHash64[1] != uniformDataHash[1])

View File

@ -8,7 +8,7 @@ uint32 RendererShader::GeneratePrecompiledCacheId()
const char* s = EMULATOR_VERSION_SUFFIX;
while (*s)
{
v = _rotl(v, 7);
v = std::rotl<uint32>(v, 7);
v += (uint32)(*s);
s++;
}

View File

@ -4050,13 +4050,13 @@ VKRObjectRenderPass::VKRObjectRenderPass(AttachmentInfo_t& attachmentInfo, sint3
if (attachmentInfo.colorAttachment[i].isPresent || attachmentInfo.colorAttachment[i].viewObj)
{
stateHash += attachmentInfo.colorAttachment[i].format + i * 31;
stateHash = std::rotl(stateHash, 7);
stateHash = std::rotl<uint64>(stateHash, 7);
}
}
if (attachmentInfo.depthAttachment.isPresent || attachmentInfo.depthAttachment.viewObj)
{
stateHash += attachmentInfo.depthAttachment.format;
stateHash = std::rotl(stateHash, 7);
stateHash = std::rotl<uint64>(stateHash, 7);
}
m_hashForPipeline = stateHash;

View File

@ -24,18 +24,18 @@ uint64 VulkanRenderer::draw_calculateMinimalGraphicsPipelineHash(const LatteFetc
for (auto& group : fetchShader->bufferGroups)
{
uint32 bufferStride = group.getCurrentBufferStride(lcr.GetRawView());
stateHash = std::rotl(stateHash, 7);
stateHash = std::rotl<uint64>(stateHash, 7);
stateHash += bufferStride * 3;
}
stateHash += fetchShader->getVkPipelineHashFragment();
stateHash = std::rotl(stateHash, 7);
stateHash = std::rotl<uint64>(stateHash, 7);
stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE];
stateHash = std::rotl(stateHash, 7);
stateHash = std::rotl<uint64>(stateHash, 7);
stateHash += lcr.GetRawView()[mmVGT_STRMOUT_EN];
stateHash = std::rotl(stateHash, 7);
stateHash = std::rotl<uint64>(stateHash, 7);
if(lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL())
stateHash += 0x333333;
@ -66,24 +66,24 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader
if (vertexShader)
stateHash += vertexShader->baseHash;
stateHash = std::rotl(stateHash, 13);
stateHash = std::rotl<uint64>(stateHash, 13);
if (geometryShader)
stateHash += geometryShader->baseHash;
stateHash = std::rotl(stateHash, 13);
stateHash = std::rotl<uint64>(stateHash, 13);
if (pixelShader)
stateHash += pixelShader->baseHash + pixelShader->auxHash;
stateHash = std::rotl(stateHash, 13);
stateHash = std::rotl<uint64>(stateHash, 13);
uint32 polygonCtrl = lcr.PA_SU_SC_MODE_CNTL.getRawValue();
stateHash += polygonCtrl;
stateHash = std::rotl(stateHash, 7);
stateHash = std::rotl<uint64>(stateHash, 7);
stateHash += ctxRegister[Latte::REGADDR::PA_CL_CLIP_CNTL];
stateHash = std::rotl(stateHash, 7);
stateHash = std::rotl<uint64>(stateHash, 7);
const auto colorControlReg = ctxRegister[Latte::REGADDR::CB_COLOR_CONTROL];
stateHash += colorControlReg;
@ -97,7 +97,7 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader
{
if (((blendEnableMask & (1 << i))) == 0)
continue;
stateHash = std::rotl(stateHash, 7);
stateHash = std::rotl<uint64>(stateHash, 7);
stateHash += ctxRegister[Latte::REGADDR::CB_BLEND0_CONTROL + i];
}
}
@ -109,11 +109,11 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader
if (stencilTestEnable)
{
stateHash += ctxRegister[mmDB_STENCILREFMASK];
stateHash = std::rotl(stateHash, 17);
stateHash = std::rotl<uint64>(stateHash, 17);
if(depthControl & (1<<7)) // back stencil enable
{
stateHash += ctxRegister[mmDB_STENCILREFMASK_BF];
stateHash = std::rotl(stateHash, 13);
stateHash = std::rotl<uint64>(stateHash, 13);
}
}
else
@ -122,7 +122,7 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader
depthControl &= 0xFF;
}
stateHash = std::rotl(stateHash, 17);
stateHash = std::rotl<uint64>(stateHash, 17);
stateHash += depthControl;
// polygon offset
@ -542,16 +542,16 @@ uint64 VulkanRenderer::GetDescriptorSetStateHash(LatteDecompilerShader* shader)
{
samplerIndex += LatteDecompiler_getTextureSamplerBaseIndex(shader->shaderType);
hash += LatteGPUState.contextRegister[Latte::REGADDR::SQ_TEX_SAMPLER_WORD0_0 + samplerIndex * 3 + 0];
hash = std::rotl(hash, 7);
hash = std::rotl<uint64>(hash, 7);
hash += LatteGPUState.contextRegister[Latte::REGADDR::SQ_TEX_SAMPLER_WORD0_0 + samplerIndex * 3 + 1];
hash = std::rotl(hash, 7);
hash = std::rotl<uint64>(hash, 7);
hash += LatteGPUState.contextRegister[Latte::REGADDR::SQ_TEX_SAMPLER_WORD0_0 + samplerIndex * 3 + 2];
hash = std::rotl(hash, 7);
hash = std::rotl<uint64>(hash, 7);
}
hash = std::rotl(hash, 7);
hash = std::rotl<uint64>(hash, 7);
// hash view id + swizzle mask
hash += (uint64)texture->GetUniqueId();
hash = std::rotr(hash, 21);
hash = std::rotr<uint64>(hash, 21);
hash += (uint64)(word4 & 0x0FFF0000);
}

View File

@ -108,12 +108,12 @@ uint64 VulkanRenderer::copySurface_getPipelineStateHash(VkCopySurfaceState_t& st
uint64 h = 0;
h += (uintptr_t)state.destinationTexture->GetFormat();
h = std::rotr(h, 7);
h = std::rotr<uint64>(h, 7);
h += state.sourceTexture->isDepth ? 0x1111ull : 0;
h = std::rotr(h, 7);
h = std::rotr<uint64>(h, 7);
h += state.destinationTexture->isDepth ? 0x1112ull : 0;
h = std::rotr(h, 7);
h = std::rotr<uint64>(h, 7);
return h;
}

View File

@ -563,10 +563,10 @@ int iosuAct_thread()
uint32 name = (uint32)actCemuRequest->uuidName;
uint8 tempArray[] = {
(name >> 24) & 0xFF,
(name >> 16) & 0xFF,
(name >> 8) & 0xFF,
(name >> 0) & 0xFF,
static_cast<uint8>((name >> 24) & 0xFF),
static_cast<uint8>((name >> 16) & 0xFF),
static_cast<uint8>((name >> 8) & 0xFF),
static_cast<uint8>((name >> 0) & 0xFF),
0x3A,
0x27,
0x5E,

View File

@ -713,8 +713,8 @@ void _calculateMappedImportNameHash(const char* rplName, const char* funcName, u
uint64 v = (uint64)*rplName;
h1 += v;
h2 ^= v;
h1 = std::rotl(h1, 3);
h2 = std::rotl(h2, 7);
h1 = std::rotl<uint64>(h1, 3);
h2 = std::rotl<uint64>(h2, 7);
rplName++;
}
// funcName
@ -723,8 +723,8 @@ void _calculateMappedImportNameHash(const char* rplName, const char* funcName, u
uint64 v = (uint64)*funcName;
h1 += v;
h2 ^= v;
h1 = std::rotl(h1, 3);
h2 = std::rotl(h2, 7);
h1 = std::rotl<uint64>(h1, 3);
h2 = std::rotl<uint64>(h2, 7);
funcName++;
}
*h1Out = h1;

View File

@ -228,11 +228,39 @@ typedef union _LARGE_INTEGER {
#if defined(_MSC_VER)
#define DLLEXPORT __declspec(dllexport)
#elif defined(__GNUC__)
#define DLLEXPORT __attribute__((dllexport))
#if BOOST_OS_WINDOWS
#define DLLEXPORT __attribute__((dllexport))
#else
#define DLLEXPORT
#endif
#else
#error No definition for DLLEXPORT
#endif
#ifdef __GNUC__
#include <cpuid.h>
#endif
// Portable wrapper around the compiler-specific CPUID intrinsic.
//
// Queries CPUID leaf `functionId` and writes the four result values to
// cpuInfo[0..3]. Per the MSVC and GCC intrinsic documentation these are the
// EAX, EBX, ECX and EDX register values, in that order.
//
// No check is made that the requested leaf is supported by the CPU.
// Only MSVC and GNU-compatible compilers are handled; any other toolchain
// stops the build with #error.
inline void cpuid(int cpuInfo[4], int functionId) {
#if defined(_MSC_VER)
// MSVC form: output array first, leaf second
__cpuid(cpuInfo, functionId);
#elif defined(__GNUC__)
// <cpuid.h> form: leaf first, followed by one lvalue per output value
__cpuid(functionId, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
#else
#error No definition for cpuid
#endif
}
// Portable wrapper around the compiler-specific CPUID intrinsic that also
// takes a sub-leaf.
//
// Queries CPUID leaf `functionId` with sub-leaf `subFunctionId` and writes the
// four result values to cpuInfo[0..3]. Per the MSVC and GCC intrinsic
// documentation these are the EAX, EBX, ECX and EDX register values, in that
// order.
//
// No check is made that the requested leaf/sub-leaf is supported by the CPU.
// Only MSVC and GNU-compatible compilers are handled; any other toolchain
// stops the build with #error.
inline void cpuidex(int cpuInfo[4], int functionId, int subFunctionId) {
#if defined(_MSC_VER)
// MSVC form: output array first, then leaf and sub-leaf
__cpuidex(cpuInfo, functionId, subFunctionId);
#elif defined(__GNUC__)
// <cpuid.h> form: leaf and sub-leaf first, followed by one lvalue per output value
__cpuid_count(functionId, subFunctionId, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
#else
#error No definition for cpuidex
#endif
}
// MEMPTR
#include "Common/MemPtr.h"

View File

@ -6,20 +6,4 @@ uint32_t GetTickCount()
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (1000 * ts.tv_sec + ts.tv_nsec / 1000000);
}
#include <cpuid.h>
void (__cpuid)(int __cpuVal[4], unsigned int __leaf)
{
__cpuid(__cpuVal[0], __cpuVal[1], __cpuVal[2], __cpuVal[3], __leaf);
}
#undef __cpuid
#ifdef __clang__
void __cpuidex(int __cpuid_info[4], int __leaf, int __subleaf)
{
__cpuid_count (__leaf, __subleaf, __cpuid_info[0], __cpuid_info[1],
__cpuid_info[2], __cpuid_info[3]);
}
#endif
}

View File

@ -40,10 +40,6 @@ inline uint32_t GetExceptionError()
#undef Success
#undef ClientMessage
// cpu id (somewhat hacky, reorganize later)
void (__cpuid)(int __cpuVal[4], unsigned int __leaf);
void __cpuidex (int __cpuid_info[4], int __leaf, int __subleaf);
// placeholder
uint32_t GetTickCount();

View File

@ -62,11 +62,11 @@ void logCPUAndMemoryInfo()
unsigned nExIds, i = 0;
char CPUBrandString[0x40];
// Get the information associated with each extended ID.
__cpuid(CPUInfo, 0x80000000);
cpuid(CPUInfo, 0x80000000);
nExIds = CPUInfo[0];
for (i = 0x80000000; i <= nExIds; ++i)
{
__cpuid(CPUInfo, i);
cpuid(CPUInfo, i);
// Interpret CPU brand string
if (i == 0x80000002)
memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
@ -194,11 +194,11 @@ void mainEmulatorCommonInit()
PPCTimer_init();
// check available CPU extensions
int cpuInfo[4];
__cpuid(cpuInfo, 0x1);
cpuid(cpuInfo, 0x1);
_cpuExtension_SSSE3 = ((cpuInfo[2] >> 9) & 1) != 0;
_cpuExtension_SSE4_1 = ((cpuInfo[2] >> 19) & 1) != 0;
__cpuidex(cpuInfo, 0x7, 0);
cpuidex(cpuInfo, 0x7, 0);
_cpuExtension_AVX2 = ((cpuInfo[1] >> 5) & 1) != 0;
#if BOOST_OS_WINDOWS

View File

@ -332,25 +332,25 @@ void InvMixColumns(aes128Ctx_t* aesCtx)
b = stateVal(0, 1);
c = stateVal(0, 2);
d = stateVal(0, 3);
stateValU32(0) = lookupTable_multiply[a] ^ _rotl(lookupTable_multiply[b], 8) ^ _rotl(lookupTable_multiply[c], 16) ^ _rotl(lookupTable_multiply[d], 24);
stateValU32(0) = lookupTable_multiply[a] ^ std::rotl<uint32>(lookupTable_multiply[b], 8) ^ std::rotl<uint32>(lookupTable_multiply[c], 16) ^ std::rotl<uint32>(lookupTable_multiply[d], 24);
// i1
a = stateVal(1, 0);
b = stateVal(1, 1);
c = stateVal(1, 2);
d = stateVal(1, 3);
stateValU32(1) = lookupTable_multiply[a] ^ _rotl(lookupTable_multiply[b], 8) ^ _rotl(lookupTable_multiply[c], 16) ^ _rotl(lookupTable_multiply[d], 24);
stateValU32(1) = lookupTable_multiply[a] ^ std::rotl<uint32>(lookupTable_multiply[b], 8) ^ std::rotl<uint32>(lookupTable_multiply[c], 16) ^ std::rotl<uint32>(lookupTable_multiply[d], 24);
// i2
a = stateVal(2, 0);
b = stateVal(2, 1);
c = stateVal(2, 2);
d = stateVal(2, 3);
stateValU32(2) = lookupTable_multiply[a] ^ _rotl(lookupTable_multiply[b], 8) ^ _rotl(lookupTable_multiply[c], 16) ^ _rotl(lookupTable_multiply[d], 24);
stateValU32(2) = lookupTable_multiply[a] ^ std::rotl<uint32>(lookupTable_multiply[b], 8) ^ std::rotl<uint32>(lookupTable_multiply[c], 16) ^ std::rotl<uint32>(lookupTable_multiply[d], 24);
// i3
a = stateVal(3, 0);
b = stateVal(3, 1);
c = stateVal(3, 2);
d = stateVal(3, 3);
stateValU32(3) = lookupTable_multiply[a] ^ _rotl(lookupTable_multiply[b], 8) ^ _rotl(lookupTable_multiply[c], 16) ^ _rotl(lookupTable_multiply[d], 24);
stateValU32(3) = lookupTable_multiply[a] ^ std::rotl<uint32>(lookupTable_multiply[b], 8) ^ std::rotl<uint32>(lookupTable_multiply[c], 16) ^ std::rotl<uint32>(lookupTable_multiply[d], 24);
}
// The SubBytes Function Substitutes the values in the
@ -837,7 +837,7 @@ void AES128_init()
}
// check if AES-NI is available
int v[4];
__cpuid(v, 1);
cpuid(v, 1);
useAESNI = (v[2] & 0x2000000) != 0;
if (useAESNI)
{