PPCRec: Initial support for typed registers

This commit is contained in:
Exzap 2023-02-02 17:18:36 +01:00
parent 154aef0c1b
commit df74b99761
12 changed files with 1128 additions and 959 deletions

View File

@ -8,14 +8,22 @@
#include "util/MemMapper/MemMapper.h"
#include "Common/cpu_features.h"
static x86Assembler64::GPR32 _reg32(sint8 physRegId)
static x86Assembler64::GPR32 _reg32(IMLReg physReg)
{
return (x86Assembler64::GPR32)physRegId;
cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32);
return (x86Assembler64::GPR32)physReg.GetRegID();
}
static x86Assembler64::GPR8_REX _reg8(sint8 physRegId)
static uint32 _reg64(IMLReg physReg)
{
return (x86Assembler64::GPR8_REX)physRegId;
cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64);
return physReg.GetRegID();
}
// Converts a physical IML register into the assembler's GPR8_REX register type.
// In debug builds the register is first checked to carry the I32 format tag.
static x86Assembler64::GPR8_REX _reg8(IMLReg physReg)
{
	// for now registers used as 8bit operands are represented as 32bit
	cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32);
	const auto regId = physReg.GetRegID();
	return static_cast<x86Assembler64::GPR8_REX>(regId);
}
static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
@ -28,6 +36,11 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
return (x86Assembler64::GPR8_REX)regId;
}
// Re-types a register id supplied as a plain uint32 into the assembler's
// GPR8_REX register type. The numeric id is passed through unchanged.
static x86Assembler64::GPR8_REX _reg8_from_reg64(uint32 regId)
{
	using Reg8 = x86Assembler64::GPR8_REX;
	return static_cast<Reg8>(regId);
}
static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId)
{
return (x86Assembler64::GPR64)regId;
@ -132,7 +145,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
{
if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
{
uint32 branchDstReg = imlInstruction->op_macro.param;
uint32 branchDstReg = _reg32(imlInstruction->op_macro.paramReg);
if(X86_REG_RDX != branchDstReg)
x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, branchDstReg);
// potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX
@ -334,11 +347,16 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
*/
bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{
sint32 realRegisterData = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
if (indexed)
cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if( indexed )
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
if( indexed && realRegisterMem == realRegisterMem2 )
{
return false;
@ -439,11 +457,16 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
*/
bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{
sint32 realRegisterData = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
if (indexed)
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if (indexed)
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
if (indexed && realRegisterMem == realRegisterMem2)
{
@ -542,39 +565,42 @@ bool PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe
bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
auto regR = _reg32(imlInstruction->op_r_r.regR);
auto regA = _reg32(imlInstruction->op_r_r.regA);
if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
{
// registerResult = registerA
if (imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA)
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
if (regR != regA)
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
}
else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP)
{
if (imlInstruction->op_r_r.regA != imlInstruction->op_r_r.regR)
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); // if movbe is available we can move and swap in a single instruction?
x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.regR);
if (regA != regR)
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); // if movbe is available we can move and swap in a single instruction?
x64Gen_bswap_reg64Lower32bit(x64GenContext, regR);
}
else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 )
{
x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, regR, regA);
}
else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32)
{
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.regR, reg32ToReg16(imlInstruction->op_r_r.regA));
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, regR, reg32ToReg16(regA));
}
else if( imlInstruction->operation == PPCREC_IML_OP_NOT )
{
// copy register content if different registers
if( imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA )
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR);
if( regR != regA )
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_not_reg64Low32(x64GenContext, regR);
}
else if (imlInstruction->operation == PPCREC_IML_OP_NEG)
{
// copy register content if different registers
if (imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA)
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR);
if (regR != regA)
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_neg_reg64Low32(x64GenContext, regR);
}
else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW )
{
@ -582,29 +608,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
// LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5])
if(g_CPUFeatures.x86.lzcnt)
{
x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, regR, regA);
}
else
{
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regA, imlInstruction->op_r_r.regA);
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, regA, regA);
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR);
x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.regR, 32-1);
x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, regR, regA);
x64Gen_neg_reg64Low32(x64GenContext, regR);
x64Gen_add_reg64Low32_imm32(x64GenContext, regR, 32-1);
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.regR, 32);
x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, 32);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ )
{
if( imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA )
if( regR != regA )
{
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regA);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regR);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, regR);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
for(sint32 f=0; f<0x20; f+=8)
@ -613,7 +639,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
else
{
// calculate effective address
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regA);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
for(sint32 f=0; f<0x20; f+=8)
@ -630,15 +656,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
auto regR = _reg32(imlInstruction->op_r_immS32.regR);
if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN )
{
x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32);
x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, (uint32)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
{
if( (imlInstruction->op_r_immS32.immS32&0x80) )
assert_dbg(); // should not happen
x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.regR, (uint8)imlInstruction->op_r_immS32.immS32);
cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0);
x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_MFCR )
{
@ -698,12 +725,13 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR
bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
auto rRegResult = _reg32(imlInstruction->op_r_r_r.regR);
auto rRegOperand1 = _reg32(imlInstruction->op_r_r_r.regA);
auto rRegOperand2 = _reg32(imlInstruction->op_r_r_r.regB);
if (imlInstruction->operation == PPCREC_IML_OP_ADD)
{
// registerResult = registerOperand1 + registerOperand2
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
{
// be careful not to overwrite the operand before we use it
@ -721,9 +749,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
if( rRegOperand1 == rRegOperand2 )
{
// result = operand1 - operand1 -> 0
@ -748,28 +773,22 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR)
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegA = imlInstruction->op_r_r_r.regA;
sint32 rRegB = imlInstruction->op_r_r_r.regB;
if (rRegResult == rRegB)
std::swap(rRegA, rRegB);
if (rRegResult == rRegOperand2)
std::swap(rRegOperand1, rRegOperand2);
if (rRegResult != rRegA)
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegA);
if (rRegResult != rRegOperand1)
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
if (imlInstruction->operation == PPCREC_IML_OP_OR)
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB);
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
else if (imlInstruction->operation == PPCREC_IML_OP_AND)
x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB);
x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
else
x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB);
x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
{
// registerResult = registerOperand1 * registerOperand2
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
{
// be careful not to overwrite the operand before we use it
@ -789,9 +808,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW )
{
// registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits)
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW)
{
@ -831,9 +847,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
// todo: Use BMI2 rotate if available
// check if CL/ECX/RCX is available
if( rRegResult != X86_REG_RCX && rRegOperand1 != X86_REG_RCX && rRegOperand2 != X86_REG_RCX )
@ -872,10 +885,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
// since our register allocator doesn't support instruction based fixed phys registers yet
// we'll instead have to temporarily shuffle registers around
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
// we use BMI2's shift instructions until the RA can assign fixed registers
if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
{
@ -947,10 +956,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
// mov operand 2 to temp register
@ -981,10 +986,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED )
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
// mov operand 2 to temp register
@ -1102,37 +1103,35 @@ bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction,
bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
sint32 regResult = imlInstruction->op_r_r_s32.regR;
sint32 regOperand = imlInstruction->op_r_r_s32.regA;
auto regR = _reg32(imlInstruction->op_r_r_s32.regR);
auto regA = _reg32(imlInstruction->op_r_r_s32.regA);
uint32 immS32 = imlInstruction->op_r_r_s32.immS32;
if( imlInstruction->operation == PPCREC_IML_OP_ADD )
{
sint32 rRegResult = imlInstruction->op_r_r_s32.regR;
sint32 rRegOperand = imlInstruction->op_r_r_s32.regA;
uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
if(regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
x64Gen_add_reg64Low32_imm32(x64GenContext, regResult, (uint32)immU32);
if(regR != regA)
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_add_reg64Low32_imm32(x64GenContext, regR, (uint32)immU32);
}
else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
{
if (regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32);
if (regR != regA)
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_sub_reg64Low32_imm32(x64GenContext, regR, immS32);
}
else if (imlInstruction->operation == PPCREC_IML_OP_AND ||
imlInstruction->operation == PPCREC_IML_OP_OR ||
imlInstruction->operation == PPCREC_IML_OP_XOR)
{
if (regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
if (regR != regA)
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
if (imlInstruction->operation == PPCREC_IML_OP_AND)
x64Gen_and_reg64Low32_imm32(x64GenContext, regResult, immS32);
x64Gen_and_reg64Low32_imm32(x64GenContext, regR, immS32);
else if (imlInstruction->operation == PPCREC_IML_OP_OR)
x64Gen_or_reg64Low32_imm32(x64GenContext, regResult, immS32);
x64Gen_or_reg64Low32_imm32(x64GenContext, regR, immS32);
else // XOR
x64Gen_xor_reg64Low32_imm32(x64GenContext, regResult, immS32);
x64Gen_xor_reg64Low32_imm32(x64GenContext, regR, immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI )
{
@ -1143,41 +1142,39 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
uint32 sh = (vImm>>16)&0xFF;
uint32 mask = ppc_mask(mb, me);
// copy rS to temporary register
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.regA);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
// rotate destination register
if( sh )
x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F);
// AND destination register with inverted mask
x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.regR, ~mask);
x64Gen_and_reg64Low32_imm32(x64GenContext, regR, ~mask);
// AND temporary rS register with mask
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask);
// OR result with temporary
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.regR, REG_RESV_TEMP);
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP);
}
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
{
// registerResult = registerOperand * immS32
sint32 rRegResult = imlInstruction->op_r_r_s32.regR;
sint32 rRegOperand = imlInstruction->op_r_r_s32.regA;
sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32;
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize
if( rRegResult != rRegOperand )
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand);
x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP);
if( regR != regA )
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP);
}
else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ||
imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
{
if( imlInstruction->op_r_r_s32.regA != imlInstruction->op_r_r_s32.regR )
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.regA);
if( regA != regR )
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32);
x64Gen_shl_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32);
x64Gen_shr_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
else // RIGHT_SHIFT_S
x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32);
x64Gen_sar_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
}
else
{
@ -1236,50 +1233,52 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction
void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 name = imlInstruction->op_r_name.name;
auto regR = _reg64(imlInstruction->op_r_name.regR);
if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 )
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0));
}
else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 )
{
sint32 sprIndex = (name - PPCREC_NAME_SPR0);
if (sprIndex == SPR_LR)
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR));
else if (sprIndex == SPR_CTR)
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR));
else if (sprIndex == SPR_XER)
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER));
else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
{
sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, memOffset);
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, memOffset);
}
else
assert_dbg();
}
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
}
else if (name == PPCREC_NAME_XER_CA)
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
}
else if (name == PPCREC_NAME_XER_SO)
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so));
x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so));
}
else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
}
else if (name == PPCREC_NAME_CPU_MEMRES_EA)
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr));
}
else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue));
}
else
assert_dbg();
@ -1288,50 +1287,52 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 name = imlInstruction->op_r_name.name;
auto regR = _reg64(imlInstruction->op_r_name.regR);
if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 )
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), regR);
}
else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 )
{
uint32 sprIndex = (name - PPCREC_NAME_SPR0);
if (sprIndex == SPR_LR)
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), regR);
else if (sprIndex == SPR_CTR)
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), regR);
else if (sprIndex == SPR_XER)
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), regR);
else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
{
sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, regR);
}
else
assert_dbg();
}
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR);
}
else if (name == PPCREC_NAME_XER_CA)
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR)));
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR));
}
else if (name == PPCREC_NAME_XER_SO)
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR)));
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR));
}
else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR)));
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR));
}
else if (name == PPCREC_NAME_CPU_MEMRES_EA)
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), regR);
}
else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), regR);
}
else
assert_dbg();
@ -1432,16 +1433,12 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
{
codeGenerationFailed = true;
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP)
{
if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
{
codeGenerationFailed = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK )
{

View File

@ -7,7 +7,14 @@
uint32 _regF64(IMLReg r)
{
return (uint32)r;
cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::F64);
return (uint32)r.GetRegID();
}
// Returns the register id of an IML register as a uint32.
// Debug builds assert that the register is tagged with the I32 format.
uint32 _regI32(IMLReg r)
{
	cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32);
	const uint32 regIndex = static_cast<uint32>(r.GetRegID());
	return regIndex;
}
static x86Assembler64::GPR32 _reg32(sint8 physRegId)
@ -15,9 +22,10 @@ static x86Assembler64::GPR32 _reg32(sint8 physRegId)
return (x86Assembler64::GPR32)physRegId;
}
static x86Assembler64::GPR8_REX _reg8(sint8 physRegId)
static x86Assembler64::GPR8_REX _reg8(IMLReg r)
{
return (x86Assembler64::GPR8_REX)physRegId;
cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32); // currently bool regs are implemented as 32bit registers
return (x86Assembler64::GPR8_REX)r.GetRegID();
}
static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
@ -33,13 +41,14 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 name = imlInstruction->op_r_name.name;
uint32 fprReg = _regF64(imlInstruction->op_r_name.regR);
if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) )
{
x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0));
x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0));
}
else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) )
{
x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0));
x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0));
}
else
{
@ -50,13 +59,14 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunct
void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 name = imlInstruction->op_r_name.name;
uint32 fprReg = _regF64(imlInstruction->op_r_name.regR);
if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) )
{
x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0));
x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0));
}
else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) )
{
x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0));
x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0));
}
else
{
@ -64,10 +74,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunct
}
}
void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, sint32 registerGQR)
void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, IMLReg registerGQR)
{
// load GQR
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, registerGQR);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR));
// extract scale field and multiply by 16 to get array offset
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (isLoad?16:0)+8-4);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (0x3F<<4));
@ -91,7 +101,7 @@ void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcI
// generate code for PSQ load for a particular type
// if scaleGQR is -1 then a scale of 1.0 is assumed (no scale)
void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR = -1)
void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID)
{
if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1)
{
@ -227,16 +237,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
// convert the two integers to doubles
x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext, registerXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR));
// scale
if (registerGQR >= 0)
if (registerGQR.IsValid())
PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, true, loadPS1, registerGQR);
}
}
void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR)
void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR)
{
bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1);
// load GQR
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, registerGQR);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR));
// extract load type field
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7);
@ -292,11 +302,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen
// load from memory
bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{
sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData);
sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem);
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if( indexed )
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
uint8 mode = imlInstruction->op_storeLoad.mode;
if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 )
@ -417,7 +427,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
return true;
}
void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR = -1)
void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID)
{
bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 ||
mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 ||
@ -425,7 +435,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext
mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 ||
mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1);
bool isFloat = mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1;
if (registerGQR >= 0)
if (registerGQR.IsValid())
{
// move to temporary xmm and update registerXMM
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
@ -543,11 +553,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext
}
}
void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR)
void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR)
{
bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1);
// load GQR
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, registerGQR);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR));
// extract store type field
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7);
// jump cases
@ -602,11 +612,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe
// store to memory
bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{
sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData);
sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem);
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if( indexed )
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
uint8 mode = imlInstruction->op_storeLoad.mode;
if( mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0 )
{
@ -713,163 +723,166 @@ void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg)
// FPR op FPR
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP )
{
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP )
{
// VPUNPCKHQDQ
if (imlInstruction->op_fpr_r_r.regR == imlInstruction->op_fpr_r_r.regA)
if (regR == regA)
{
// unpack top to bottom and top
x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
//else if ( hasAVXSupport )
//{
// // unpack top to bottom and top with non-destructive destination
// // update: On Ivy Bridge this causes weird stalls?
// x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, imlInstruction->op_fpr_r_r.registerOperand);
// x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext, registerResult, registerOperand, registerOperand);
//}
else
{
// move top to bottom
x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, regR, regA);
// duplicate bottom
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regR);
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR);
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM )
{
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP )
{
x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED )
{
if( imlInstruction->op_fpr_r_r.regR != imlInstruction->op_fpr_r_r.regA )
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.regR);
if( regR != regA )
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
_swapPS0PS1(x64GenContext, regR);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP )
{
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA, 2);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 2);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM )
{
// use unpckhpd here?
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA, 3);
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.regR);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 3);
_swapPS0PS1(x64GenContext, regR);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM )
{
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR )
{
x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM )
{
x64Gen_divsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR)
{
x64Gen_divpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_divpd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM )
{
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR )
{
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR )
{
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM )
{
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN )
{
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ )
{
x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, imlInstruction->op_fpr_r_r.regA);
x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA);
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
// move to FPR register
x64Gen_movq_xmmReg_reg64(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_TEMP);
x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP )
{
// move register to XMM15
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
// call assembly routine to calculate accurate FRES result in XMM15
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres);
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP);
// copy result to bottom and top half of result register
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP);
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
}
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT)
{
// move register to XMM15
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
// call assembly routine to calculate accurate FRSQRTE result in XMM15
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte);
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP);
// copy result to bottom of result register
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR )
{
// copy register
if( imlInstruction->op_fpr_r_r.regR != imlInstruction->op_fpr_r_r.regA )
if( regR != regA )
{
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
}
// toggle sign bits
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair));
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR )
{
// copy register
if( imlInstruction->op_fpr_r_r.regR != imlInstruction->op_fpr_r_r.regA )
if( regR != regA )
{
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, imlInstruction->op_fpr_r_r.regA);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
}
// set sign bit to 0
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair));
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR)
{
// calculate bottom half of result
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR)
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres);
else
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte);
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
// calculate top half of result
// todo - this top to bottom copy can be optimized?
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.regA, 3);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, regA, 3);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // swap top and bottom
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15
x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.regR, REG_RESV_FPR_TEMP); // copy bottom to top
x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); // copy bottom to top
}
else
{
@ -882,78 +895,82 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
*/
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r.regR);
uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r.regA);
uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r.regB);
if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM)
{
if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA)
if (regR == regA)
{
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
}
else if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB)
else if (regR == regB)
{
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA);
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else
{
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA);
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
}
}
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM)
{
// todo: Use AVX 3-operand VADDSD if available
if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA)
if (regR == regA)
{
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
}
else if (imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB)
else if (regR == regB)
{
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA);
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
}
else
{
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA);
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
}
}
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR)
{
// registerResult = registerOperandA - registerOperandB
if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA )
if( regR == regA )
{
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB);
}
else if (g_CPUFeatures.x86.avx)
{
x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, regR, regA, regB);
}
else if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB )
else if( regR == regB )
{
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regA);
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, REG_RESV_FPR_TEMP);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
}
else
{
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA);
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB);
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM )
{
if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regA )
if( regR == regA )
{
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB);
}
else if( imlInstruction->op_fpr_r_r_r.regR == imlInstruction->op_fpr_r_r_r.regB )
else if( regR == regB )
{
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regA);
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, REG_RESV_FPR_TEMP);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
}
else
{
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regA);
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.regR, imlInstruction->op_fpr_r_r_r.regB);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB);
}
}
else
@ -965,32 +982,37 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
*/
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r_r.regR);
uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r_r.regA);
uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB);
uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC);
if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 )
{
// todo: Investigate if there are other optimizations possible if the operand registers overlap
// generic case
// 1) move frA bottom to frTemp bottom and top
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regA);
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
// 2) add frB (both halfs, lower half is overwritten in the next step)
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regB);
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
// 3) Interleave top of frTemp and frC
x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regC);
x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC);
// todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, REG_RESV_FPR_TEMP);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 )
{
// todo: Investigate if there are other optimizations possible if the operand registers overlap
// 1) move frA bottom to frTemp bottom and top
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regA);
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
// 2) add frB (both halfs, lower half is overwritten in the next step)
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regB);
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
// 3) Copy bottom from frC
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regC);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC);
//// 4) Swap bottom and top half
//x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1);
// todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, REG_RESV_FPR_TEMP);
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
//float s0 = (float)hCPU->fpr[frC].fp0;
//float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1);
@ -999,48 +1021,48 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM )
{
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
// select C
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regC);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC);
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
// select B
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regB);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB);
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR )
{
// select bottom
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
// select C bottom
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regC);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC);
sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
// select B bottom
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex());
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regB);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB);
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex());
// select top
x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.regA); // copy top to bottom (todo: May cause stall?)
x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // copy top to bottom (todo: May cause stall?)
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
// select C top
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regC, 2);
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandC);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regC, 2);
sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
// select B top
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex());
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.regR, imlInstruction->op_fpr_r_r_r_r.regB, 2);
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandB);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regB, 2);
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex());
}
@ -1050,38 +1072,40 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 regR = _regF64(imlInstruction->op_fpr_r.regR);
if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM )
{
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM )
{
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom));
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM )
{
x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM )
{
// convert to 32bit single
x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR);
x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, regR, regR);
// convert back to 64bit double
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR);
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR )
{
// convert to 32bit singles
x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR);
x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, regR, regR);
// convert back to 64bit doubles
x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR);
x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, regR, regR);
}
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64)
{
// convert bottom to 64bit double
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR);
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
// copy to top half
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.regR, imlInstruction->op_fpr_r.regR);
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR);
}
else
{

View File

@ -26,7 +26,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment)
// for non-BDNZ loops, check for common patterns
// risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB)
// this catches most loops with load-update and store-update instructions, but also those with decrementing counters
FixedSizeList<sint32, 64, true> list_modifiedRegisters;
FixedSizeList<IMLReg, 64, true> list_modifiedRegisters;
for (const IMLInstruction& instIt : imlSegment->imlList)
{
if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB) )

View File

@ -41,20 +41,22 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
return _tempOpcodename;
}
void IMLDebug_AppendRegisterParam(StringBuf& strOutput, sint32 virtualRegister, bool isLast = false)
// Appends a register operand to the debug output buffer in the form "t<id>".
//  strOutput       - buffer the text is appended to
//  virtualRegister - register to print; only its ID (GetRegID) is printed
//  isLast          - when true no ", " separator is emitted after the operand
// IDs below 10 get one extra space after the number, presumably to keep
// operand columns aligned with two-digit IDs.
// NOTE(review): this listing contains two variants of every comparison/format
// statement - one using `virtualRegister` directly and one using `regId`. The
// `virtualRegister` variants look like the pre-IMLReg form of the same lines;
// confirm which set is meant to remain.
void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false)
{
uint32 regId = virtualRegister.GetRegID();
// The register format/type is not printed yet; the break below marks that todo.
DEBUG_BREAK; // todo (print type)
if (isLast)
{
// last operand: no trailing separator
if (virtualRegister < 10)
strOutput.addFmt("t{} ", virtualRegister);
if (regId < 10)
strOutput.addFmt("t{} ", regId);
else
strOutput.addFmt("t{}", virtualRegister);
strOutput.addFmt("t{}", regId);
return;
}
// not the last operand: operand is followed by a comma separator
if (virtualRegister < 10)
strOutput.addFmt("t{} , ", virtualRegister);
if (regId < 10)
strOutput.addFmt("t{} , ", regId);
else
strOutput.addFmt("t{}, ", virtualRegister);
strOutput.addFmt("t{}, ", regId);
}
void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false)
@ -147,6 +149,12 @@ std::string IMLDebug_GetConditionName(IMLCondition cond)
return "ukn";
}
// Returns a short printable name for an IML register, for use in the debug dump.
//
// The name is a one-letter prefix derived from the register format followed by
// the register ID:
//   I32 -> "i<id>", I64 -> "r<id>", any other format -> "t<id>"
// An invalid register yields "invalid" so that optional operands can be printed
// without tripping an assert.
//
// Previously this was a stub that raised cemu_assert_unimplemented() and
// returned an empty string, although IMLDebug_DumpSegment already calls it for
// most FPR and conditional instructions.
// NOTE(review): formats other than I32/I64 share the generic "t" prefix because
// their enumerator names are not visible here - extend once they are confirmed.
std::string IMLDebug_GetRegName(IMLReg r)
{
	if (r.IsInvalid())
		return "invalid";
	const auto regFormat = r.GetRegFormat();
	const char* prefix = "t";
	if (regFormat == IMLRegFormat::I32)
		prefix = "i";
	else if (regFormat == IMLRegFormat::I64)
		prefix = "r";
	const uint32 regId = r.GetRegID();
	return prefix + std::to_string(regId);
}
void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo)
{
StringBuf strOutput(1024);
@ -197,7 +205,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR);
strOutput.addFmt("name_{} (", inst.op_r_name.regR, inst.op_r_name.name);
strOutput.addFmt("name_{} (", inst.op_r_name.regR.GetRegID());
if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999))
{
strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0);
@ -334,9 +342,9 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData);
if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2);
strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.registerMem2.GetRegID());
else
strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32);
strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32);
}
else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
@ -391,7 +399,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
}
else if (inst.type == PPCREC_IML_TYPE_FPR_R_NAME)
{
strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.regR, inst.op_r_name.name);
strOutput.addFmt("fpr_t{} = name_{} (", inst.op_r_name.regR.GetRegID(), inst.op_r_name.name);
if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999))
{
strOutput.addFmt("fpr{}", inst.op_r_name.name - PPCREC_NAME_FPR0);
@ -417,16 +425,16 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
}
else
strOutput.add("ukn");
strOutput.addFmt(") = fpr_t{}", inst.op_r_name.regR);
strOutput.addFmt(") = {}", IMLDebug_GetRegName(inst.op_r_name.regR));
}
else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD)
{
strOutput.addFmt("fpr_t{} = ", inst.op_storeLoad.registerData);
strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData));
if (inst.op_storeLoad.flags2.signExtend)
strOutput.add("S");
else
strOutput.add("U");
strOutput.addFmt("{} [t{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32, inst.op_storeLoad.mode);
strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode);
if (inst.op_storeLoad.flags2.notExpanded)
{
strOutput.addFmt(" <No expand>");
@ -438,23 +446,23 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
strOutput.add("S");
else
strOutput.add("U");
strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32);
strOutput.addFmt(" = fpr_t{} mode {}", inst.op_storeLoad.registerData, inst.op_storeLoad.mode);
strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32);
strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode);
}
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R)
{
strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst));
strOutput.addFmt("fpr{:02}, fpr{:02}", inst.op_fpr_r_r.regR, inst.op_fpr_r_r.regA);
strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA));
}
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R)
{
strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst));
strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r_r.regR, inst.op_fpr_r_r_r_r.regA, inst.op_fpr_r_r_r_r.regB, inst.op_fpr_r_r_r_r.regC);
strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC));
}
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R)
{
strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst));
strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r.regR, inst.op_fpr_r_r_r.regA, inst.op_fpr_r_r_r.regB);
strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB));
}
else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
@ -462,7 +470,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
}
else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
strOutput.addFmt("t{} ", inst.op_conditional_r_s32.regR);
strOutput.addFmt("{} ", IMLDebug_GetRegName(inst.op_conditional_r_s32.regR));
bool displayAsHex = false;
if (inst.operation == PPCREC_IML_OP_ASSIGN)
{

View File

@ -161,7 +161,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
}
else if (operation == PPCREC_IML_MACRO_B_TO_REG)
{
registersUsed->readGPR1 = op_macro.param;
cemu_assert_debug(op_macro.paramReg.IsValid());
registersUsed->readGPR1 = op_macro.paramReg;
}
else
cemu_assert_unimplemented();
@ -188,29 +189,29 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
else if (type == PPCREC_IML_TYPE_LOAD)
{
registersUsed->writtenGPR1 = op_storeLoad.registerData;
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
registersUsed->readGPR1 = op_storeLoad.registerMem;
}
else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
{
registersUsed->writtenGPR1 = op_storeLoad.registerData;
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
registersUsed->readGPR1 = op_storeLoad.registerMem;
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem2.IsValid())
registersUsed->readGPR2 = op_storeLoad.registerMem2;
}
else if (type == PPCREC_IML_TYPE_STORE)
{
registersUsed->readGPR1 = op_storeLoad.registerData;
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
registersUsed->readGPR2 = op_storeLoad.registerMem;
}
else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
{
registersUsed->readGPR1 = op_storeLoad.registerData;
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
registersUsed->readGPR2 = op_storeLoad.registerMem;
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem2.IsValid())
registersUsed->readGPR3 = op_storeLoad.registerMem2;
}
else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
@ -235,20 +236,20 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
// fpr load operation
registersUsed->writtenFPR1 = op_storeLoad.registerData;
// address is in gpr register
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
registersUsed->readGPR1 = op_storeLoad.registerMem;
// determine partially written result
switch (op_storeLoad.mode)
{
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0:
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1:
cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
registersUsed->readGPR2 = op_storeLoad.registerGQR;
break;
case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0:
// PS1 remains the same
registersUsed->readFPR4 = op_storeLoad.registerData;
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
break;
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
@ -261,7 +262,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0:
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0:
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
break;
default:
cemu_assert_unimplemented();
@ -272,21 +273,21 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
// fpr load operation
registersUsed->writtenFPR1 = op_storeLoad.registerData;
// address is in gpr registers
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
registersUsed->readGPR1 = op_storeLoad.registerMem;
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem2.IsValid())
registersUsed->readGPR2 = op_storeLoad.registerMem2;
// determine partially written result
switch (op_storeLoad.mode)
{
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0:
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1:
cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
registersUsed->readGPR3 = op_storeLoad.registerGQR;
break;
case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0:
// PS1 remains the same
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
registersUsed->readFPR4 = op_storeLoad.registerData;
break;
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
@ -299,7 +300,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0:
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
break;
default:
cemu_assert_unimplemented();
@ -309,18 +310,18 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
{
// fpr store operation
registersUsed->readFPR1 = op_storeLoad.registerData;
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
registersUsed->readGPR1 = op_storeLoad.registerMem;
// PSQ generic stores also access GQR
switch (op_storeLoad.mode)
{
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0:
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1:
cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
registersUsed->readGPR2 = op_storeLoad.registerGQR;
break;
default:
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
break;
}
}
@ -329,20 +330,20 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
// fpr store operation
registersUsed->readFPR1 = op_storeLoad.registerData;
// address is in gpr registers
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
registersUsed->readGPR1 = op_storeLoad.registerMem;
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem2.IsValid())
registersUsed->readGPR2 = op_storeLoad.registerMem2;
// PSQ generic stores also access GQR
switch (op_storeLoad.mode)
{
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0:
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1:
cemu_assert_debug(op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
registersUsed->readGPR3 = op_storeLoad.registerGQR;
break;
default:
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
break;
}
}
@ -473,92 +474,107 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
}
}
#define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x))
sint32 replaceRegisterMultiple(sint32 reg, const std::unordered_map<IMLReg, IMLReg>& translationTable)
//#define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x))
// Returns `reg` with its register ID changed from `oldId` to `newId`.
// If the ID of `reg` is not `oldId`, `reg` is returned unchanged. Only the ID
// is rewritten (via SetRegID); `reg` is taken by value, so the caller's copy is
// not modified.
IMLReg replaceRegisterId(IMLReg reg, IMLRegID oldId, IMLRegID newId)
{
// NOTE(review): the next three lines use `translationTable`, which is not a
// parameter of this function; they look like the body of the removed map-based
// replaceRegisterMultiple and would return before the ID check below - confirm
// they are leftovers.
const auto& it = translationTable.find(reg);
cemu_assert_debug(it != translationTable.cend());
return it->second;
// different ID: nothing to replace
if (reg.GetRegID() != oldId)
return reg;
reg.SetRegID(newId);
return reg;
}
sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4])
// Translates the ID of `reg` through `translationTable` (old ID -> new ID) and
// returns the resulting register. Only the ID is changed; `reg` is a by-value
// copy, so the caller's register is left untouched.
// Invalid registers are returned as-is without consulting the table.
// Every valid register ID is expected to have an entry in the table; this is
// checked with cemu_assert_debug.
IMLReg replaceRegisterIdMultiple(IMLReg reg, const std::unordered_map<IMLRegID, IMLRegID>& translationTable)
{
	if (!reg.IsInvalid())
	{
		const auto mapping = translationTable.find(reg.GetRegID());
		cemu_assert_debug(mapping != translationTable.cend());
		reg.SetRegID(mapping->second);
	}
	return reg;
}
// Array-based register replacement.
// Scans up to four (match[i], replaced[i]) pairs and, for the first valid
// match[i] whose register ID equals the ID of `reg`, returns replaced[i] as a
// whole (not just its ID). If no entry matches, `reg` is returned unchanged.
// Debug asserts check that `reg`, the matched entry and its replacement agree
// in both base format and register format.
// NOTE(review): the conditions `match[i] < 0` and `reg == match[i]` appear next
// to their IMLReg-based counterparts (IsInvalid / GetRegID comparison) and look
// like the pre-IMLReg form of the same checks - confirm which are intended.
IMLReg replaceRegisterIdMultiple(IMLReg reg, IMLReg match[4], IMLReg replaced[4])
{
// deprecated but still used for FPRs
for (sint32 i = 0; i < 4; i++)
{
// unused slots are skipped
if (match[i] < 0)
if (match[i].IsInvalid())
continue;
if (reg == match[i])
if (reg.GetRegID() == match[i].GetRegID())
{
// the replacement must not change the register's type
cemu_assert_debug(reg.GetBaseFormat() == match[i].GetBaseFormat());
cemu_assert_debug(reg.GetRegFormat() == match[i].GetRegFormat());
cemu_assert_debug(reg.GetBaseFormat() == replaced[i].GetBaseFormat());
cemu_assert_debug(reg.GetRegFormat() == replaced[i].GetRegFormat());
return replaced[i];
}
}
return reg;
}
//void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4])
void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& translationTable)
void IMLInstruction::RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable)
{
if (type == PPCREC_IML_TYPE_R_NAME)
{
op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, translationTable);
op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
}
else if (type == PPCREC_IML_TYPE_NAME_R)
{
op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, translationTable);
op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R)
{
op_r_r.regR = replaceRegisterMultiple(op_r_r.regR, translationTable);
op_r_r.regA = replaceRegisterMultiple(op_r_r.regA, translationTable);
op_r_r.regR = replaceRegisterIdMultiple(op_r_r.regR, translationTable);
op_r_r.regA = replaceRegisterIdMultiple(op_r_r.regA, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_S32)
{
op_r_immS32.regR = replaceRegisterMultiple(op_r_immS32.regR, translationTable);
op_r_immS32.regR = replaceRegisterIdMultiple(op_r_immS32.regR, translationTable);
}
else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
op_conditional_r_s32.regR = replaceRegisterMultiple(op_conditional_r_s32.regR, translationTable);
op_conditional_r_s32.regR = replaceRegisterIdMultiple(op_conditional_r_s32.regR, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R_S32)
{
op_r_r_s32.regR = replaceRegisterMultiple(op_r_r_s32.regR, translationTable);
op_r_r_s32.regA = replaceRegisterMultiple(op_r_r_s32.regA, translationTable);
op_r_r_s32.regR = replaceRegisterIdMultiple(op_r_r_s32.regR, translationTable);
op_r_r_s32.regA = replaceRegisterIdMultiple(op_r_r_s32.regA, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
{
op_r_r_s32_carry.regR = replaceRegisterMultiple(op_r_r_s32_carry.regR, translationTable);
op_r_r_s32_carry.regA = replaceRegisterMultiple(op_r_r_s32_carry.regA, translationTable);
op_r_r_s32_carry.regCarry = replaceRegisterMultiple(op_r_r_s32_carry.regCarry, translationTable);
op_r_r_s32_carry.regR = replaceRegisterIdMultiple(op_r_r_s32_carry.regR, translationTable);
op_r_r_s32_carry.regA = replaceRegisterIdMultiple(op_r_r_s32_carry.regA, translationTable);
op_r_r_s32_carry.regCarry = replaceRegisterIdMultiple(op_r_r_s32_carry.regCarry, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R_R)
{
op_r_r_r.regR = replaceRegisterMultiple(op_r_r_r.regR, translationTable);
op_r_r_r.regA = replaceRegisterMultiple(op_r_r_r.regA, translationTable);
op_r_r_r.regB = replaceRegisterMultiple(op_r_r_r.regB, translationTable);
op_r_r_r.regR = replaceRegisterIdMultiple(op_r_r_r.regR, translationTable);
op_r_r_r.regA = replaceRegisterIdMultiple(op_r_r_r.regA, translationTable);
op_r_r_r.regB = replaceRegisterIdMultiple(op_r_r_r.regB, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
{
op_r_r_r_carry.regR = replaceRegisterMultiple(op_r_r_r_carry.regR, translationTable);
op_r_r_r_carry.regA = replaceRegisterMultiple(op_r_r_r_carry.regA, translationTable);
op_r_r_r_carry.regB = replaceRegisterMultiple(op_r_r_r_carry.regB, translationTable);
op_r_r_r_carry.regCarry = replaceRegisterMultiple(op_r_r_r_carry.regCarry, translationTable);
op_r_r_r_carry.regR = replaceRegisterIdMultiple(op_r_r_r_carry.regR, translationTable);
op_r_r_r_carry.regA = replaceRegisterIdMultiple(op_r_r_r_carry.regA, translationTable);
op_r_r_r_carry.regB = replaceRegisterIdMultiple(op_r_r_r_carry.regB, translationTable);
op_r_r_r_carry.regCarry = replaceRegisterIdMultiple(op_r_r_r_carry.regCarry, translationTable);
}
else if (type == PPCREC_IML_TYPE_COMPARE)
{
op_compare.regR = replaceRegisterMultiple(op_compare.regR, translationTable);
op_compare.regA = replaceRegisterMultiple(op_compare.regA, translationTable);
op_compare.regB = replaceRegisterMultiple(op_compare.regB, translationTable);
op_compare.regR = replaceRegisterIdMultiple(op_compare.regR, translationTable);
op_compare.regA = replaceRegisterIdMultiple(op_compare.regA, translationTable);
op_compare.regB = replaceRegisterIdMultiple(op_compare.regB, translationTable);
}
else if (type == PPCREC_IML_TYPE_COMPARE_S32)
{
op_compare_s32.regR = replaceRegisterMultiple(op_compare_s32.regR, translationTable);
op_compare_s32.regA = replaceRegisterMultiple(op_compare_s32.regA, translationTable);
op_compare_s32.regR = replaceRegisterIdMultiple(op_compare_s32.regR, translationTable);
op_compare_s32.regA = replaceRegisterIdMultiple(op_compare_s32.regA, translationTable);
}
else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
op_conditional_jump.registerBool = replaceRegisterMultiple(op_conditional_jump.registerBool, translationTable);
op_conditional_jump.registerBool = replaceRegisterIdMultiple(op_conditional_jump.registerBool, translationTable);
}
else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP)
{
@ -576,7 +592,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& transl
}
else if (operation == PPCREC_IML_MACRO_B_TO_REG)
{
op_macro.param = replaceRegisterMultiple(op_macro.param, translationTable);
op_macro.paramReg = replaceRegisterIdMultiple(op_macro.paramReg, translationTable);
}
else
{
@ -585,40 +601,40 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& transl
}
else if (type == PPCREC_IML_TYPE_LOAD)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem.IsValid())
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable);
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem.IsValid())
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
if (op_storeLoad.registerMem2.IsValid())
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
}
else if (type == PPCREC_IML_TYPE_STORE)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem.IsValid())
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable);
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem.IsValid())
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
if (op_storeLoad.registerMem2.IsValid())
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
}
else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
op_atomic_compare_store.regEA = replaceRegisterMultiple(op_atomic_compare_store.regEA, translationTable);
op_atomic_compare_store.regCompareValue = replaceRegisterMultiple(op_atomic_compare_store.regCompareValue, translationTable);
op_atomic_compare_store.regWriteValue = replaceRegisterMultiple(op_atomic_compare_store.regWriteValue, translationTable);
op_atomic_compare_store.regBoolOut = replaceRegisterMultiple(op_atomic_compare_store.regBoolOut, translationTable);
op_atomic_compare_store.regEA = replaceRegisterIdMultiple(op_atomic_compare_store.regEA, translationTable);
op_atomic_compare_store.regCompareValue = replaceRegisterIdMultiple(op_atomic_compare_store.regCompareValue, translationTable);
op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable);
op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_R_NAME)
{
@ -630,54 +646,54 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& transl
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD)
{
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerGQR.IsValid())
{
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable);
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
{
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem2.IsValid())
{
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable);
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
}
if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerGQR.IsValid())
{
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable);
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_FPR_STORE)
{
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerGQR.IsValid())
{
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable);
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
{
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem.IsValid())
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerMem2.IsValid())
{
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable);
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
}
if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER)
if (op_storeLoad.registerGQR.IsValid())
{
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable);
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_FPR_R_R)
@ -694,7 +710,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& transl
}
else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
{
op_fpr_compare.regR = replaceRegisterMultiple(op_fpr_compare.regR, translationTable);
op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable);
}
else
{
@ -702,7 +718,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& transl
}
}
void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4])
void IMLInstruction::ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4])
{
if (type == PPCREC_IML_TYPE_R_NAME)
{
@ -766,54 +782,54 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist
}
else if (type == PPCREC_IML_TYPE_FPR_R_NAME)
{
op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced);
op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_NAME_R)
{
op_r_name.regR = replaceRegisterMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced);
op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_STORE)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_R_R)
{
op_fpr_r_r.regR = replaceRegisterMultiple(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r.regA = replaceRegisterMultiple(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
{
op_fpr_r_r_r.regR = replaceRegisterMultiple(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regA = replaceRegisterMultiple(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regB = replaceRegisterMultiple(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
{
op_fpr_r_r_r_r.regR = replaceRegisterMultiple(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regA = replaceRegisterMultiple(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regB = replaceRegisterMultiple(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regC = replaceRegisterMultiple(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_R)
{
op_fpr_r.regR = replaceRegisterMultiple(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
{
op_fpr_compare.regA = replaceRegisterMultiple(op_fpr_compare.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_compare.regB = replaceRegisterMultiple(op_fpr_compare.regB, fprRegisterSearched, fprRegisterReplaced);
op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, fprRegisterSearched, fprRegisterReplaced);
}
else
{
@ -821,7 +837,7 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist
}
}
void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced)
void IMLInstruction::ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegisterReplaced)
{
if (type == PPCREC_IML_TYPE_R_NAME)
{
@ -885,49 +901,49 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe
}
else if (type == PPCREC_IML_TYPE_FPR_R_NAME)
{
op_r_name.regR = replaceRegister(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced);
op_r_name.regR = replaceRegisterId(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_NAME_R)
{
op_r_name.regR = replaceRegister(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced);
op_r_name.regR = replaceRegisterId(op_r_name.regR, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD)
{
op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
{
op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_STORE)
{
op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
{
op_storeLoad.registerData = replaceRegister(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterId(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_R_R)
{
op_fpr_r_r.regR = replaceRegister(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r.regA = replaceRegister(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r.regR = replaceRegisterId(op_fpr_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r.regA = replaceRegisterId(op_fpr_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
{
op_fpr_r_r_r.regR = replaceRegister(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regA = replaceRegister(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regB = replaceRegister(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regR = replaceRegisterId(op_fpr_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regA = replaceRegisterId(op_fpr_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r.regB = replaceRegisterId(op_fpr_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
{
op_fpr_r_r_r_r.regR = replaceRegister(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regA = replaceRegister(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regB = replaceRegister(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regC = replaceRegister(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regR = replaceRegisterId(op_fpr_r_r_r_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regA = replaceRegisterId(op_fpr_r_r_r_r.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regB = replaceRegisterId(op_fpr_r_r_r_r.regB, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r_r_r_r.regC = replaceRegisterId(op_fpr_r_r_r_r.regC, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_R)
{
op_fpr_r.regR = replaceRegister(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced);
op_fpr_r.regR = replaceRegisterId(op_fpr_r.regR, fprRegisterSearched, fprRegisterReplaced);
}
else
{

View File

@ -1,7 +1,103 @@
#pragma once
using IMLReg = uint8;
inline constexpr IMLReg IMLREG_INVALID = (IMLReg)-1;
using IMLRegID = uint16; // 16 bit ID
// format of IMLReg (bit layout of the 32-bit raw value):
// 0-15  (16 bit) IMLRegID
// 16-18 (3 bit)  not assigned
// 19-23 (5 bit)  Offset in elements, for SIMD registers
// 24-27 (4 bit)  IMLRegFormat RegFormat
// 28-31 (4 bit)  IMLRegFormat BaseFormat
// Data formats a register can have, or be viewed as.
// The numeric values are stored in the 4-bit format fields of IMLReg, so
// they are spelled out explicitly; 0 is the "invalid" marker.
enum class IMLRegFormat : uint8
{
	INVALID_FORMAT = 0,
	I64 = 1,
	I32 = 2,
	I16 = 3,
	I8 = 4,
	// I1 ?
	F64 = 5,
	F32 = 6
};
class IMLReg
{
public:
IMLReg()
{
m_raw = 0; // 0 is invalid
}
IMLReg(IMLRegFormat baseRegFormat, IMLRegFormat regFormat, uint8 viewOffset, IMLRegID regId)
{
m_raw = 0;
m_raw |= ((uint8)baseRegFormat << 28);
m_raw |= ((uint8)regFormat << 24);
m_raw |= (uint32)regId;
}
IMLReg(IMLReg&& baseReg, IMLRegFormat viewFormat, uint8 viewOffset, IMLRegID regId)
{
DEBUG_BREAK;
//m_raw = 0;
//m_raw |= ((uint8)baseRegFormat << 28);
//m_raw |= ((uint8)viewFormat << 24);
//m_raw |= (uint32)regId;
}
IMLReg(const IMLReg& other) : m_raw(other.m_raw) {}
IMLRegFormat GetBaseFormat() const
{
return (IMLRegFormat)((m_raw >> 28) & 0xF);
}
IMLRegFormat GetRegFormat() const
{
return (IMLRegFormat)((m_raw >> 24) & 0xF);
}
IMLRegID GetRegID() const
{
cemu_assert_debug(GetBaseFormat() != IMLRegFormat::INVALID_FORMAT);
cemu_assert_debug(GetRegFormat() != IMLRegFormat::INVALID_FORMAT);
return (IMLRegID)(m_raw & 0xFFFF);
}
void SetRegID(IMLRegID regId)
{
cemu_assert_debug(regId <= 0xFFFF);
m_raw &= ~0xFFFF;
m_raw |= (uint32)regId;
}
bool IsInvalid() const
{
return GetBaseFormat() == IMLRegFormat::INVALID_FORMAT;
}
bool IsValid() const
{
return GetBaseFormat() != IMLRegFormat::INVALID_FORMAT;
}
bool IsValidAndSameRegID(IMLRegID regId) const
{
return IsValid() && GetRegID() == regId;
}
// risky
bool operator==(const IMLReg& other) const
{
//__debugbreak();
return m_raw == other.m_raw;
}
private:
uint32 m_raw;
};
// Canonical invalid register: both formats are INVALID_FORMAT and the ID is 0,
// which gives the same raw value (0) as a default-constructed IMLReg.
static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0);
using IMLName = uint32;
@ -230,6 +326,8 @@ enum
struct IMLUsedRegisters
{
IMLUsedRegisters() {};
// GPR
union
{
@ -256,59 +354,63 @@ struct IMLUsedRegisters
};
};
bool IsGPRWritten(IMLReg imlReg) const
bool IsBaseGPRWritten(IMLReg imlReg) const
{
cemu_assert_debug(imlReg != IMLREG_INVALID);
return writtenGPR1 == imlReg || writtenGPR2 == imlReg;
cemu_assert_debug(imlReg.IsValid());
auto regId = imlReg.GetRegID();
if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId)
return true;
if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId)
return true;
return false;
}
template<typename Fn>
void ForEachWrittenGPR(Fn F) const
{
if (writtenGPR1 != IMLREG_INVALID)
if (writtenGPR1.IsValid())
F(writtenGPR1);
if (writtenGPR2 != IMLREG_INVALID)
if (writtenGPR2.IsValid())
F(writtenGPR2);
}
template<typename Fn>
void ForEachReadGPR(Fn F) const
{
if (readGPR1 != IMLREG_INVALID)
if (readGPR1.IsValid())
F(readGPR1);
if (readGPR2 != IMLREG_INVALID)
if (readGPR2.IsValid())
F(readGPR2);
if (readGPR3 != IMLREG_INVALID)
if (readGPR3.IsValid())
F(readGPR3);
}
template<typename Fn>
void ForEachAccessedGPR(Fn F) const
{
if (readGPR1 != IMLREG_INVALID)
if (readGPR1.IsValid())
F(readGPR1, false);
if (readGPR2 != IMLREG_INVALID)
if (readGPR2.IsValid())
F(readGPR2, false);
if (readGPR3 != IMLREG_INVALID)
if (readGPR3.IsValid())
F(readGPR3, false);
if (writtenGPR1 != IMLREG_INVALID)
if (writtenGPR1.IsValid())
F(writtenGPR1, true);
if (writtenGPR2 != IMLREG_INVALID)
if (writtenGPR2.IsValid())
F(writtenGPR2, true);
}
bool HasFPRReg(sint16 imlReg) const
bool HasSameBaseFPRRegId(IMLRegID regId) const
{
cemu_assert_debug(imlReg != IMLREG_INVALID);
if (readFPR1 == imlReg)
if (readFPR1.IsValid() && readFPR1.GetRegID() == regId)
return true;
if (readFPR2 == imlReg)
if (readFPR2.IsValid() && readFPR2.GetRegID() == regId)
return true;
if (readFPR3 == imlReg)
if (readFPR3.IsValid() && readFPR3.GetRegID() == regId)
return true;
if (readFPR4 == imlReg)
if (readFPR4.IsValid() && readFPR4.GetRegID() == regId)
return true;
if (writtenFPR1 == imlReg)
if (writtenFPR1.IsValid() && writtenFPR1.GetRegID() == regId)
return true;
return false;
}
@ -316,6 +418,12 @@ struct IMLUsedRegisters
struct IMLInstruction
{
IMLInstruction() {}
// Copy constructor: duplicates the whole instruction, including the operand
// union, as a raw byte copy of sizeof(IMLInstruction) bytes.
// NOTE(review): presumably written by hand because the operand union holds
// IMLReg members, whose user-provided copy constructor would otherwise leave
// the implicit copy constructor deleted - confirm.
IMLInstruction(const IMLInstruction& other)
{
memcpy(this, &other, sizeof(IMLInstruction));
}
uint8 type;
uint8 operation;
union
@ -370,6 +478,7 @@ struct IMLInstruction
uint32 param;
uint32 param2;
uint16 paramU16;
IMLReg paramReg;
}op_macro;
struct
{
@ -446,7 +555,7 @@ struct IMLInstruction
struct
{
// r_s32
uint8 regR;
IMLReg regR;
sint32 immS32;
// condition
uint8 crRegisterIndex;
@ -479,16 +588,17 @@ struct IMLInstruction
void make_debugbreak(uint32 currentPPCAddress = 0)
{
make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0);
make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0, IMLREG_INVALID);
}
void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16)
void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16, IMLReg regParam)
{
this->type = PPCREC_IML_TYPE_MACRO;
this->operation = macroId;
this->op_macro.param = param;
this->op_macro.param2 = param2;
this->op_macro.paramU16 = paramU16;
this->op_macro.paramReg = regParam;
}
void make_cjump_cycle_check()
@ -497,85 +607,85 @@ struct IMLInstruction
this->operation = 0;
}
void make_r_r(uint32 operation, uint8 registerResult, uint8 registerA)
void make_r_r(uint32 operation, IMLReg regR, IMLReg regA)
{
this->type = PPCREC_IML_TYPE_R_R;
this->operation = operation;
this->op_r_r.regR = registerResult;
this->op_r_r.regA = registerA;
this->op_r_r.regR = regR;
this->op_r_r.regA = regA;
}
void make_r_s32(uint32 operation, uint8 registerIndex, sint32 immS32)
void make_r_s32(uint32 operation, IMLReg regR, sint32 immS32)
{
this->type = PPCREC_IML_TYPE_R_S32;
this->operation = operation;
this->op_r_immS32.regR = registerIndex;
this->op_r_immS32.regR = regR;
this->op_r_immS32.immS32 = immS32;
}
void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB)
void make_r_r_r(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB)
{
this->type = PPCREC_IML_TYPE_R_R_R;
this->operation = operation;
this->op_r_r_r.regR = registerResult;
this->op_r_r_r.regA = registerA;
this->op_r_r_r.regB = registerB;
this->op_r_r_r.regR = regR;
this->op_r_r_r.regA = regA;
this->op_r_r_r.regB = regB;
}
void make_r_r_r_carry(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 registerCarry)
void make_r_r_r_carry(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB, IMLReg regCarry)
{
this->type = PPCREC_IML_TYPE_R_R_R_CARRY;
this->operation = operation;
this->op_r_r_r_carry.regR = registerResult;
this->op_r_r_r_carry.regA = registerA;
this->op_r_r_r_carry.regB = registerB;
this->op_r_r_r_carry.regCarry = registerCarry;
this->op_r_r_r_carry.regR = regR;
this->op_r_r_r_carry.regA = regA;
this->op_r_r_r_carry.regB = regB;
this->op_r_r_r_carry.regCarry = regCarry;
}
void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32)
void make_r_r_s32(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32)
{
this->type = PPCREC_IML_TYPE_R_R_S32;
this->operation = operation;
this->op_r_r_s32.regR = registerResult;
this->op_r_r_s32.regA = registerA;
this->op_r_r_s32.regR = regR;
this->op_r_r_s32.regA = regA;
this->op_r_r_s32.immS32 = immS32;
}
void make_r_r_s32_carry(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 registerCarry)
void make_r_r_s32_carry(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32, IMLReg regCarry)
{
this->type = PPCREC_IML_TYPE_R_R_S32_CARRY;
this->operation = operation;
this->op_r_r_s32_carry.regR = registerResult;
this->op_r_r_s32_carry.regA = registerA;
this->op_r_r_s32_carry.regR = regR;
this->op_r_r_s32_carry.regA = regA;
this->op_r_r_s32_carry.immS32 = immS32;
this->op_r_r_s32_carry.regCarry = registerCarry;
this->op_r_r_s32_carry.regCarry = regCarry;
}
void make_compare(uint8 registerA, uint8 registerB, uint8 registerResult, IMLCondition cond)
void make_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
{
this->type = PPCREC_IML_TYPE_COMPARE;
this->operation = -999;
this->op_compare.regR = registerResult;
this->op_compare.regA = registerA;
this->op_compare.regB = registerB;
this->op_compare.regR = regR;
this->op_compare.regA = regA;
this->op_compare.regB = regB;
this->op_compare.cond = cond;
}
void make_compare_s32(uint8 registerA, sint32 immS32, uint8 registerResult, IMLCondition cond)
void make_compare_s32(IMLReg regA, sint32 immS32, IMLReg regR, IMLCondition cond)
{
this->type = PPCREC_IML_TYPE_COMPARE_S32;
this->operation = -999;
this->op_compare_s32.regR = registerResult;
this->op_compare_s32.regA = registerA;
this->op_compare_s32.regR = regR;
this->op_compare_s32.regA = regA;
this->op_compare_s32.immS32 = immS32;
this->op_compare_s32.cond = cond;
}
void make_conditional_jump(uint8 registerBool, bool mustBeTrue)
void make_conditional_jump(IMLReg regBool, bool mustBeTrue)
{
this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP;
this->operation = -999;
this->op_conditional_jump.registerBool = registerBool;
this->op_conditional_jump.registerBool = regBool;
this->op_conditional_jump.mustBeTrue = mustBeTrue;
}
@ -586,12 +696,12 @@ struct IMLInstruction
}
// load from memory
void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
void make_r_memory(IMLReg regD, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
{
this->type = PPCREC_IML_TYPE_LOAD;
this->operation = 0;
this->op_storeLoad.registerData = registerDestination;
this->op_storeLoad.registerMem = registerMemory;
this->op_storeLoad.registerData = regD;
this->op_storeLoad.registerMem = regMem;
this->op_storeLoad.immS32 = immS32;
this->op_storeLoad.copyWidth = copyWidth;
this->op_storeLoad.flags2.swapEndian = switchEndian;
@ -599,12 +709,12 @@ struct IMLInstruction
}
// store to memory
void make_memory_r(uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian)
void make_memory_r(IMLReg regS, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool switchEndian)
{
this->type = PPCREC_IML_TYPE_STORE;
this->operation = 0;
this->op_storeLoad.registerData = registerSource;
this->op_storeLoad.registerMem = registerMemory;
this->op_storeLoad.registerData = regS;
this->op_storeLoad.registerMem = regMem;
this->op_storeLoad.immS32 = immS32;
this->op_storeLoad.copyWidth = copyWidth;
this->op_storeLoad.flags2.swapEndian = switchEndian;
@ -633,7 +743,8 @@ struct IMLInstruction
void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;
void RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& translationTable);
void ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]);
void ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced);
void RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable);
void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]);
void ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegisterReplaced);
};

View File

@ -8,13 +8,18 @@
bool _RegExceedsFPRSpace(IMLReg r)
{
if (r == IMLREG_INVALID)
if (r.IsInvalid())
return false;
if ((uint32)r >= PPC_X64_FPR_USABLE_REGISTERS)
if (r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS)
return true;
return false;
}
// Wraps a bare register ID into an IMLReg whose base format and access format
// are both F64, with a view offset of 0.
IMLReg _FPRRegFromID(IMLRegID regId)
{
	constexpr IMLRegFormat fprFormat = IMLRegFormat::F64;
	return IMLReg(fprFormat, fprFormat, 0, regId);
}
bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext)
{
// only xmm0 to xmm14 may be used, xmm15 is reserved
@ -48,34 +53,34 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
while( true )
{
segIt->imlList[imlIndex].CheckRegisterUsage(&registersUsed);
if(registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS)
if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) || _RegExceedsFPRSpace(registersUsed.writtenFPR1) )
{
// get index of register to replace
sint32 fprToReplace = -1;
if(_RegExceedsFPRSpace(registersUsed.readFPR1) )
fprToReplace = registersUsed.readFPR1;
fprToReplace = registersUsed.readFPR1.GetRegID();
else if(_RegExceedsFPRSpace(registersUsed.readFPR2) )
fprToReplace = registersUsed.readFPR2;
fprToReplace = registersUsed.readFPR2.GetRegID();
else if (_RegExceedsFPRSpace(registersUsed.readFPR3))
fprToReplace = registersUsed.readFPR3;
fprToReplace = registersUsed.readFPR3.GetRegID();
else if (_RegExceedsFPRSpace(registersUsed.readFPR4))
fprToReplace = registersUsed.readFPR4;
fprToReplace = registersUsed.readFPR4.GetRegID();
else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) )
fprToReplace = registersUsed.writtenFPR1;
fprToReplace = registersUsed.writtenFPR1.GetRegID();
if (fprToReplace >= 0)
{
// generate mask of useable registers
uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0
if (registersUsed.readFPR1 != -1)
useableRegisterMask &= ~(1 << (registersUsed.readFPR1));
if (registersUsed.readFPR2 != -1)
useableRegisterMask &= ~(1 << (registersUsed.readFPR2));
if (registersUsed.readFPR3 != -1)
useableRegisterMask &= ~(1 << (registersUsed.readFPR3));
if (registersUsed.readFPR4 != -1)
useableRegisterMask &= ~(1 << (registersUsed.readFPR4));
if (registersUsed.writtenFPR1 != -1)
useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1));
if (registersUsed.readFPR1.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID()));
if (registersUsed.readFPR2.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID()));
if (registersUsed.readFPR3.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID()));
if (registersUsed.readFPR4.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID()));
if (registersUsed.writtenFPR1.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID()));
// get highest unused register index (0-6 range)
sint32 unusedRegisterIndex = -1;
for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++)
@ -107,7 +112,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
{
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = unusedRegisterIndex;
imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex];
}
else
@ -116,14 +121,14 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = unusedRegisterIndex;
imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace];
// name_gprToReplace = unusedRegister
imlInstructionItr = segIt->imlList.data() + (imlIndex + 3);
memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = unusedRegisterIndex;
imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace];
// unusedRegister = name_unusedRegister
imlInstructionItr = segIt->imlList.data() + (imlIndex + 4);
@ -132,7 +137,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
{
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = unusedRegisterIndex;
imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex];
}
else
@ -190,8 +195,8 @@ ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegist
continue;
if( (unloadLockedMask&(1<<i)) != 0 )
continue;
uint32 virtualReg = rCtx->currentMapping[i].virtualReg;
bool isReserved = instructionUsedRegisters->HasFPRReg(virtualReg);
IMLRegID virtualReg = rCtx->currentMapping[i].virtualReg;
bool isReserved = instructionUsedRegisters->HasSameBaseFPRRegId(virtualReg);
if (isReserved)
continue;
if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse)
@ -219,22 +224,22 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon
if (idxInst.IsSuffixInstruction())
break;
idxInst.CheckRegisterUsage(&registersUsed);
sint32 fprMatch[4]; // should be IMLReg, but this code is being dropped soon anyway
sint32 fprReplace[4];
fprMatch[0] = -1; // should be IMLREG_INVALID
fprMatch[1] = -1;
fprMatch[2] = -1;
fprMatch[3] = -1;
fprReplace[0] = -1;
fprReplace[1] = -1;
fprReplace[2] = -1;
fprReplace[3] = -1;
IMLReg fprMatch[4];
IMLReg fprReplace[4];
fprMatch[0] = IMLREG_INVALID;
fprMatch[1] = IMLREG_INVALID;
fprMatch[2] = IMLREG_INVALID;
fprMatch[3] = IMLREG_INVALID;
fprReplace[0] = IMLREG_INVALID;
fprReplace[1] = IMLREG_INVALID;
fprReplace[2] = IMLREG_INVALID;
fprReplace[3] = IMLREG_INVALID;
// generate a mask of registers that we may not free
sint32 numReplacedOperands = 0;
uint32 unloadLockedMask = 0;
for (sint32 f = 0; f < 5; f++)
{
sint32 virtualFpr;
IMLReg virtualFpr;
if (f == 0)
virtualFpr = registersUsed.readFPR1;
else if (f == 1)
@ -245,12 +250,14 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon
virtualFpr = registersUsed.readFPR4;
else if (f == 4)
virtualFpr = registersUsed.writtenFPR1;
if(virtualFpr == IMLREG_INVALID)
if(virtualFpr.IsInvalid())
continue;
cemu_assert_debug(virtualFpr < 64);
cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64);
cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64);
cemu_assert_debug(virtualFpr.GetRegID() < 64);
// check if this virtual FPR is already loaded in any real register
ppcRecRegisterMapping_t* regMapping;
if (rCtx.ppcRegToMapping[virtualFpr] == -1)
if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1)
{
// not loaded
// find available register
@ -269,7 +276,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon
memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction));
imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R;
imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionTemp->op_r_name.regR = (uint8)(unloadRegMapping - rCtx.currentMapping);
imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping));
imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg];
idx++;
// update mapping
@ -285,18 +292,18 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon
memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction));
imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME;
imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionTemp->op_r_name.regR = (uint8)(regMapping-rCtx.currentMapping);
imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr];
imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping));
imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()];
idx++;
// update mapping
regMapping->virtualReg = virtualFpr;
rCtx.ppcRegToMapping[virtualFpr] = (sint32)(regMapping - rCtx.currentMapping);
regMapping->virtualReg = virtualFpr.GetRegID();
rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping);
regMapping->lastUseIndex = rCtx.currentUseIndex;
rCtx.currentUseIndex++;
}
else
{
regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr];
regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()];
regMapping->lastUseIndex = rCtx.currentUseIndex;
rCtx.currentUseIndex++;
}
@ -304,9 +311,9 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon
bool entryFound = false;
for (sint32 t = 0; t < numReplacedOperands; t++)
{
if (fprMatch[t] == virtualFpr)
if (fprMatch[t].IsValid() && fprMatch[t].GetRegID() == virtualFpr.GetRegID())
{
cemu_assert_debug(fprReplace[t] == (regMapping - rCtx.currentMapping));
cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping));
entryFound = true;
break;
}
@ -315,7 +322,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon
{
cemu_assert_debug(numReplacedOperands != 4);
fprMatch[numReplacedOperands] = virtualFpr;
fprReplace[numReplacedOperands] = (sint32)(regMapping - rCtx.currentMapping);
fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping);
numReplacedOperands++;
}
}
@ -345,7 +352,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon
memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction));
imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R;
imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionTemp->op_r_name.regR = i;
imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i);
imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg];
idx++;
}
@ -377,7 +384,7 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG
imlInstruction->CheckRegisterUsage(&registersUsed);
if( registersUsed.readGPR1 == registerIndex || registersUsed.readGPR2 == registerIndex || registersUsed.readGPR3 == registerIndex )
return false;
if (registersUsed.IsGPRWritten(registerIndex))
if (registersUsed.IsBaseGPRWritten(registerIndex))
return true;
}
// todo: Scan next segment(s)
@ -389,15 +396,15 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG
*/
bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth)
{
sint16 registerIndex = nameStoreInstruction->op_r_name.regR;
IMLRegID regId = nameStoreInstruction->op_r_name.regR.GetRegID();
for(size_t i=startIndex; i<imlSegment->imlList.size(); i++)
{
IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
IMLUsedRegisters registersUsed;
imlInstruction->CheckRegisterUsage(&registersUsed);
if( registersUsed.readFPR1 == registerIndex || registersUsed.readFPR2 == registerIndex || registersUsed.readFPR3 == registerIndex || registersUsed.readFPR4 == registerIndex)
if( registersUsed.readFPR1.IsValidAndSameRegID(regId) || registersUsed.readFPR2.IsValidAndSameRegID(regId) || registersUsed.readFPR3.IsValidAndSameRegID(regId) || registersUsed.readFPR4.IsValidAndSameRegID(regId))
return false;
if( registersUsed.writtenFPR1 == registerIndex )
if( registersUsed.writtenFPR1.IsValidAndSameRegID(regId) )
return true;
}
// todo: Scan next segment(s)
@ -409,13 +416,13 @@ bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcI
*/
bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth)
{
sint16 registerIndex = nameStoreInstruction->op_r_name.regR;
IMLReg regR = nameStoreInstruction->op_r_name.regR;
for(sint32 i=startIndex; i>=0; i--)
{
IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
IMLUsedRegisters registersUsed;
imlInstruction->CheckRegisterUsage(&registersUsed);
if( registersUsed.IsGPRWritten(registerIndex) )
if( registersUsed.IsBaseGPRWritten(regR) )
{
if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME )
return true;
@ -468,13 +475,13 @@ bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcI
*/
bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, IMLInstruction* nameStoreInstruction, sint32 scanDepth)
{
sint16 registerIndex = nameStoreInstruction->op_r_name.regR;
IMLRegID regId = nameStoreInstruction->op_r_name.regR.GetRegID();
for(sint32 i=startIndex; i>=0; i--)
{
IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
IMLUsedRegisters registersUsed;
imlInstruction->CheckRegisterUsage(&registersUsed);
if( registersUsed.writtenFPR1 == registerIndex )
if( registersUsed.writtenFPR1.IsValidAndSameRegID(regId))
{
if(imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME )
return true;
@ -485,8 +492,10 @@ bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppc
return false;
}
void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 fprIndex)
void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg fprReg)
{
IMLRegID fprIndex = fprReg.GetRegID();
IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
if (imlInstructionLoad->op_storeLoad.flags2.notExpanded)
return;
@ -504,7 +513,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0) ||
(imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0))
{
if (imlInstruction->op_storeLoad.registerData == fprIndex)
if (imlInstruction->op_storeLoad.registerData.GetRegID() == fprIndex)
{
if (foundMatch == false)
{
@ -524,15 +533,15 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
// check if FPR is overwritten (we can actually ignore read operations?)
imlInstruction->CheckRegisterUsage(&registersUsed);
if (registersUsed.writtenFPR1 == fprIndex)
if (registersUsed.writtenFPR1.IsValidAndSameRegID(fprIndex))
break;
if (registersUsed.readFPR1 == fprIndex)
if (registersUsed.readFPR1.IsValidAndSameRegID(fprIndex))
break;
if (registersUsed.readFPR2 == fprIndex)
if (registersUsed.readFPR2.IsValidAndSameRegID(fprIndex))
break;
if (registersUsed.readFPR3 == fprIndex)
if (registersUsed.readFPR3.IsValidAndSameRegID(fprIndex))
break;
if (registersUsed.readFPR4 == fprIndex)
if (registersUsed.readFPR4.IsValidAndSameRegID(fprIndex))
break;
}
@ -540,7 +549,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
{
// insert expand instruction after store
IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore);
PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, fprIndex);
PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, _FPRRegFromID(fprIndex));
}
}
@ -574,8 +583,12 @@ void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContex
}
}
void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 gprIndex)
void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg)
{
cemu_assert_debug(gprReg.GetBaseFormat() == IMLRegFormat::I64); // todo - proper handling required for non-standard sizes
cemu_assert_debug(gprReg.GetRegFormat() == IMLRegFormat::I32);
IMLRegID gprIndex = gprReg.GetRegID();
IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false )
return;
@ -591,9 +604,9 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp
// check if GPR is stored
if ((imlInstruction->type == PPCREC_IML_TYPE_STORE && imlInstruction->op_storeLoad.copyWidth == 32 ) )
{
if (imlInstruction->op_storeLoad.registerMem == gprIndex)
if (imlInstruction->op_storeLoad.registerMem.GetRegID() == gprIndex)
break;
if (imlInstruction->op_storeLoad.registerData == gprIndex)
if (imlInstruction->op_storeLoad.registerData.GetRegID() == gprIndex)
{
IMLInstruction* imlInstructionStore = imlInstruction;
if (foundMatch == false)
@ -610,18 +623,18 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp
}
// check if GPR is accessed
imlInstruction->CheckRegisterUsage(&registersUsed);
if (registersUsed.readGPR1 == gprIndex ||
registersUsed.readGPR2 == gprIndex ||
registersUsed.readGPR3 == gprIndex)
if (registersUsed.readGPR1.IsValidAndSameRegID(gprIndex) ||
registersUsed.readGPR2.IsValidAndSameRegID(gprIndex) ||
registersUsed.readGPR3.IsValidAndSameRegID(gprIndex))
{
break;
}
if (registersUsed.IsGPRWritten(gprIndex))
if (registersUsed.IsBaseGPRWritten(gprReg))
return; // GPR overwritten, we don't need to byte swap anymore
}
if (foundMatch)
{
PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex);
PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprReg, gprReg);
}
}
@ -650,15 +663,19 @@ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenCont
}
}
// Translates an IML register into the index of the GQR it is mapped to.
// The register's mapped name is read from ppcImlGenContext->mappedRegister; if that name lies in the
// range PPCREC_NAME_SPR0 + SPR_UGQR0 .. PPCREC_NAME_SPR0 + SPR_UGQR7 the offset from the first UGQR
// name is returned.
// Returns -1 when the register is invalid or when its mapped name is not one of the UGQR names.
sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, sint32 registerIndex)
sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg)
{
// no register supplied -> there is no GQR to resolve
if (registerIndex == PPC_REC_INVALID_REGISTER)
if (gqrReg.IsInvalid())
return -1;
// look up which named register this IML register currently stands for
sint32 namedReg = ppcImlGenContext->mappedRegister[registerIndex];
sint32 namedReg = ppcImlGenContext->mappedRegister[gqrReg.GetRegID()];
if (namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + SPR_UGQR7))
{
return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0);
}
else
{
// mapped name is outside the UGQR range; flag it, then fall through to the -1 result
cemu_assert_suspicious();
}
return -1;
}
@ -694,7 +711,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 )
continue;
// get GQR value
cemu_assert_debug(instIt.op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER);
cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid());
sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR);
cemu_assert(gqrIndex >= 0);
if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex])
@ -720,7 +737,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
else if (formatType == 7)
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0;
if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0)
instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER;
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
}
else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1)
{
@ -735,7 +752,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
else if (formatType == 7)
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1;
if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1)
instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER;
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
}
}
else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
@ -744,7 +761,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
continue;
// get GQR value
cemu_assert_debug(instIt.op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER);
cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid());
sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR);
cemu_assert(gqrIndex >= 0 && gqrIndex < 8);
if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex])
@ -769,7 +786,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
else if (formatType == 7)
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0;
if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0)
instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER;
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
}
else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
{
@ -784,7 +801,7 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
else if (formatType == 7)
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1;
if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER;
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
}
}
}

View File

@ -123,18 +123,23 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml
// Pairs a register index with a register name.
// NOTE(review): presumably the element type of the loadList/storeList arrays consumed by
// PPCRecRA_insertGPRLoadInstructions / PPCRecRA_insertGPRStoreInstructions - confirm against their signatures.
typedef struct
{
// register id; the list-based insert helpers use it to fill op_r_name.regR
uint16 registerIndex;
IMLRegID registerIndex;
// name value; the list-based insert helpers copy it into op_r_name.name
uint16 registerName;
}raLoadStoreInfo_t;
void PPCRecRA_insertGPRLoadInstruction(IMLSegment* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName)
// Wraps a raw register id in an IMLReg, passing IMLRegFormat::I64 for both format arguments.
// NOTE(review): the meaning of the third constructor argument (0) is not visible here - confirm against IMLReg.
IMLReg _MakeNativeGPR(IMLRegID regId)
{
return IMLReg(IMLRegFormat::I64, IMLRegFormat::I64, 0, regId);
}
// Writes one PPCREC_IML_TYPE_R_NAME / PPCREC_IML_OP_ASSIGN instruction at position insertIndex of
// imlSegment->imlList, with op_r_name.regR taken from registerIndex and op_r_name.name from registerName.
// PPCRecompiler_pushBackIMLInstructions is called first with a count of 1 - presumably to open one
// slot at insertIndex; confirm against its definition.
void PPCRecRA_insertGPRLoadInstruction(IMLSegment* imlSegment, sint32 insertIndex, IMLRegID registerIndex, sint32 registerName)
{
PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1);
IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0);
// zero the whole instruction before filling in the fields used by this instruction type
memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = registerIndex;
imlInstructionItr->op_r_name.regR = _MakeNativeGPR(registerIndex);
imlInstructionItr->op_r_name.name = registerName;
}
@ -147,19 +152,19 @@ void PPCRecRA_insertGPRLoadInstructions(IMLSegment* imlSegment, sint32 insertInd
IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + i);
imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = (uint8)loadList[i].registerIndex;
imlInstructionItr->op_r_name.regR = _MakeNativeGPR(loadList[i].registerIndex);
imlInstructionItr->op_r_name.name = (uint32)loadList[i].registerName;
}
}
// Writes one PPCREC_IML_TYPE_NAME_R / PPCREC_IML_OP_ASSIGN instruction at position insertIndex of
// imlSegment->imlList, with op_r_name.regR taken from registerIndex and op_r_name.name from registerName.
// PPCRecompiler_pushBackIMLInstructions is called first with a count of 1 - presumably to open one
// slot at insertIndex; confirm against its definition.
void PPCRecRA_insertGPRStoreInstruction(IMLSegment* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName)
void PPCRecRA_insertGPRStoreInstruction(IMLSegment* imlSegment, sint32 insertIndex, IMLRegID registerIndex, sint32 registerName)
{
PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1);
IMLInstruction* imlInstructionItr = imlSegment->imlList.data() + (insertIndex + 0);
// zero the whole instruction before filling in the fields used by this instruction type
memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = registerIndex;
imlInstructionItr->op_r_name.regR = _MakeNativeGPR(registerIndex);
imlInstructionItr->op_r_name.name = registerName;
}
@ -173,7 +178,7 @@ void PPCRecRA_insertGPRStoreInstructions(IMLSegment* imlSegment, sint32 insertIn
memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = (uint8)storeList[i].registerIndex;
imlInstructionItr->op_r_name.regR = _MakeNativeGPR(storeList[i].registerIndex);
imlInstructionItr->op_r_name.name = (uint32)storeList[i].registerName;
}
}
@ -368,7 +373,7 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
raLivenessSubrange_t* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex)
{
uint32 regId = regToSearch & 0xFF;
uint32 regId = regToSearch.GetRegID();
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_perVirtualGPR[regId];
while (subrangeItr)
{
@ -828,7 +833,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML
sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX];
for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++)
virtualReg2PhysReg[i] = -1;
std::unordered_map<IMLReg, IMLReg> virt2PhysRegMap; // key = virtual register, value = physical register
std::unordered_map<IMLRegID, IMLRegID> virtId2PhysRegIdMap; // key = virtual register, value = physical register
IMLRALivenessTimeline livenessTimeline;
sint32 index = 0;
sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0;
@ -850,7 +855,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML
assert_dbg();
#endif
virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister;
virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister);
virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister);
}
// next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
@ -866,7 +871,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML
if (virtualReg2PhysReg[expiredRange->range->virtualRegister] == -1)
assert_dbg();
virtualReg2PhysReg[expiredRange->range->virtualRegister] = -1;
virt2PhysRegMap.erase(expiredRange->range->virtualRegister);
virtId2PhysRegIdMap.erase(expiredRange->range->virtualRegister);
// store GPR if required
// special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed
if (expiredRange->hasStore)
@ -900,13 +905,13 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML
// update translation table
cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1);
virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister;
virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister);
virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister);
}
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
}
// rewrite registers
if (index < imlSegment->imlList.size())
imlSegment->imlList[index].RewriteGPR(virt2PhysRegMap);
imlSegment->imlList[index].RewriteGPR(virtId2PhysRegIdMap);
// next iml instruction
index++;
}
@ -919,7 +924,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML
// update translation table
cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1);
virtualReg2PhysReg[liverange->range->virtualRegister] = -1;
virt2PhysRegMap.erase(liverange->range->virtualRegister);
virtId2PhysRegIdMap.erase(liverange->range->virtualRegister);
// store GPR
if (liverange->hasStore)
{
@ -951,7 +956,7 @@ void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IML
// update translation table
cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1);
virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister;
virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister);
virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister);
}
// next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
@ -1063,7 +1068,8 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext,
while (index < imlSegment->imlList.size())
{
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) {
gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
IMLRegID gprId = gprReg.GetRegID();
cemu_assert_debug(gprId < IML_RA_VIRT_REG_COUNT_MAX);
imlSegment->raDistances.reg[gprId].usageStart = std::min<sint32>(imlSegment->raDistances.reg[gprId].usageStart, index); // index before/at instruction
imlSegment->raDistances.reg[gprId].usageEnd = std::max<sint32>(imlSegment->raDistances.reg[gprId].usageEnd, index + 1); // index after instruction
@ -1156,7 +1162,8 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext,
while (index < imlSegment->imlList.size())
{
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) {
gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
IMLRegID gprId = gprReg.GetRegID();
// add location
PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[gprId], index, !isWritten, isWritten);
#ifdef CEMU_DEBUG_ASSERT

View File

@ -270,9 +270,9 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
accessedTempReg[4] = registersUsed.writtenFPR1;
for (sint32 f = 0; f < 5; f++)
{
if (accessedTempReg[f] == IMLREG_INVALID)
if (accessedTempReg[f].IsInvalid())
continue;
uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]];
uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()];
if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32)
{
segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true;

View File

@ -15,15 +15,15 @@ void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment*
void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint);
// GPR register management
uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
// FPR register management
uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
// IML instruction generation
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER);
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, IMLReg registerResult);
// IML generation - FPU
bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);

View File

@ -53,7 +53,7 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext
return &inst;
}
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet)
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet)
{
if(imlInstruction == NULL)
imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
@ -70,8 +70,11 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte
imlInstruction->op_conditional_r_s32.bitMustBeSet = bitMustBeSet;
}
void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
{
cemu_assert_debug(registerMemory1.IsValid());
cemu_assert_debug(registerMemory2.IsValid());
cemu_assert_debug(registerDestination.IsValid());
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_LOAD_INDEXED;
imlInstruction->operation = 0;
@ -83,8 +86,11 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex
imlInstruction->op_storeLoad.flags2.signExtend = signExtend;
}
void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
{
cemu_assert_debug(registerMemory1.IsValid());
cemu_assert_debug(registerMemory2.IsValid());
cemu_assert_debug(registerDestination.IsValid());
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_STORE_INDEXED;
imlInstruction->operation = 0;
@ -188,14 +194,14 @@ uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcIm
return PPC_REC_INVALID_REGISTER;
}
// Returns the IML register associated with mappedName.
// If PPCRecompilerImlGen_findRegisterByMappedName reports an index other than PPC_REC_INVALID_REGISTER,
// that index is reused; otherwise an index is obtained from PPCRecompilerImlGen_getAndLockFreeTemporaryGPR.
uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
// first check whether a register index is already known for this name
uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName);
if (loadedRegisterIndex != PPC_REC_INVALID_REGISTER)
return loadedRegisterIndex;
return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, loadedRegisterIndex);
// nothing found for this name: request a temporary GPR for it
uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName);
return registerIndex;
return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, registerIndex);
}
IMLReg _GetRegGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
@ -225,7 +231,7 @@ IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
// get throw-away register. Only valid for the scope of a single translated instruction
// be careful to not collide with manually loaded temporary register
uint32 _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
{
cemu_assert_debug(index < 4);
return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index);
@ -235,29 +241,29 @@ uint32 _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
* Loads a PPC fpr into any of the available IML FPU registers
* If loadNew is false, it will check first if the fpr is already loaded into any IML register
*/
uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew)
IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew)
{
// the lookup of an existing register is only attempted when the caller did not ask for a new one
if( loadNew == false )
{
uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName);
if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
return loadedRegisterIndex;
return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex);
}
// loadNew was set, or the lookup returned PPC_REC_INVALID_REGISTER: request a temporary FPR for mappedName
uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName);
return registerIndex;
return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex);
}
/*
* Checks if a PPC fpr register is already loaded into any IML register
* If no, it will create a new undefined temporary IML FPU register and map the name (effectively overwriting the old ppc register)
* If not, it will create a new undefined temporary IML FPU register and map the name (effectively overwriting the old ppc register)
*/
uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
// reuse the register index reported for mappedName unless the lookup returned PPC_REC_INVALID_REGISTER
uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName);
if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
return loadedRegisterIndex;
return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex);
// no register index found for this name: request a temporary FPR for mappedName
uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName);
return registerIndex;
return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex);
}
bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount)
@ -334,16 +340,16 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin
}
// for handling RC bit of many instructions
void PPCImlGen_UpdateCR0(ppcImlGenContext_t* ppcImlGenContext, uint32 registerR)
void PPCImlGen_UpdateCR0(ppcImlGenContext_t* ppcImlGenContext, IMLReg regR)
{
IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_LT);
IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_GT);
IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
// todo - SO bit?
// todo - SO bit
ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegLT, IMLCondition::SIGNED_LT);
ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegGT, IMLCondition::SIGNED_GT);
ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegEQ, IMLCondition::EQ);
ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegLT, IMLCondition::SIGNED_LT);
ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegGT, IMLCondition::SIGNED_GT);
ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegEQ, IMLCondition::EQ);
//ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, crBitRegSO, 0); // todo - copy from XER
@ -355,7 +361,7 @@ void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
// split before and after to make sure the macro is in an isolated segment that we can make enterable
PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock);
ppcImlGenContext->currentOutputSegment->SetEnterable(ppcImlGenContext->ppcAddressOfCurrentInstruction);
PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0);
PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID);
IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock);
middleSeg->SetLinkBranchTaken(nullptr);
middleSeg->SetLinkBranchNotTaken(nullptr);
@ -369,12 +375,12 @@ bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD);
if (spr == SPR_CTR || spr == SPR_LR)
{
uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg);
}
else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7)
{
uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg);
ppcImlGenContext->tracking.modifiesGQR[spr - SPR_UGQR0] = true;
}
@ -391,12 +397,12 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD);
if (spr == SPR_LR || spr == SPR_CTR)
{
uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg);
}
else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7)
{
uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg);
}
else
@ -417,7 +423,7 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
{
// TBL / TBU
uint32 param2 = spr | (rD << 16);
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0);
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0, IMLREG_INVALID);
IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock);
return true;
@ -515,14 +521,14 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
if( opcode&PPC_OPC_LK )
{
// function call
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch);
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID);
return true;
}
// is jump destination within recompiled function?
if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest))
ppcImlGenContext->emitInst().make_jump();
else
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch);
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID);
return true;
}
@ -564,7 +570,7 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock;
IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock);
ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, conditionMustBeTrue);
blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch);
blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID);
return true;
}
return false;
@ -623,18 +629,18 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
if (!BO.conditionIgnore())
regCRBit = _GetRegCR(ppcImlGenContext, crRegister, crBit);
uint32 branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg);
IMLReg branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg);
if (LK)
{
if (sprReg == SPR_LR)
{
// if the branch target is LR, then preserve it in a temporary
cemu_assert_suspicious(); // this case needs testing
uint32 tmpRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY);
IMLReg tmpRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpRegister, branchDestReg);
branchDestReg = tmpRegister;
}
uint32 registerLR = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR);
IMLReg registerLR = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR);
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4);
}
@ -651,14 +657,14 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock;
IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock);
ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, !BO.conditionInverted());
bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0);
bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, 0, 0, 0, branchDestReg);
}
else
{
// branch always, no condition and no decrementer check
cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow);
cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget);
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0);
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_TO_REG, 0, 0, 0, branchDestReg);
}
return true;
}
@ -879,9 +885,9 @@ bool PPCRecompilerImlGen_MULLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
int rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD);
uint32 registerOperand = _GetRegGPR(ppcImlGenContext, rA);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand, (sint32)imm);
IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_MULTIPLY_SIGNED, regD, regA, (sint32)imm);
return true;
}
@ -889,17 +895,16 @@ bool PPCRecompilerImlGen_MULLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
// Emits a PPCREC_IML_OP_MULTIPLY_SIGNED instruction with rD as destination and rA, rB as operands.
// Returns false (no IML emitted) when the OE bit of the opcode is set, true otherwise.
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
//hCPU->gpr[rD] = hCPU->gpr[rA] * hCPU->gpr[rB];
// NOTE(review): the GPR lookups below run before the OE early-out; confirm that
// _GetRegGPR has no side effect that matters when the function then returns false
uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD);
uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA);
uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB);
IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
// the OE variant is rejected here; the caller sees 'false'
if (opcode & PPC_OPC_OE)
{
return false;
}
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, regD, regA, regB);
// record form (Rc bit set): CR0 is updated from the result register
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult);
PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
return true;
}
@ -907,12 +912,12 @@ bool PPCRecompilerImlGen_MULHW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
// Emits a PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED instruction with rD as destination and rA, rB as operands.
// Always returns true.
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD);
uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA);
uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2);
IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, regD, regA, regB);
// record form (Rc bit set): CR0 is updated from the result register
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult);
PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
return true;
}
@ -920,12 +925,12 @@ bool PPCRecompilerImlGen_MULHWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
{
// Emits a PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED instruction with rD as destination and rA, rB as operands.
// Always returns true.
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD);
uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA);
uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2);
IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, regD, regA, regB);
// record form (Rc bit set): CR0 is updated from the result register
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult);
PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
return true;
}
@ -933,12 +938,12 @@ bool PPCRecompilerImlGen_DIVW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
{
// Emits a PPCREC_IML_OP_DIVIDE_SIGNED instruction with rD as destination and rA, rB as operands.
// Always returns true.
// NOTE(review): PPC_OPC_OE is not tested here, whereas PPCRecompilerImlGen_MULLW returns false
// when it is set - confirm whether the OE form needs the same early-out
// NOTE(review): no guard against a zero divisor is emitted at this level; presumably the
// backend handles it - verify
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD);
uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA);
uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2);
IMLReg regR = _GetRegGPR(ppcImlGenContext, rD);
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, regR, regA, regB);
// record form (Rc bit set): CR0 is updated from the result register
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult);
PPCImlGen_UpdateCR0(ppcImlGenContext, regR);
return true;
}
@ -946,13 +951,12 @@ bool PPCRecompilerImlGen_DIVWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
{
// Emits a PPCREC_IML_OP_DIVIDE_UNSIGNED instruction with rD as destination and rA, rB as operands.
// Always returns true.
// NOTE(review): PPC_OPC_OE is not tested here, whereas PPCRecompilerImlGen_MULLW returns false
// when it is set - confirm whether the OE form needs the same early-out
// NOTE(review): no guard against a zero divisor is emitted at this level; presumably the
// backend handles it - verify
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
// hCPU->gpr[rD] = (uint32)a / (uint32)b;
uint32 registerResult = _GetRegGPR(ppcImlGenContext, rD);
uint32 registerOperand1 = _GetRegGPR(ppcImlGenContext, rA);
uint32 registerOperand2 = _GetRegGPR(ppcImlGenContext, rB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2);
IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, regD, regA, regB);
// record form (Rc bit set): CR0 is updated from the result register
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, registerResult);
PPCImlGen_UpdateCR0(ppcImlGenContext, regD);
return true;
}
@ -962,30 +966,30 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
// mask derived from the MB..ME fields; only used by the general handler below
uint32 mask = ppc_mask(MB, ME);
// rS is the source register, rA receives the result
uint32 registerRS = _GetRegGPR(ppcImlGenContext, rS);
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
if( ME == (31-SH) && MB == 0 )
{
// SLWI
// this field combination is emitted as a single left shift by SH
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regA, regS, SH);
}
else if( SH == (32-MB) && ME == 31 )
{
// SRWI
// this field combination is emitted as a single unsigned right shift by MB
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, registerRA, registerRS, MB);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regA, regS, MB);
}
else
{
// general handler
// sequence: copy source into destination, rotate left by SH, AND with mask
// each step is skipped when it would have no effect (same register / SH == 0 / mask all ones)
// the copy is skipped when source and destination are the same register
// (old line compares register ids, new line compares the PPC register indices)
if (registerRA != registerRS)
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRA, registerRS);
if (rA != rS)
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS);
if (SH != 0)
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH);
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, regA, SH);
if (mask != 0xFFFFFFFF)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerRA, registerRA, (sint32)mask);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regA, (sint32)mask);
}
// record form (Rc bit set): CR0 is updated from the destination register
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA);
PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
return true;
}
@ -994,13 +998,13 @@ bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
int rS, rA, SH, MB, ME;
PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
// pack RLWIMI parameters into single integer
uint32 vImm = MB|(ME<<8)|(SH<<16);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, registerRA, registerRS, (sint32)vImm);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, regA, regS, (sint32)vImm);
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA);
PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
return true;
}
@ -1009,14 +1013,14 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
sint32 rS, rA, rB, MB, ME;
PPC_OPC_TEMPL_M(opcode, rS, rA, rB, MB, ME);
uint32 mask = ppc_mask(MB, ME);
uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB);
IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, regA, regS, regB);
if( mask != 0xFFFFFFFF )
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerRA, registerRA, (sint32)mask);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regA, (sint32)mask);
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA);
PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
return true;
}
@ -1026,39 +1030,39 @@ bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
// but only shifts up to register bitwidth minus one are well defined in IML so this requires special handling for shifts >= 32
sint32 rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
// rS: value to shift, rB: shift amount, rA: destination, XER_CA: receives the carry flag
uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
uint32 registerCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS);
IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
IMLReg regCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
// four temporaries: masked shift amount, branch condition, and two scratch registers for the carry mask
uint32 registerTmpShiftAmount = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
uint32 registerTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1);
uint32 registerTmp1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2);
uint32 registerTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3);
IMLReg regTmpShiftAmount = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1);
IMLReg regTmp1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2);
IMLReg regTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3);
// load masked shift factor into temporary register
// NOTE(review): the comparison below uses the constant 32 with UNSIGNED_GT, while the comments here
// speak of "shifts >= 32" and "shift size below 32". If the compare means (amount > 32), an amount of
// exactly 32 takes the not-taken path - confirm the intended threshold
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmpShiftAmount, registerRB, 0x3F);
ppcImlGenContext->emitInst().make_compare_s32(registerTmpShiftAmount, 32, registerTmpCondBool, IMLCondition::UNSIGNED_GT);
ppcImlGenContext->emitInst().make_conditional_jump(registerTmpCondBool, true);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmpShiftAmount, regB, 0x3F);
ppcImlGenContext->emitInst().make_compare_s32(regTmpShiftAmount, 32, regTmpCondBool, IMLCondition::UNSIGNED_GT);
ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true);
// the two lambdas emit the IML for the taken and the not-taken side of the conditional jump
PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock,
[&](ppcImlGenContext_t& genCtx)
{
/* branch taken */
// NOTE(review): this path still shifts by the unclamped temporary amount, which the comment at the
// top describes as not well defined in IML for large shifts - verify backend behavior
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, registerTmpShiftAmount);
genCtx.emitInst().make_compare_s32(registerRA, 0, registerCarry, IMLCondition::NEQ); // if the sign bit is still set it also means it was shifted out and we can set carry
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, regTmpShiftAmount);
genCtx.emitInst().make_compare_s32(regA, 0, regCarry, IMLCondition::NEQ); // if the sign bit is still set it also means it was shifted out and we can set carry
},
[&](ppcImlGenContext_t& genCtx)
{
/* branch not taken, shift size below 32 */
// carry = ((input >> 31) & ((1 << amount) - 1) & input) != 0, computed before the result shift
genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp1, registerRS, 31); // signMask = input >> 31 (arithmetic shift)
genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerTmp2, 1); // shiftMask = ((1<<SH)-1)
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_SHIFT, registerTmp2, registerTmp2, registerTmpShiftAmount);
genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, registerTmp2, registerTmp2, 1);
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp1, registerTmp1, registerTmp2); // signMask & shiftMask & input
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp1, registerTmp1, registerRS);
genCtx.emitInst().make_compare_s32(registerTmp1, 0, registerCarry, IMLCondition::NEQ);
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, registerTmpShiftAmount);
genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp1, regS, 31); // signMask = input >> 31 (arithmetic shift)
genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regTmp2, 1); // shiftMask = ((1<<SH)-1)
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_SHIFT, regTmp2, regTmp2, regTmpShiftAmount);
genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, regTmp2, regTmp2, 1);
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_AND, regTmp1, regTmp1, regTmp2); // signMask & shiftMask & input
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_AND, regTmp1, regTmp1, regS);
genCtx.emitInst().make_compare_s32(regTmp1, 0, regCarry, IMLCondition::NEQ);
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, regTmpShiftAmount);
}
);
return true;
@ -1072,19 +1076,19 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
cemu_assert_debug(SH < 32);
if (SH == 0)
return false; // becomes a no-op (unless RC bit is set) but also sets ca bit to 0?
uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS);
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA);
uint32 registerCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
uint32 registerTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA);
IMLReg regCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
// calculate CA first
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp, registerRS, 31); // signMask = input >> 31 (arithmetic shift)
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp, registerTmp, registerRS); // testValue = input & signMask & ((1<<SH)-1)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmp, registerTmp, ((1 << SH) - 1));
ppcImlGenContext->emitInst().make_compare_s32(registerTmp, 0, registerCarry, IMLCondition::NEQ); // ca = (testValue != 0)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp, regS, 31); // signMask = input >> 31 (arithmetic shift)
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regTmp, regTmp, regS); // testValue = input & signMask & ((1<<SH)-1)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, ((1 << SH) - 1));
ppcImlGenContext->emitInst().make_compare_s32(regTmp, 0, regCarry, IMLCondition::NEQ); // ca = (testValue != 0)
// do the actual shift
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, (sint32)SH);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, (sint32)SH);
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA);
PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
return true;
}
@ -1093,12 +1097,12 @@ bool PPCRecompilerImlGen_SLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
int rS, rA, rB;
PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
IMLReg registerRS = _GetRegGPR(ppcImlGenContext, rS);
IMLReg registerRB = _GetRegGPR(ppcImlGenContext, rB);
IMLReg registerRA = _GetRegGPR(ppcImlGenContext, rA);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB);
IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, regA, regS, regB);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA);
PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
return true;
}
@ -1208,7 +1212,7 @@ bool PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint
regB = regA;
regA = IMLREG_INVALID;
}
if(regA != IMLREG_INVALID)
if(regA.IsValid())
PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, regDst, regA, regB, bitWidth, signExtend, isBigEndian);
else
ppcImlGenContext->emitInst().make_r_memory(regDst, regB, 0, bitWidth, signExtend, isBigEndian);
@ -1235,7 +1239,7 @@ bool PPCRecompilerImlGen_STORE(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
IMLReg regD = _GetRegGPR(ppcImlGenContext, rD);
if (updateAddrReg)
{
if (regD == regA)
if (rD == rA)
{
// make sure to keep source data intact
regD = _GetRegTemporary(ppcImlGenContext, 0);
@ -1270,7 +1274,7 @@ bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uin
regB = regA;
regA = IMLREG_INVALID;
}
if (regA == IMLREG_INVALID)
if (regA.IsInvalid())
ppcImlGenContext->emitInst().make_memory_r(regSrc, regB, 0, bitWidth, isBigEndian);
else
PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, regSrc, regA, regB, bitWidth, false, isBigEndian);
@ -1405,7 +1409,7 @@ bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA);
IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL);
// calculate EA
if (regA != IMLREG_INVALID)
if (regA.IsValid())
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB);
else
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB);
@ -1426,7 +1430,7 @@ bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
IMLReg regTmpCompareBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3);
// calculate EA
IMLReg regCalcEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY);
if (regA != IMLREG_INVALID)
if (regA.IsValid())
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regCalcEA, regA, regB);
else
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCalcEA, regB);
@ -1466,7 +1470,7 @@ bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// I found contradictory information of whether the reservation is cleared in all cases, so unit testing would be required
// Most sources state that it is cleared on successful store. They don't explicitly mention what happens on failure
// "The PowerPC 600 series, part 7: Atomic memory access and cache coherency" states that it is always cleared
// There may also be different behavior between individual PPC generations
// There may also be different behavior between individual PPC architectures
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResEA, 0);
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResVal, 0);
@ -1479,8 +1483,8 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
rA = (opcode>>16)&0x1F;
rB = (opcode>>11)&0x1F;
// prepare registers
uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):0;
uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
IMLReg gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID;
IMLReg gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
// store
if( rA != 0 )
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB);
@ -1496,7 +1500,7 @@ bool PPCRecompilerImlGen_OR_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
if(regS == regB) // check for MR mnemonic
if(rS == rB) // check for MR mnemonic
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS);
else
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regB);
@ -1515,7 +1519,7 @@ bool PPCRecompilerImlGen_ORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0);
sint32 regA = _GetRegGPR(ppcImlGenContext, rA);
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regTmp);
if (opcode & PPC_OPC_RC)
@ -1549,7 +1553,7 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0);
sint32 regA = _GetRegGPR(ppcImlGenContext, rA);
IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regA, regS, regTmp);
if (opcode & PPC_OPC_RC)
@ -1717,7 +1721,7 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// Emits a PPCREC_IML_MACRO_HLE macro instruction for the current PPC instruction.
// The macro carries the address of the current instruction and the HLE function id,
// which is taken from the low 16 bits of the opcode. Always returns true.
bool PPCRecompilerImlGen_HLE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
uint32 hleFuncId = opcode&0xFFFF;
// the newer make_macro form takes an additional register argument; none is needed here (IMLREG_INVALID)
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_HLE, ppcImlGenContext->ppcAddressOfCurrentInstruction, hleFuncId, 0);
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_HLE, ppcImlGenContext->ppcAddressOfCurrentInstruction, hleFuncId, 0, IMLREG_INVALID);
return true;
}
@ -2931,7 +2935,7 @@ void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, P
IMLSegment* exitSegment = ppcImlGenContext.NewSegment();
splitSeg->SetLinkBranchTaken(exitSegment);
exitSegment->AppendInstruction()->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress, 0, 0);
exitSegment->AppendInstruction()->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress, 0, 0, IMLREG_INVALID);
}
void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext)

File diff suppressed because it is too large Load Diff