PPCRec: New compare and cond jump instrs, update RA

Storing the condition result in a register instead of imitating PPC CR lets us simplify the backend a lot. Only implemented as PoC for BDZ/BDNZ so far.
This commit is contained in:
Exzap 2022-12-17 21:06:27 +01:00
parent ac22a38d68
commit 91f972753e
19 changed files with 609 additions and 323 deletions
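The gist of the new scheme, condensed into a single illustrative helper (not part of the commit — the helper name and the fusing of both handlers into one function are hypothetical, but every emitter call below appears verbatim in the diff): the compare materializes its boolean result in a GPR via SETcc, and the conditional jump only has to TEST that register, so the backend no longer needs to track x86 flag state or emulated CR bits between instructions.

static void EmitCompareAndConditionalJump(x64GenContext_t* x64GenContext, IMLInstruction* cmpInstr, IMLInstruction* jumpInstr, IMLSegment* imlSegment)
{
    auto regR = _reg8(cmpInstr->op_compare.registerResult);   // GPR holding the boolean result
    auto regA = _reg32(cmpInstr->op_compare.registerOperandA);
    auto regB = _reg32(cmpInstr->op_compare.registerOperandB);
    // compare: regR = (regA <cond> regB) ? 1 : 0
    x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero the bytes SETcc leaves untouched
    x64GenContext->emitter->CMP_dd(regA, regB);
    x64GenContext->emitter->SETcc_b(_x86Cond(cmpInstr->op_compare.cond), regR);
    // conditional jump: branch purely on the boolean register
    auto regBool = _reg8(jumpInstr->op_conditionalJump2.registerBool);
    x64GenContext->emitter->TEST_bb(regBool, regBool);
    PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); // reloc target patched once code layout is known
    x64GenContext->emitter->Jcc_j32(jumpInstr->op_conditionalJump2.mustBeTrue ? X86_CONDITION_NZ : X86_CONDITION_Z, 0);
}

For the BDZ/BDNZ proof of concept this removes the old SUB-with-CR-temp special case (now commented out further down): the CTR decrement becomes a plain SUB, and a separate COMPARE_S32 plus CONDITIONAL_JUMP pair carries the loop condition.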

View File

@ -8,13 +8,44 @@
#include "util/MemMapper/MemMapper.h"
#include "Common/cpu_features.h"
static x86Assembler64::GPR32 _reg32(sint8 physRegId)
{
return (x86Assembler64::GPR32)physRegId;
}
static x86Assembler64::GPR8_REX _reg8(sint8 physRegId)
{
return (x86Assembler64::GPR8_REX)physRegId;
}
static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
{
return (x86Assembler64::GPR32)regId;
}
X86Cond _x86Cond(IMLCondition imlCond)
{
switch (imlCond)
{
case IMLCondition::EQ:
return X86_CONDITION_Z;
case IMLCondition::NEQ:
return X86_CONDITION_NZ;
default:
break;
}
cemu_assert_suspicious();
return X86_CONDITION_Z;
}
/*
* Remember the current instruction output offset for relocation
* The instruction emitted immediately after this call will be adjusted
*/
void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr)
{
x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, extraInfo);
x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->emitter->GetWriteIndex(), extraInfo);
}
/*
@ -37,7 +68,7 @@ void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunctio
void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset)
{
uint8* instructionData = x64GenContext->codeBuffer + jumpInstructionOffset;
uint8* instructionData = x64GenContext->emitter->GetBufferPtr() + jumpInstructionOffset;
if (instructionData[0] == 0x0F && (instructionData[1] >= 0x80 && instructionData[1] <= 0x8F))
{
// far conditional jump
@ -241,7 +272,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R13, (uint64)memory_base);
// check if cycles were decremented below zero, if yes -> leave recompiler
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative
sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_CARRY, 0);
//x64Gen_int3(x64GenContext);
//x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, ppcAddress);
@ -254,7 +285,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
//// JMP [recompilerCallTable+EAX/4*8]
//x64Gen_int3(x64GenContext);
x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable));
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
// check if instruction pointer was changed
// assign new instruction pointer to EAX
x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer));
@ -537,7 +568,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
// realRegisterMem now holds EA
x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr));
sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0);
// EA matches reservation
// backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten)
@ -569,7 +600,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31);
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO));
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->emitter->GetWriteIndex());
}
else
return false;
@ -690,16 +721,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
else
{
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerA, imlInstruction->op_r_r.registerA);
sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult);
x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32-1);
sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED )
@ -909,21 +940,21 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
}
x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
{
// registerResult -= immS32
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
if (imlInstruction->crRegister == PPCREC_CR_REG_TEMP)
{
// do nothing -> SUB is for BDNZ instruction
}
else if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
// update cr register
assert_dbg();
}
x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
}
//else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
//{
// // registerResult -= immS32
// PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
// if (imlInstruction->crRegister == PPCREC_CR_REG_TEMP)
// {
// // do nothing -> SUB is for BDNZ instruction
// }
// else if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
// {
// // update cr register
// assert_dbg();
// }
// x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
//}
else if( imlInstruction->operation == PPCREC_IML_OP_AND )
{
// registerResult &= immS32
@ -1349,11 +1380,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
x64Gen_not_reg64Low32(x64GenContext, rRegOperandA);
x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandA);
x64Gen_not_reg64Low32(x64GenContext, rRegOperandA);
sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0);
// reset carry flag + jump destination afterwards
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
// OR ((~a+b+1)<1) == true -> ca = 1
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA);
// todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1
@ -1361,11 +1392,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB);
x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1);
sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0);
// reset carry flag + jump destination afterwards
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
// do subtraction
if( rRegOperandB == rRegOperandA )
{
@ -1419,7 +1450,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
for (sint32 b = 0; b < 6; b++)
{
x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b));
sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
if (b == 5)
{
@ -1432,7 +1463,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
else
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b));
}
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
}
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
}
@ -1475,10 +1506,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
for(sint32 b=0; b<5; b++)
{
x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b));
sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b));
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
}
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
}
@ -1512,15 +1543,13 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// we use the same shift by register approach as in SLW/SRW, but we have to differentiate by signed/unsigned shift since it influences how the carry flag is set
x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 0x80000000);
sint32 jumpInstructionJumpToSignedShift = x64GenContext->codeBufferIndex;
sint32 jumpInstructionJumpToSignedShift = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0);
//sint32 jumpInstructionJumpToEnd = x64GenContext->codeBufferIndex;
//x64Gen_jmpc(x64GenContext, X86_CONDITION_EQUAL, 0);
// unsigned shift (MSB of input register is not set)
for(sint32 b=0; b<6; b++)
{
x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b));
sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
if( b == 5 )
{
@ -1531,24 +1560,24 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{
x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b));
}
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
}
sint32 jumpInstructionJumpToEnd = x64GenContext->codeBufferIndex;
sint32 jumpInstructionJumpToEnd = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NONE, 0);
// signed shift
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->emitter->GetWriteIndex());
for(sint32 b=0; b<6; b++)
{
// check if we need to shift by (1<<bit)
x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b));
sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
// set ca if any non-zero bit is shifted out
x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (1<<(1<<b))-1);
sint32 jumpInstructionJumpToAfterCa = x64GenContext->codeBufferIndex;
sint32 jumpInstructionJumpToAfterCa = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if no bit is set
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->emitter->GetWriteIndex());
// arithmetic shift
if( b == 5 )
{
@ -1560,10 +1589,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{
x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b));
}
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
}
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->emitter->GetWriteIndex());
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
// update CR if requested
// todo
@ -1693,22 +1722,67 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
return true;
}
bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
auto regR = _reg8(imlInstruction->op_compare.registerResult);
auto regA = _reg32(imlInstruction->op_compare.registerOperandA);
auto regB = _reg32(imlInstruction->op_compare.registerOperandB);
X86Cond cond = _x86Cond(imlInstruction->op_compare.cond);
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc
x64GenContext->emitter->CMP_dd(regA, regB);
x64GenContext->emitter->SETcc_b(cond, regR);
return true;
}
bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
auto regR = _reg8(imlInstruction->op_compare_s32.registerResult);
auto regA = _reg32(imlInstruction->op_compare_s32.registerOperandA);
sint32 imm = imlInstruction->op_compare_s32.immS32;
X86Cond cond = _x86Cond(imlInstruction->op_compare_s32.cond);
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc
x64GenContext->emitter->CMP_di32(regA, imm);
x64GenContext->emitter->SETcc_b(cond, regR);
return true;
}
bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
auto regBool = _reg8(imlInstruction->op_conditionalJump2.registerBool);
bool mustBeTrue = imlInstruction->op_conditionalJump2.mustBeTrue;
x64GenContext->emitter->TEST_bb(regBool, regBool);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64GenContext->emitter->Jcc_j32(mustBeTrue ? X86_CONDITION_NZ : X86_CONDITION_Z, 0);
return true;
}
bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
sint32 regResult = imlInstruction->op_r_r_s32.registerResult;
sint32 regOperand = imlInstruction->op_r_r_s32.registerA;
uint32 immS32 = imlInstruction->op_r_r_s32.immS32;
if( imlInstruction->operation == PPCREC_IML_OP_ADD )
{
// registerResult = registerOperand + immS32
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
if( rRegResult != rRegOperand )
{
// copy value to destination register before doing addition
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand);
}
x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32);
if(regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
x64Gen_add_reg64Low32_imm32(x64GenContext, regResult, (uint32)immU32);
}
else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
if (regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY )
{
@ -1733,9 +1807,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
assert_dbg();
}
sint32 crRegister = imlInstruction->crRegister;
//x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT));
//x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT));
//x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ));
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction);
}
}
@ -1761,11 +1832,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
x64Gen_not_reg64Low32(x64GenContext, rRegOperand);
x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperand);
x64Gen_not_reg64Low32(x64GenContext, rRegOperand);
sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0);
// reset carry flag + jump destination afterwards
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
// OR ((~a+b+1)<1) == true -> ca = 1
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand);
// todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1
@ -1773,11 +1844,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32);
x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1);
sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0);
// reset carry flag + jump destination afterwards
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
// do actual computation of value, note: a - b is equivalent to a + ~b + 1
x64Gen_not_reg64Low32(x64GenContext, rRegResult);
x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immS32 + 1);
@ -1839,12 +1910,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
// SAR registerResult, SH
x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh);
// JNS <skipInstruction> (if sign not set)
sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here
// MOV BYTE [ESP+xer_ca], 0
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// jump destination
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
// CR update
if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
{
@ -2147,9 +2218,6 @@ uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size)
bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext)
{
x64GenContext_t x64GenContext{};
x64GenContext.codeBufferSize = 1024;
x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize);
x64GenContext.codeBufferIndex = 0;
x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER;
// generate iml instruction code
@ -2157,7 +2225,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
x64GenContext.currentSegment = segIt;
segIt->x64Offset = x64GenContext.codeBufferIndex;
segIt->x64Offset = x64GenContext.emitter->GetWriteIndex();
for(size_t i=0; i<segIt->imlList.size(); i++)
{
IMLInstruction* imlInstruction = segIt->imlList.data() + i;
@ -2198,9 +2266,24 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
codeGenerationFailed = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_R )
else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
{
if( PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false )
if (PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
{
codeGenerationFailed = true;
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE)
{
PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
}
else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
{
PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
}
else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
{
codeGenerationFailed = true;
}
@ -2324,11 +2407,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
// handle failed code generation
if( codeGenerationFailed )
{
free(x64GenContext.codeBuffer);
return false;
}
// allocate executable memory
uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex);
uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
size_t baseAddress = (size_t)executableMemory;
// fix relocs
for(auto& relocIt : x64GenContext.relocateOffsetTable2)
@ -2341,7 +2423,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
x64Offset = destSegment->x64Offset;
uint32 relocBase = relocIt.offset;
uint8* relocInstruction = x64GenContext.codeBuffer+relocBase;
uint8* relocInstruction = x64GenContext.emitter->GetBufferPtr()+relocBase;
if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) )
{
// Jcc relativeImm32
@ -2374,21 +2456,17 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
}
// copy code to executable memory
memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex);
free(x64GenContext.codeBuffer);
x64GenContext.codeBuffer = nullptr;
std::span<uint8> codeBuffer = x64GenContext.emitter->GetBuffer();
memcpy(executableMemory, codeBuffer.data(), codeBuffer.size_bytes());
// set code
PPCRecFunction->x86Code = executableMemory;
PPCRecFunction->x86Size = x64GenContext.codeBufferIndex;
PPCRecFunction->x86Size = codeBuffer.size_bytes();
return true;
}
void PPCRecompilerX64Gen_generateEnterRecompilerCode()
{
x64GenContext_t x64GenContext{};
x64GenContext.codeBufferSize = 1024;
x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize);
x64GenContext.codeBufferIndex = 0;
x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER;
// start of recompiler entry function
@ -2419,7 +2497,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode()
x64Gen_writeU8(&x64GenContext, 0x83);
x64Gen_writeU8(&x64GenContext, 0x04);
x64Gen_writeU8(&x64GenContext, 0x24);
uint32 jmpPatchOffset = x64GenContext.codeBufferIndex;
uint32 jmpPatchOffset = x64GenContext.emitter->GetWriteIndex();
x64Gen_writeU8(&x64GenContext, 0); // skip the distance until after the JMP
x64Emit_mov_mem64_reg64(&x64GenContext, X86_REG_RDX, offsetof(PPCInterpreter_t, rspTemp), X86_REG_RSP);
@ -2434,7 +2512,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode()
//JMP recFunc
x64Gen_jmp_reg64(&x64GenContext, X86_REG_RCX); // call argument 1
x64GenContext.codeBuffer[jmpPatchOffset] = (x64GenContext.codeBufferIndex-(jmpPatchOffset-4));
x64GenContext.emitter->GetBuffer()[jmpPatchOffset] = (x64GenContext.emitter->GetWriteIndex() -(jmpPatchOffset-4));
//recompilerExit1:
x64Gen_pop_reg64(&x64GenContext, X86_REG_R15);
@ -2455,10 +2533,9 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode()
// RET
x64Gen_ret(&x64GenContext);
uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex);
uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
// copy code to executable memory
memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex);
free(x64GenContext.codeBuffer);
memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes());
PPCRecompiler_enterRecompilerCode = (void ATTR_MS_ABI (*)(uint64,uint64))executableMemory;
}
@ -2466,9 +2543,6 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode()
void* PPCRecompilerX64Gen_generateLeaveRecompilerCode()
{
x64GenContext_t x64GenContext{};
x64GenContext.codeBufferSize = 128;
x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize);
x64GenContext.codeBufferIndex = 0;
x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER;
// update instruction pointer
@ -2481,10 +2555,9 @@ void* PPCRecompilerX64Gen_generateLeaveRecompilerCode()
// RET
x64Gen_ret(&x64GenContext);
uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex);
uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes());
// copy code to executable memory
memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex);
free(x64GenContext.codeBuffer);
memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes());
return executableMemory;
}

View File

@ -1,6 +1,8 @@
#include "../PPCRecompiler.h" // todo - get rid of dependency
#include "x86Emitter.h"
struct x64RelocEntry_t
{
x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {};
@ -12,10 +14,18 @@ struct x64RelocEntry_t
struct x64GenContext_t
{
IMLSegment* currentSegment{};
x86Assembler64* emitter;
x64GenContext_t()
{
emitter = new x86Assembler64();
}
~x64GenContext_t()
{
delete emitter;
}
uint8* codeBuffer{};
sint32 codeBufferIndex{};
sint32 codeBufferSize{};
// cr state
sint32 activeCRRegister{}; // current x86 condition flags reflect this cr* register
sint32 activeCRState{}; // describes the way in which x86 flags map to the cr register (signed / unsigned)
@ -24,41 +34,41 @@ struct x64GenContext_t
};
// todo - these definitions are part of the x86_64 emitter. Not the backend itself. We should move them eventually
#define X86_REG_EAX 0
#define X86_REG_ECX 1
#define X86_REG_EDX 2
#define X86_REG_EBX 3
#define X86_REG_ESP 4 // reserved for low half of hCPU pointer
#define X86_REG_EBP 5
#define X86_REG_ESI 6
#define X86_REG_EDI 7
#define X86_REG_NONE -1
#define X86_REG_RAX 0
#define X86_REG_RCX 1
#define X86_REG_RDX 2
#define X86_REG_RBX 3
#define X86_REG_RSP 4 // reserved for hCPU pointer
#define X86_REG_RBP 5
#define X86_REG_RSI 6
#define X86_REG_RDI 7
#define X86_REG_R8 8
#define X86_REG_R9 9
#define X86_REG_R10 10
#define X86_REG_R11 11
#define X86_REG_R12 12
#define X86_REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet)
#define X86_REG_R14 14 // reserved as temporary register
#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData
#define X86_REG_AL 0
#define X86_REG_CL 1
#define X86_REG_DL 2
#define X86_REG_BL 3
#define X86_REG_AH 4
#define X86_REG_CH 5
#define X86_REG_DH 6
#define X86_REG_BH 7
//#define X86_REG_EAX 0
//#define X86_REG_ECX 1
//#define X86_REG_EDX 2
//#define X86_REG_EBX 3
//#define X86_REG_ESP 4 // reserved for low half of hCPU pointer
//#define X86_REG_EBP 5
//#define X86_REG_ESI 6
//#define X86_REG_EDI 7
//#define X86_REG_NONE -1
//
//#define X86_REG_RAX 0
//#define X86_REG_RCX 1
//#define X86_REG_RDX 2
//#define X86_REG_RBX 3
//#define X86_REG_RSP 4 // reserved for hCPU pointer
//#define X86_REG_RBP 5
//#define X86_REG_RSI 6
//#define X86_REG_RDI 7
//#define X86_REG_R8 8
//#define X86_REG_R9 9
//#define X86_REG_R10 10
//#define X86_REG_R11 11
//#define X86_REG_R12 12
//#define X86_REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet)
//#define X86_REG_R14 14 // reserved as temporary register
//#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData
//
//#define X86_REG_AL 0
//#define X86_REG_CL 1
//#define X86_REG_DL 2
//#define X86_REG_BL 3
//#define X86_REG_AH 4 -> Addressable via non-REX only
//#define X86_REG_CH 5
//#define X86_REG_DH 6
//#define X86_REG_BH 7
// reserved registers
#define REG_RESV_TEMP (X86_REG_R14)
@ -72,6 +82,7 @@ struct x64GenContext_t
#define reg32ToReg16(__x) (__x)
// deprecated condition flags
enum
{
X86_CONDITION_EQUAL, // or zero

View File

@ -217,16 +217,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7);
// jump cases
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8
sint32 jumpOffset_caseU8 = x64GenContext->codeBufferIndex;
sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16
sint32 jumpOffset_caseU16 = x64GenContext->codeBufferIndex;
sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 6 -> s8
sint32 jumpOffset_caseS8 = x64GenContext->codeBufferIndex;
sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 7 -> s16
sint32 jumpOffset_caseS16 = x64GenContext->codeBufferIndex;
sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
// default case -> float
@ -237,31 +237,31 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen
uint32 jumpOffset_endOfS8;
PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
jumpOffset_endOfFloat = x64GenContext->codeBufferIndex;
jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmp_imm32(x64GenContext, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
jumpOffset_endOfU8 = x64GenContext->codeBufferIndex;
jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmp_imm32(x64GenContext, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
jumpOffset_endOfU16 = x64GenContext->codeBufferIndex;
jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmp_imm32(x64GenContext, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
jumpOffset_endOfS8 = x64GenContext->codeBufferIndex;
jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmp_imm32(x64GenContext, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex());
}
// load from memory
@ -495,16 +495,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext
}
// max(i, -clampMin)
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin);
sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_GREATER_EQUAL, 0);
x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
// min(i, clampMax)
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax);
sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_LESS_EQUAL, 0);
x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
// endian swap
if( bitWriteSize == 16)
x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8);
@ -528,16 +528,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7);
// jump cases
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8
sint32 jumpOffset_caseU8 = x64GenContext->codeBufferIndex;
sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16
sint32 jumpOffset_caseU16 = x64GenContext->codeBufferIndex;
sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 6 -> s8
sint32 jumpOffset_caseS8 = x64GenContext->codeBufferIndex;
sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 7 -> s16
sint32 jumpOffset_caseS16 = x64GenContext->codeBufferIndex;
sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
// default case -> float
@ -548,31 +548,31 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe
uint32 jumpOffset_endOfS8;
PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
jumpOffset_endOfFloat = x64GenContext->codeBufferIndex;
jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmp_imm32(x64GenContext, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
jumpOffset_endOfU8 = x64GenContext->codeBufferIndex;
jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmp_imm32(x64GenContext, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
jumpOffset_endOfU16 = x64GenContext->codeBufferIndex;
jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmp_imm32(x64GenContext, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
jumpOffset_endOfS8 = x64GenContext->codeBufferIndex;
jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmp_imm32(x64GenContext, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex());
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex());
}
// store to memory
@ -873,18 +873,18 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
sint32 crRegister = imlInstruction->crRegister;
// if the parity bit is set (NaN) we need to manually set CR LT, GT and EQ to 0 (comisd/ucomisd sets the respective flags to 1 in case of NaN)
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered
sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_PARITY, 0);
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT));
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ));
sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0);
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0);
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP )
{
@ -1102,50 +1102,50 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
// select C
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC);
sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
// select B
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB);
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// select bottom
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
sint32 jumpInstructionOffset1_bottom = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
// select C bottom
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC);
sint32 jumpInstructionOffset2_bottom = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
// select B bottom
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex());
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB);
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex());
// select top
x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); // copy top to bottom (todo: May cause stall?)
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
sint32 jumpInstructionOffset1_top = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
// select C top
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC, 2);
sint32 jumpInstructionOffset2_top = x64GenContext->codeBufferIndex;
sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
// select B top
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex());
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB, 2);
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->codeBufferIndex);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex());
}
else
assert_dbg();

View File

@ -7,46 +7,22 @@
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v)
{
if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize )
{
x64GenContext->codeBufferSize *= 2;
x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
}
*(uint8*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
x64GenContext->codeBufferIndex++;
x64GenContext->emitter->_emitU8(v);
}
void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v)
{
if( x64GenContext->codeBufferIndex+2 > x64GenContext->codeBufferSize )
{
x64GenContext->codeBufferSize *= 2;
x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
}
*(uint16*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
x64GenContext->codeBufferIndex += 2;
x64GenContext->emitter->_emitU16(v);
}
void x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v)
{
if( x64GenContext->codeBufferIndex+4 > x64GenContext->codeBufferSize )
{
x64GenContext->codeBufferSize *= 2;
x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
}
*(uint32*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
x64GenContext->codeBufferIndex += 4;
x64GenContext->emitter->_emitU32(v);
}
void x64Gen_writeU64(x64GenContext_t* x64GenContext, uint64 v)
{
if( x64GenContext->codeBufferIndex+8 > x64GenContext->codeBufferSize )
{
x64GenContext->codeBufferSize *= 2;
x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize);
}
*(uint64*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v;
x64GenContext->codeBufferIndex += 8;
x64GenContext->emitter->_emitU64(v);
}
#include "x64Emit.hpp"

View File

@ -26,4 +26,4 @@ void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t*
// debug
void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);
void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext);
void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false);

View File

@ -55,7 +55,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment)
}
/*
* Returns true if the imlInstruction can overwrite CR (depending on value of ->crRegister)
* Returns true if the instruction can overwrite CR (depending on value of ->crRegister)
*/
bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction)
{
@ -63,6 +63,10 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
return true; // ??
if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
return true; // ??
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)

View File

@ -121,6 +121,20 @@ std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg)
return "<SegmentNotInCtx>";
}
std::string IMLDebug_GetConditionName(IMLCondition cond)
{
switch (cond)
{
case IMLCondition::EQ:
return "EQ";
case IMLCondition::NEQ:
return "NEQ";
default:
cemu_assert_unimplemented();
}
return "ukn";
}
void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo)
{
StringBuf strOutput(1024);
@ -143,9 +157,12 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
if (printLivenessRangeInfo)
{
strOutput.reset();
IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START);
debug_printf("%s\n", strOutput.c_str());
}
//debug_printf("\n");
strOutput.reset();
sint32 lineOffsetParameters = 18;
@ -207,6 +224,37 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
strOutput.addFmt(" -> CR{}", inst.crRegister);
}
}
else if (inst.type == PPCREC_IML_TYPE_COMPARE)
{
strOutput.add("CMP ");
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerOperandA);
IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerOperandB);
strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare.cond));
strOutput.add(" -> ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerResult, true);
}
else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32)
{
strOutput.add("CMP ");
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.registerOperandA);
strOutput.addFmt("{}", inst.op_compare_s32.immS32);
strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond));
strOutput.add(" -> ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.registerResult, true);
}
else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
strOutput.add("CJUMP2 ");
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_conditionalJump2.registerBool, true);
if(!inst.op_conditionalJump2.mustBeTrue)
strOutput.add("(inverted)");
}
else if (inst.type == PPCREC_IML_TYPE_R_R_S32)
{
strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
@ -369,7 +417,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
else
strOutput.add("U");
strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32);
strOutput.addFmt("= fpr_t{} mode {}\n", inst.op_storeLoad.registerData, inst.op_storeLoad.mode);
strOutput.addFmt(" = fpr_t{} mode {}", inst.op_storeLoad.registerData, inst.op_storeLoad.mode);
}
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R)
{
@ -388,7 +436,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
}
else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
strOutput.addFmt("CYCLE_CHECK\n");
strOutput.addFmt("CYCLE_CHECK");
}
else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
@ -460,11 +508,11 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
debug_printf("\n");
}
void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext)
void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo)
{
for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++)
{
IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], false);
IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], printLivenessRangeInfo);
debug_printf("\n");
}
}

View File

@ -141,6 +141,21 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
else
cemu_assert_unimplemented();
}
else if (type == PPCREC_IML_TYPE_COMPARE)
{
registersUsed->readNamedReg1 = op_compare.registerOperandA;
registersUsed->readNamedReg2 = op_compare.registerOperandB;
registersUsed->writtenNamedReg1 = op_compare.registerResult;
}
else if (type == PPCREC_IML_TYPE_COMPARE_S32)
{
registersUsed->readNamedReg1 = op_compare_s32.registerOperandA;
registersUsed->writtenNamedReg1 = op_compare_s32.registerResult;
}
else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
registersUsed->readNamedReg1 = op_conditionalJump2.registerBool;
}
else if (type == PPCREC_IML_TYPE_LOAD)
{
registersUsed->writtenNamedReg1 = op_storeLoad.registerData;
@ -455,17 +470,30 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste
}
else if (type == PPCREC_IML_TYPE_R_R_S32)
{
// in all cases result is written and other operand is read only
op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, gprRegisterSearched, gprRegisterReplaced);
op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, gprRegisterSearched, gprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_R_R_R)
{
// in all cases result is written and other operands are read only
op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced);
op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, gprRegisterSearched, gprRegisterReplaced);
op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, gprRegisterSearched, gprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_COMPARE)
{
op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, gprRegisterSearched, gprRegisterReplaced);
op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, gprRegisterSearched, gprRegisterReplaced);
op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, gprRegisterSearched, gprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_COMPARE_S32)
{
op_compare_s32.registerResult = replaceRegisterMultiple(op_compare_s32.registerResult, gprRegisterSearched, gprRegisterReplaced);
op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, gprRegisterSearched, gprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, gprRegisterSearched, gprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
// no effect on registers
@ -627,13 +655,17 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist
{
// not affected
}
else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
// not affected
}
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
// no effect on registers
// not affected
}
else if (type == PPCREC_IML_TYPE_NO_OP)
{
// no effect on registers
// not affected
}
else if (type == PPCREC_IML_TYPE_MACRO)
{
@ -737,13 +769,17 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe
{
// not affected
}
else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
// not affected
}
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
// no effect on registers
// not affected
}
else if (type == PPCREC_IML_TYPE_NO_OP)
{
// no effect on registers
// not affected
}
else if (type == PPCREC_IML_TYPE_MACRO)
{

View File

@ -4,8 +4,6 @@ enum
{
PPCREC_IML_OP_ASSIGN, // '=' operator
PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap
PPCREC_IML_OP_ADD, // '+' operator
PPCREC_IML_OP_SUB, // '-' operator
PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit
PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr)
PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr)
@ -85,8 +83,19 @@ enum
// PS
PPCREC_IML_OP_FPR_SUM0,
PPCREC_IML_OP_FPR_SUM1,
};
// working towards defining ops per-form
// R_R_R only
// R_R_S32 only
// R_R_R + R_R_S32
PPCREC_IML_OP_ADD,
PPCREC_IML_OP_SUB,
};
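// Illustrative examples (register numbers are placeholders, not taken from this commit):
//   make_r_r_r(PPCREC_IML_OP_ADD, r3, r4, r5)    // R_R_R form:   r3 = r4 + r5
//   make_r_r_s32(PPCREC_IML_OP_ADD, r3, r4, 100) // R_R_S32 form: r3 = r4 + 100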
#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)
enum
@ -118,6 +127,19 @@ enum
};
enum class IMLCondition : uint8
{
EQ,
NEQ,
SIGNED_GT,
SIGNED_LT,
UNSIGNED_GT,
UNSIGNED_LT,
SIGNED_OVERFLOW,
SIGNED_NOVERFLOW,
};
enum
{
PPCREC_CR_MODE_COMPARE_SIGNED,
@ -131,7 +153,7 @@ enum
{
PPCREC_IML_TYPE_NONE,
PPCREC_IML_TYPE_NO_OP, // no-op instruction
PPCREC_IML_TYPE_R_R, // r* (op) *r
PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r)
PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
@ -145,6 +167,12 @@ enum
PPCREC_IML_TYPE_CJUMP, // conditional jump
PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0
PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands)
// new style of handling conditions and branches:
PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r*
PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm
PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. Jump condition is based on boolean register
// conditional
PPCREC_IML_TYPE_CONDITIONAL_R_S32,
// FPR
@ -294,11 +322,6 @@ struct IMLInstruction
sint32 immS32;
}op_r_immS32;
struct
{
uint32 address;
uint8 flags;
}op_jumpmark;
struct
{
uint32 param;
uint32 param2;
@ -310,7 +333,7 @@ struct IMLInstruction
uint8 crRegisterIndex;
uint8 crBitIndex;
bool bitMustBeSet;
}op_conditionalJump;
}op_conditionalJump; // legacy jump
struct
{
uint8 registerData;
@ -353,16 +376,30 @@ struct IMLInstruction
uint8 registerResult;
}op_fpr_r;
struct
{
uint32 ppcAddress;
uint32 x64Offset;
}op_ppcEnter;
struct
{
uint8 crD; // crBitIndex (result)
uint8 crA; // crBitIndex
uint8 crB; // crBitIndex
}op_cr;
struct
{
uint8 registerResult; // stores the boolean result of the comparison
uint8 registerOperandA;
uint8 registerOperandB;
IMLCondition cond;
}op_compare;
struct
{
uint8 registerResult; // stores the boolean result of the comparison
uint8 registerOperandA;
sint32 immS32;
IMLCondition cond;
}op_compare_s32;
struct
{
uint8 registerBool;
bool mustBeTrue;
}op_conditionalJump2;
// conditional operations (emitted if supported by target platform)
struct
{
@ -385,7 +422,8 @@ struct IMLInstruction
type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE ||
type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB ||
type == PPCREC_IML_TYPE_CJUMP ||
type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ||
type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
return true;
return false;
}
@ -432,6 +470,17 @@ struct IMLInstruction
this->op_r_r.registerA = registerA;
}
void make_r_s32(uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint32 crMode = 0)
{
this->type = PPCREC_IML_TYPE_R_S32;
this->operation = operation;
this->crRegister = crRegister;
this->crMode = crMode;
this->op_r_immS32.registerIndex = registerIndex;
this->op_r_immS32.immS32 = immS32;
}
void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0)
{
// operation with three register operands (e.g. "t0 = t1 + t4")
@ -456,6 +505,40 @@ struct IMLInstruction
this->op_r_r_s32.immS32 = immS32;
}
void make_compare(uint8 registerA, uint8 registerB, uint8 registerResult, IMLCondition cond)
{
this->type = PPCREC_IML_TYPE_COMPARE;
this->operation = -999;
this->crRegister = PPC_REC_INVALID_REGISTER;
this->crMode = 0;
this->op_compare.registerResult = registerResult;
this->op_compare.registerOperandA = registerA;
this->op_compare.registerOperandB = registerB;
this->op_compare.cond = cond;
}
void make_compare_s32(uint8 registerA, sint32 immS32, uint8 registerResult, IMLCondition cond)
{
this->type = PPCREC_IML_TYPE_COMPARE_S32;
this->operation = -999;
this->crRegister = PPC_REC_INVALID_REGISTER;
this->crMode = 0;
this->op_compare_s32.registerResult = registerResult;
this->op_compare_s32.registerOperandA = registerA;
this->op_compare_s32.immS32 = immS32;
this->op_compare_s32.cond = cond;
}
void make_conditional_jump_new(uint8 registerBool, bool mustBeTrue)
{
this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP;
this->operation = -999;
this->crRegister = PPC_REC_INVALID_REGISTER;
this->crMode = 0;
this->op_conditionalJump2.registerBool = registerBool;
this->op_conditionalJump2.mustBeTrue = mustBeTrue;
}
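// Usage sketch (hypothetical registers; ctx is a ppcImlGenContext_t*): lowering "branch if r5 == 0"
// with the new boolean-register style instead of CR flags:
//   ctx->emitInst().make_compare_s32(reg5, 0, tmpBool, IMLCondition::EQ); // tmpBool = (r5 == 0)
//   ctx->emitInst().make_conditional_jump_new(tmpBool, true);             // jump if tmpBool is set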
// load from memory
void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
{

View File

@ -835,9 +835,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp
}
if (foundMatch)
{
// insert expand instruction
IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, i);
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex);
PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex);
}
}

View File

@ -218,10 +218,20 @@ typedef struct
sint32 liveRangesCount;
}raLiveRangeInfo_t;
bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rangeB)
{
if (rangeA->start.index < rangeB->end.index && rangeA->end.index > rangeB->start.index)
return true;
if ((rangeA->start.index == RA_INTER_RANGE_START && rangeA->start.index == rangeB->start.index))
return true;
if (rangeA->end.index == RA_INTER_RANGE_END && rangeA->end.index == rangeB->end.index)
return true;
return false;
}
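// Example: subranges [start=2, end=7] and [start=5, end=9] overlap via the first check. The two
// special cases treat subranges as overlapping when both start at RA_INTER_RANGE_START or both end
// at RA_INTER_RANGE_END, since those sentinel indices mark ranges crossing the segment boundary and
// cannot be compared like ordinary instruction indices.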
// mark registers occupied by any overlapping range as unavailable in physRegSet
void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IMLPhysRegisterSet& physRegSet)
{
//uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1;
for (auto& subrange : range->list_subranges)
{
IMLSegment* imlSegment = subrange->imlSegment;
@ -235,9 +245,10 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IML
continue;
}
if (subrange->start.index < subrangeItr->end.index && subrange->end.index > subrangeItr->start.index ||
(subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) ||
(subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) )
//if (subrange->start.index < subrangeItr->end.index && subrange->end.index > subrangeItr->start.index ||
// (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) ||
// (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) )
if(IsRangeOverlapping(subrange, subrangeItr))
{
if (subrangeItr->range->physicalRegister >= 0)
physRegSet.SetReserved(subrangeItr->range->physicalRegister);
@ -272,19 +283,6 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
}
// sort
std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare);
//for (sint32 i1 = 0; i1 < count; i1++)
//{
// for (sint32 i2 = i1+1; i2 < count; i2++)
// {
// if (subrangeList[i1]->start.index > subrangeList[i2]->start.index)
// {
// // swap
// raLivenessSubrange_t* temp = subrangeList[i1];
// subrangeList[i1] = subrangeList[i2];
// subrangeList[i2] = temp;
// }
// }
//}
// reassemble linked list
subrangeList[count] = nullptr;
imlSegment->raInfo.linkedList_allSubranges = subrangeList[0];
@ -478,6 +476,7 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen
}
}
// evaluate strategy: Split current range to fit in available holes
// todo - are checks required to avoid splitting on the suffix instruction?
spillStrategies.availableRegisterHole.cost = INT_MAX;
spillStrategies.availableRegisterHole.distance = -1;
spillStrategies.availableRegisterHole.physRegister = -1;
@ -770,6 +769,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
liveInfo.liveRangesCount = 0;
sint32 index = 0;
sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0;
//sint32 suffixInstructionIndex = imlSegment->imlList.size() - suffixInstructionCount; // if no suffix instruction exists this matches instruction count
// load register ranges that are supplied from previous segments
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr)
@ -806,7 +806,8 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1)
assert_dbg();
virtualReg2PhysReg[liverange->range->virtualRegister] = -1;
// store GPR
// store GPR if required
// special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed
if (liverange->hasStore)
{
PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min<sint32>(index, imlSegment->imlList.size() - suffixInstructionCount), liverange->range->physicalRegister, liverange->range->name);
@ -827,6 +828,13 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr;
liveInfo.liveRangesCount++;
// load GPR
// similar to stores, any loads for the next segment need to happen before the suffix instruction
// however, as of 17-12-2022, ranges that exit the segment at the end but do not cover the suffix instruction are illegal (e.g. a RA_INTER_RANGE_END to RA_INTER_RANGE_END subrange)
// the limitation that name loads (for the follow-up segments) need to happen before the suffix instruction requires that the range also reflects this, otherwise the RA would erroneously assume registers to be available during the suffix instruction
if (imlSegment->HasSuffixInstruction())
{
cemu_assert_debug(subrangeItr->start.index <= imlSegment->GetSuffixInstructionIndex());
}
if (subrangeItr->_noLoad == false)
{
PPCRecRA_insertGPRLoadInstruction(imlSegment, std::min<sint32>(index, imlSegment->imlList.size() - suffixInstructionCount), subrangeItr->range->physicalRegister, subrangeItr->range->name);
@ -839,7 +847,8 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
}
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
}
// replace registers
// rewrite registers
// todo - this can be simplified by using a map or lookup table rather than a check + 4 slot translation table
if (index < imlSegment->imlList.size())
{
IMLUsedRegisters gprTracking;
@ -1004,7 +1013,6 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext
PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext);
PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext);
PPCRecRA_assignRegisters(ctx, ppcImlGenContext);
PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext);
@ -1095,6 +1103,15 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlG
PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range);
}
}
// for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction
// this is due to range load instructions being inserted before the suffix instruction
if (subrange->end.index == RA_INTER_RANGE_END)
{
if (imlSegment->HasSuffixInstruction())
{
cemu_assert_debug(subrange->start.index <= imlSegment->GetSuffixInstructionIndex());
}
}
return subrange;
}
@ -1155,7 +1172,10 @@ void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, IM
{
if (_isRangeDefined(imlSegment, vGPR) == false)
{
imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END;
if(imlSegment->HasSuffixInstruction())
imlSegment->raDistances.reg[vGPR].usageStart = imlSegment->GetSuffixInstructionIndex();
else
imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END;
imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END;
return;
}

View File

@ -16,6 +16,12 @@ bool IMLSegment::HasSuffixInstruction() const
return imlInstruction.IsSuffixInstruction();
}
sint32 IMLSegment::GetSuffixInstructionIndex() const
{
cemu_assert_debug(HasSuffixInstruction());
return (sint32)(imlList.size() - 1);
}
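// Note: a suffix instruction (branch/jump/cycle check, see IMLInstruction::IsSuffixInstruction) always
// sits at the very end of its segment, so its index is simply imlList.size() - 1.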
IMLInstruction* IMLSegment::GetLastInstruction()
{
if (imlList.empty())

View File

@ -135,6 +135,7 @@ struct IMLSegment
IMLInstruction* AppendInstruction();
bool HasSuffixInstruction() const;
sint32 GetSuffixInstructionIndex() const;
IMLInstruction* GetLastInstruction();
// segment points

View File

@ -167,6 +167,20 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
return nullptr;
}
uint32 ppcRecLowerAddr = LaunchSettings::GetPPCRecLowerAddr();
uint32 ppcRecUpperAddr = LaunchSettings::GetPPCRecUpperAddr();
if (ppcRecLowerAddr != 0 && ppcRecUpperAddr != 0)
{
if (ppcRecFunc->ppcAddress < ppcRecLowerAddr || ppcRecFunc->ppcAddress > ppcRecUpperAddr)
{
delete ppcRecFunc;
return nullptr;
}
}
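// note: functions outside the configured window are simply not recompiled; those presumably keep
// running through the interpreter path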
// apply passes
if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext))
{

View File

@ -28,7 +28,7 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGe
// IML instruction generation
void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction);
void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode);
void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode);
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0);

View File

@ -58,15 +58,15 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext
void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode)
{
// operation with two register operands (e.g. "t0 = t1")
if(imlInstruction == NULL)
imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_R_R;
imlInstruction->operation = operation;
imlInstruction->crRegister = crRegister;
imlInstruction->crMode = crMode;
imlInstruction->op_r_r.registerResult = registerResult;
imlInstruction->op_r_r.registerA = registerA;
if (imlInstruction)
__debugbreak(); // not supported
ppcImlGenContext->emitInst().make_r_r(operation, registerResult, registerA, crRegister, crMode);
}
void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode)
{
ppcImlGenContext->emitInst().make_r_s32(operation, registerIndex, immS32, crRegister, crMode);
}
void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0)
@ -89,20 +89,6 @@ void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcIm
imlInstruction->op_r_name.name = name;
}
void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode)
{
// two variations:
// operation without store (e.g. "'r3' < 123" which has no effect other than updating a condition flags register)
// operation with store (e.g. "'r3' = 123")
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_R_S32;
imlInstruction->operation = operation;
imlInstruction->crRegister = crRegister;
imlInstruction->crMode = crMode;
imlInstruction->op_r_immS32.registerIndex = registerIndex;
imlInstruction->op_r_immS32.immS32 = immS32;
}
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet)
{
if(imlInstruction == NULL)
@ -292,6 +278,13 @@ uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenCo
return registerIndex;
}
// get a throw-away register. Only valid for the scope of a single translated instruction
// be careful not to collide with manually loaded temporary registers
uint32 PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext_t* ppcImlGenContext, uint32 temporaryIndex)
{
return PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + temporaryIndex);
}
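// example use: PPCRecompilerImlGen_BC below grabs temporary 1 as a throw-away boolean register to
// hold the result of the CTR comparison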
/*
* Loads a PPC fpr into any of the available IML FPU registers
* If loadNew is false, it will check first if the fpr is already loaded into any IML register
@ -407,7 +400,7 @@ bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
sint32 rD, rA, rB;
PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MFCR, gprReg, 0, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MFCR, gprReg, 0, PPC_REC_INVALID_REGISTER, 0);
return true;
}
@ -417,7 +410,7 @@ bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
uint32 crMask;
PPC_OPC_TEMPL_XFX(opcode, rS, crMask);
uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MTCRF, gprReg, crMask, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MTCRF, gprReg, crMask, PPC_REC_INVALID_REGISTER, 0);
return true;
}
@ -453,7 +446,7 @@ void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
sint32 b = imm;
// load gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_SIGNED);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, cr, PPCREC_CR_MODE_COMPARE_SIGNED);
}
void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -466,7 +459,7 @@ void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
uint32 b = imm;
// load gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED);
}
bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount)
@ -628,11 +621,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
if( ignoreCondition == false )
return false; // not supported for the moment
uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUB, ctrRegister, 1, 0, false, false, PPCREC_CR_REG_TEMP, PPCREC_CR_MODE_ARITHMETIC);
if( decrementerMustBeZero )
PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false);
else
PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false);
uint32 tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, ctrRegister, ctrRegister, 1);
ppcImlGenContext->emitInst().make_compare_s32(ctrRegister, 0, tmpBoolReg, decrementerMustBeZero ? IMLCondition::EQ : IMLCondition::NEQ);
ppcImlGenContext->emitInst().make_conditional_jump_new(tmpBoolReg, true);
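// conceptually, the emitted IML sequence for BDZ/BDNZ (condition bit ignored) is:
//   ctr = ctr - 1
//   tmpBool = (ctr == 0)   // BDZ; (ctr != 0) for BDNZ
//   if (tmpBool) jump to the branch target segment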
return true;
}
else
@ -709,7 +701,7 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
branchDestReg = tmpRegister;
}
uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, PPC_REC_INVALID_REGISTER, 0);
}
if (!BO.decrementerIgnore())
@ -901,7 +893,7 @@ bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
// rA not used, instruction is value assignment
// rD = imm
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, imm, PPC_REC_INVALID_REGISTER, 0);
}
// never updates any cr
return true;
@ -924,7 +916,7 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// rA not used, instruction turns into simple value assignment
// rD = imm
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
}
// never updates any cr
return true;
@ -1170,15 +1162,15 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
if( registerRA != registerRS )
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRA, registerRS);
if( SH != 0 )
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH, PPC_REC_INVALID_REGISTER, 0);
if(opcode&PPC_OPC_RC)
{
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, PPCREC_CR_MODE_LOGICAL);
}
else
{
if( mask != 0xFFFFFFFF )
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, PPC_REC_INVALID_REGISTER, 0);
}
return true;
}
@ -1213,12 +1205,12 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB);
if (opcode & PPC_OPC_RC)
{
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, 0, PPCREC_CR_MODE_LOGICAL);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, PPCREC_CR_MODE_LOGICAL);
}
else
{
if( mask != 0xFFFFFFFF )
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, PPC_REC_INVALID_REGISTER, 0);
}
return true;
}
@ -1438,7 +1430,7 @@ void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// check if destination register is already loaded
uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
if( destinationRegister == PPC_REC_INVALID_REGISTER )
@ -1482,7 +1474,7 @@ void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// check if destination register is already loaded
uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
if( destinationRegister == PPC_REC_INVALID_REGISTER )
@ -1527,7 +1519,7 @@ void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// check if destination register is already loaded
uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
if( destinationRegister == PPC_REC_INVALID_REGISTER )
@ -1571,7 +1563,7 @@ void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// check if destination register is already loaded
uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
if( destinationRegister == PPC_REC_INVALID_REGISTER )
@ -1880,12 +1872,12 @@ void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister
// add imm to memory register early if possible
if( rD != rA )
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// store word
PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 32, true);
// add imm to memory register late if we couldn't do it early
if( rD == rA )
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
}
void PPCRecompilerImlGen_STH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -1924,12 +1916,12 @@ void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister
// add imm to memory register early if possible
if( rD != rA )
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// store word
PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 16, true);
// add imm to memory register late if we couldn't do it early
if( rD == rA )
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
}
void PPCRecompilerImlGen_STB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -1968,12 +1960,12 @@ void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister
// add imm to memory register early if possible
if( rD != rA )
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// store byte
PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true);
// add imm to memory register late if we couldn't do it early
if( rD == rA )
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
}
// generic indexed store (STWX, STHX, STBX, STWUX. If bitReversed == true -> STHBRX)
@ -2481,7 +2473,7 @@ void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
if( gprDestReg != gprSourceReg )
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
// rA &= imm32
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, PPCREC_CR_MODE_LOGICAL);
}
void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -2496,7 +2488,7 @@ void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
if( gprDestReg != gprSourceReg )
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
// rA &= imm32
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, PPCREC_CR_MODE_LOGICAL);
}
bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -2623,7 +2615,7 @@ void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
if( gprDestReg != gprSourceReg )
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
// rA |= imm32
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
}
void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -2639,7 +2631,7 @@ void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
if( gprDestReg != gprSourceReg )
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
// rA |= imm32
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
}
void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -2655,7 +2647,7 @@ void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
if( gprDestReg != gprSourceReg )
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
// rA ^= imm32
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
}
void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -2671,7 +2663,7 @@ void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
if( gprDestReg != gprSourceReg )
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg);
// rA ^= imm32
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
}
bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)

View File

@ -163,7 +163,7 @@ bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
// get memory gpr register index
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// get fpr register index
uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
if( ppcImlGenContext->LSQE )
@ -258,7 +258,7 @@ bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
// get memory gpr register index
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// get fpr register index
uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
// emit load iml
@ -326,7 +326,7 @@ bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// get memory gpr register index
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// get fpr register index
uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
@ -412,7 +412,7 @@ bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// get memory gpr register index
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// get fpr register index
uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
@ -1114,7 +1114,7 @@ bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
// get memory gpr register index
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// get fpr register index
uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD);
// paired load
@ -1165,7 +1165,7 @@ bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 op
// get memory gpr register index
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
// add imm to memory register
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0);
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
// get fpr register index
uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD);
// paired store

View File

@ -13,6 +13,7 @@
#include "util/crypto/aes128.h"
#include "Cafe/Filesystem/FST/FST.h"
#include "util/helpers/StringHelpers.h"
void requireConsole();
@ -74,7 +75,9 @@ bool LaunchSettings::HandleCommandline(const std::vector<std::wstring>& args)
po::options_description hidden{ "Hidden options" };
hidden.add_options()
("nsight", po::value<bool>()->implicit_value(true), "NSight debugging options")
("legacy", po::value<bool>()->implicit_value(true), "Intel legacy graphic mode");
("legacy", po::value<bool>()->implicit_value(true), "Intel legacy graphic mode")
("ppcrec-lower-addr", po::value<std::string>(), "For debugging: Lower address allowed for PPC recompilation")
("ppcrec-upper-addr", po::value<std::string>(), "For debugging: Upper address allowed for PPC recompilation");
po::options_description extractor{ "Extractor tool" };
extractor.add_options()
@ -186,6 +189,20 @@ bool LaunchSettings::HandleCommandline(const std::vector<std::wstring>& args)
if (vm.count("output"))
log_path = vm["output"].as<std::wstring>();
// recompiler range limit for debugging
if (vm.count("ppcrec-lower-addr"))
{
uint32 addr = (uint32)StringHelpers::ToInt64(vm["ppcrec-lower-addr"].as<std::string>());
ppcRec_limitLowerAddr = addr;
}
if (vm.count("ppcrec-upper-addr"))
{
uint32 addr = (uint32)StringHelpers::ToInt64(vm["ppcrec-upper-addr"].as<std::string>());
ppcRec_limitUpperAddr = addr;
}
if(ppcRec_limitLowerAddr != 0 && ppcRec_limitUpperAddr != 0)
cemuLog_log(LogType::Force, "PPCRec range limited to 0x{:08x}-0x{:08x}", ppcRec_limitLowerAddr, ppcRec_limitUpperAddr);
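// example invocation (hypothetical addresses), useful for bisecting a miscompiled function:
//   Cemu --ppcrec-lower-addr 0x02000000 --ppcrec-upper-addr 0x02001000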
if(!extract_path.empty())
{
ExtractorTool(extract_path, output_path, log_path);

View File

@ -29,6 +29,9 @@ public:
static std::optional<uint32> GetPersistentId() { return s_persistent_id; }
static uint32 GetPPCRecLowerAddr() { return ppcRec_limitLowerAddr; };
static uint32 GetPPCRecUpperAddr() { return ppcRec_limitUpperAddr; };
private:
inline static std::optional<fs::path> s_load_game_file{};
inline static std::optional<uint64> s_load_title_id{};
@ -44,6 +47,10 @@ private:
inline static std::optional<uint32> s_persistent_id{};
// for recompiler debugging
inline static uint32 ppcRec_limitLowerAddr{};
inline static uint32 ppcRec_limitUpperAddr{};
static bool ExtractorTool(std::wstring_view wud_path, std::string_view output_path, std::wstring_view log_path);
};