PPCRec: Rework carry bit and generalize carry IML instructions

Carry bit is now resident in a register-allocated GPR instead of being baked directly into IML instructions

All the PowerPC carry ADD* and SUB* instructions as well as SRAW/SRAWI have been reworked to use more generalized IML instructions for handling carry

IML instructions now support two named output registers instead of only one (easily extendable to arbitrary count)
This commit is contained in:
Exzap 2022-12-27 05:20:47 +01:00
parent 8df0281baa
commit 37256ac589
16 changed files with 3894 additions and 958 deletions

View File

@ -67,7 +67,8 @@ struct PPCInterpreter_t
uint32 reservedMemValue;
// temporary storage for recompiler
FPR_t temporaryFPR[8];
uint32 temporaryGPR[4];
uint32 temporaryGPR[4]; // deprecated, refactor away backend dependency on this
uint32 temporaryGPR_reg[4];
// values below this are not used by Cafe OS usermode
struct
{

View File

@ -23,6 +23,11 @@ static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
return (x86Assembler64::GPR32)regId;
}
// Converts a 32-bit GPR identifier into the corresponding GPR8_REX identifier
// by a plain value cast (the inverse of _reg32_from_reg8).
static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
{
	const auto lowByteReg = static_cast<x86Assembler64::GPR8_REX>(regId);
	return lowByteReg;
}
X86Cond _x86Cond(IMLCondition imlCond)
{
@ -32,6 +37,10 @@ X86Cond _x86Cond(IMLCondition imlCond)
return X86_CONDITION_Z;
case IMLCondition::NEQ:
return X86_CONDITION_NZ;
case IMLCondition::UNSIGNED_GT:
return X86_CONDITION_NBE;
case IMLCondition::UNSIGNED_LT:
return X86_CONDITION_B;
default:
break;
}
@ -758,56 +767,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
else
assert_dbg();
}
else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
// copy operand to result if different registers
if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
{
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
}
// copy xer_ca to eflags carry
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// add carry bit
x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 0);
// update xer carry
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
}
else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_ME )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
// copy operand to result if different registers
if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
{
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
}
// copy xer_ca to eflags carry
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// add carry bit
x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, (uint32)-1);
// update xer carry
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
}
else if( imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// registerResult = ~registerOperand1 + carry
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = imlInstruction->op_r_r.registerResult;
sint32 rRegOperand1 = imlInstruction->op_r_r.registerA;
// copy operand to result register
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
// execute NOT on result
x64Gen_not_reg64Low32(x64GenContext, rRegResult);
// copy xer_ca to eflags carry
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// add carry
x64Gen_adc_reg64Low32_imm32(x64GenContext, rRegResult, 0);
// update carry
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
}
else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
@ -1043,56 +1002,26 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if( imlInstruction->operation == PPCREC_IML_OP_ADD || imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY )
if( imlInstruction->operation == PPCREC_IML_OP_ADD)
{
// registerResult = registerOperand1 + registerOperand2
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
bool addCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY;
if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
{
// be careful not to overwrite the operand before we use it
if( rRegResult == rRegOperand1 )
{
if( addCarry )
{
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
else
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
else
{
if( addCarry )
{
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
}
else
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
}
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
}
else
{
// copy operand1 to destination register before doing addition
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
// add operand2
if( addCarry )
{
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
else
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
// update carry
if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY )
{
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
@ -1128,52 +1057,25 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY )
else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// registerResult = registerOperand1 - registerOperand2 + carry
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
if( rRegOperand1 == rRegOperand2 )
{
// copy xer_ca to eflags carry
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
x64Gen_cmc(x64GenContext);
// result = operand1 - operand1 -> 0
x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult);
}
else if( rRegResult == rRegOperand1 )
{
// copy inverted xer_ca to eflags carry
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
x64Gen_cmc(x64GenContext);
// result = result - operand2
x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
else if ( rRegResult == rRegOperand2 )
{
// result = operand1 - result
// NOT result
x64Gen_not_reg64Low32(x64GenContext, rRegResult);
// copy xer_ca to eflags carry
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// ADC result, operand1
x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
}
sint32 rRegA = imlInstruction->op_r_r_r.registerA;
sint32 rRegB = imlInstruction->op_r_r_r.registerB;
if (rRegResult == rRegB)
std::swap(rRegA, rRegB);
if (rRegResult != rRegA)
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegA);
if (imlInstruction->operation == PPCREC_IML_OP_OR)
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB);
else if (imlInstruction->operation == PPCREC_IML_OP_AND)
x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB);
else
{
// copy operand1 to destination register before doing addition
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
// copy xer_ca to eflags carry
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
x64Gen_cmc(x64GenContext);
// sub operand2
x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
// update carry flag (todo: is this actually correct in all cases?)
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB);
}
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
{
@ -1198,79 +1100,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC )
{
// registerResult = registerOperand2(rB) - registerOperand1(rA)
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
// updates carry flag
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
return false;
}
sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperandA = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperandB = imlInstruction->op_r_r_r.registerB;
// update carry flag
// carry flag is detected this way:
//if ((~a+b) < a) {
// return true;
//}
//if ((~a+b+1) < 1) {
// return true;
//}
// set carry to zero
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// ((~a+b)<~a) == true -> ca = 1
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA);
x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB);
x64Gen_not_reg64Low32(x64GenContext, rRegOperandA);
x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandA);
x64Gen_not_reg64Low32(x64GenContext, rRegOperandA);
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0);
// reset carry flag + jump destination afterwards
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
// OR ((~a+b+1)<1) == true -> ca = 1
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA);
// todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1
x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB);
x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1);
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0);
// reset carry flag + jump destination afterwards
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
// do subtraction
if( rRegOperandB == rRegOperandA )
{
// result = operandA - operandA -> 0
x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult);
}
else if( rRegResult == rRegOperandB )
{
// result = result - operandA
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandA);
}
else if ( rRegResult == rRegOperandA )
{
// result = operandB - result
// NEG result
x64Gen_neg_reg64Low32(x64GenContext, rRegResult);
// ADD result, operandB
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandB);
}
else
{
// copy operand1 to destination register before doing addition
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperandB);
// sub operand2
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandA);
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW )
{
// registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits)
@ -1351,78 +1180,88 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_SRAW )
else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S ||
imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
{
// registerResult = (sint32)registerOperand1(rA) >> (sint32)registerOperand2(rB) (up to 63 bits)
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
// x86's shift and rotate instruction have the shift amount hardwired to the CL register
// since our register allocator doesn't support instruction based fixed phys registers yet
// we'll instead have to temporarily shuffle registers around
sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
// save cr
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
// we use BMI2's shift instructions until the RA can assign fixed registers
if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
{
return false;
x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
}
// todo: Use BMI instructions if available?
// MOV registerResult, registerOperand (if different)
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1);
// reset carry
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// we use the same shift by register approach as in SLW/SRW, but we have to differentiate by signed/unsigned shift since it influences how the carry flag is set
x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 0x80000000);
sint32 jumpInstructionJumpToSignedShift = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0);
// unsigned shift (MSB of input register is not set)
for(sint32 b=0; b<6; b++)
else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
{
x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b));
sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
if( b == 5 )
{
x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)/2);
x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)/2);
}
else
{
x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b));
}
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
}
sint32 jumpInstructionJumpToEnd = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NONE, 0);
// signed shift
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->emitter->GetWriteIndex());
for(sint32 b=0; b<6; b++)
else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
{
// check if we need to shift by (1<<bit)
x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b));
sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set
// set ca if any non-zero bit is shifted out
x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (1<<(1<<b))-1);
sint32 jumpInstructionJumpToAfterCa = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if no bit is set
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->emitter->GetWriteIndex());
// arithmetic shift
if( b == 5 )
{
// copy sign bit into all bits
x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)/2);
x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)/2);
}
else
{
x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b));
}
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
}
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->emitter->GetWriteIndex());
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP);
// update CR if requested
// todo
//auto rResult = _reg32(rRegResult);
//auto rOp2 = _reg8_from_reg32(_reg32(rRegOperand2));
//if (rRegResult == rRegOperand2)
//{
// if (rRegResult != rRegOperand1)
// __debugbreak(); // cannot handle yet (we use rRegResult as a temporary reg, but its not possible if it is shared with op2)
//}
//if(rRegOperand1 != rRegResult)
// x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
//cemu_assert_debug(rRegOperand1 != X86_REG_ECX);
//if (rRegOperand2 == X86_REG_ECX)
//{
// if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
// x64GenContext->emitter->SAR_d_CL(rResult);
// else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
// x64GenContext->emitter->SHR_d_CL(rResult);
// else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
// x64GenContext->emitter->SHL_d_CL(rResult);
// else
// cemu_assert_unimplemented();
//}
//else
//{
// auto rRegResultOrg = rRegResult;
// if (rRegResult == X86_REG_ECX)
// {
// x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegResult);
// rRegResult = REG_RESV_TEMP;
// rResult = _reg32(rRegResult);
// }
//
// x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
//
// if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
// x64GenContext->emitter->SAR_d_CL(rResult);
// else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
// x64GenContext->emitter->SHR_d_CL(rResult);
// else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
// x64GenContext->emitter->SHL_d_CL(rResult);
// else
// cemu_assert_unimplemented();
// x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
// // move result back if it was in ECX
// if (rRegResultOrg == X86_REG_ECX)
// {
// x64Gen_mov_reg64_reg64(x64GenContext, rRegResultOrg, REG_RESV_TEMP);
// }
//}
}
else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
{
@ -1520,6 +1359,44 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
return true;
}
// Emits x86-64 code for a three-register IML instruction with an additional carry register:
//   PPCREC_IML_OP_ADD:            regR = regA + regB,            regCarry = carry-out of the addition
//   PPCREC_IML_OP_ADD_WITH_CARRY: regR = regA + regB + regCarry, regCarry = carry-out of the addition
// Preconditions:
//   - regCarry must be a different register than regR, regA and regB (checked in debug builds)
//   - for ADD_WITH_CARRY, regCarry must already hold exactly 0 or 1
// Returns true if code was emitted, false (after cemu_assert_unimplemented) for any other operation.
bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
	auto regR = _reg32(imlInstruction->op_r_r_r_carry.regR);
	auto regA = _reg32(imlInstruction->op_r_r_r_carry.regA);
	auto regB = _reg32(imlInstruction->op_r_r_r_carry.regB);
	auto regCarry = _reg32(imlInstruction->op_r_r_r_carry.regCarry);
	// The sequences below write regCarry independently of the operands:
	// - ADD zeroes regCarry before the addition reads regB
	// - ADD_WITH_CARRY takes the carry-in from bit 0 of regCarry and only rewrites its low byte afterwards
	// Both are incorrect if regCarry aliases any operand or the result, so regB has to be checked as well
	cemu_assert_debug(regCarry != regR && regCarry != regA && regCarry != regB);

	switch (imlInstruction->operation)
	{
	case PPCREC_IML_OP_ADD:
		// addition is commutative, so if the result aliases regB we simply add regA onto it instead
		if (regB == regR)
			std::swap(regB, regA);
		if (regR != regA)
			x64GenContext->emitter->MOV_dd(regR, regA);
		// clear the full carry register before the ADD (XOR modifies eflags), SETcc below only writes the low 8 bits
		x64GenContext->emitter->XOR_dd(regCarry, regCarry);
		x64GenContext->emitter->ADD_dd(regR, regB);
		x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); // below condition checks carry flag
		break;
	case PPCREC_IML_OP_ADD_WITH_CARRY:
		// assumes that carry is already correctly initialized as 0 or 1
		if (regB == regR)
			std::swap(regB, regA);
		if (regR != regA)
			x64GenContext->emitter->MOV_dd(regR, regA);
		x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag
		x64GenContext->emitter->ADC_dd(regR, regB);
		// upper bits of regCarry are still zero (see assumption above), so updating the low byte is sufficient
		x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry));
		break;
	default:
		cemu_assert_unimplemented();
		return false;
	}
	return true;
}
bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
@ -1557,6 +1434,14 @@ bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction,
return true;
}
// Emits an unconditional 32-bit relative jump towards imlSegment->nextSegmentBranchTaken.
// Always returns true.
bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
{
	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);

	// register the branch target before emitting the jump itself
	auto& branchTakenSegment = imlSegment->nextSegmentBranchTaken;
	PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, branchTakenSegment);

	// the jump is emitted with a displacement of 0
	// NOTE(review): presumably patched later via the relocatable offset recorded above - confirm
	auto& asmEmitter = *x64GenContext->emitter;
	asmEmitter.JMP_j32(0);
	return true;
}
bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
@ -1584,65 +1469,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY )
else if (imlInstruction->operation == PPCREC_IML_OP_AND ||
imlInstruction->operation == PPCREC_IML_OP_OR ||
imlInstruction->operation == PPCREC_IML_OP_XOR)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// registerResult = registerOperand + immS32
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
if( rRegResult != rRegOperand )
{
// copy value to destination register before doing addition
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand);
}
x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32);
// update carry flag
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
}
else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC )
{
// registerResult = immS32 - registerOperand
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
sint32 immS32 = (sint32)imlInstruction->op_r_r_s32.immS32;
if( rRegResult != rRegOperand )
{
// copy value to destination register before doing addition
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand);
}
// set carry to zero
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// ((~a+b)<~a) == true -> ca = 1
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand);
x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP);
x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32);
x64Gen_not_reg64Low32(x64GenContext, rRegOperand);
x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperand);
x64Gen_not_reg64Low32(x64GenContext, rRegOperand);
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0);
// reset carry flag + jump destination afterwards
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
// OR ((~a+b+1)<1) == true -> ca = 1
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand);
// todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1
x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP);
x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32);
x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1);
x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1);
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0);
// reset carry flag + jump destination afterwards
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
// do actual computation of value, note: a - b is equivalent to a + ~b + 1
x64Gen_not_reg64Low32(x64GenContext, rRegResult);
x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immS32 + 1);
if (regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
if (imlInstruction->operation == PPCREC_IML_OP_AND)
x64Gen_and_reg64Low32_imm32(x64GenContext, regResult, immS32);
else if (imlInstruction->operation == PPCREC_IML_OP_OR)
x64Gen_or_reg64Low32_imm32(x64GenContext, regResult, immS32);
else // XOR
x64Gen_xor_reg64Low32_imm32(x64GenContext, regResult, immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI )
{
@ -1679,47 +1519,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand);
x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP);
}
else if( imlInstruction->operation == PPCREC_IML_OP_SRAW )
{
// registerResult = registerOperand>>SH and set xer ca flag
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
uint32 sh = (uint32)imlInstruction->op_r_r_s32.immS32;
// MOV registerResult, registerOperand (if different)
if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult )
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA);
// todo: Detect if we don't need to update carry
// generic case
// TEST registerResult, (1<<(SH+1))-1
uint32 caTestMask = 0;
if (sh >= 31)
caTestMask = 0x7FFFFFFF;
else
caTestMask = (1 << (sh)) - 1;
x64Gen_test_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, caTestMask);
// SETNE/NZ [ESP+XER_CA]
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
// SAR registerResult, SH
x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh);
// JNS <skipInstruction> (if sign not set)
sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here
// MOV BYTE [ESP+xer_ca], 0
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// jump destination
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex());
}
else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ||
imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT )
else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ||
imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
// MOV registerResult, registerOperand (if different)
if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult )
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA);
// Shift
if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT )
if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32);
else
else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32);
else // RIGHT_SHIFT_S
x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32);
}
else
{
@ -1729,6 +1542,40 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
return true;
}
// Emits x86-64 code for a register + signed 32-bit immediate IML instruction with an additional carry register:
//   PPCREC_IML_OP_ADD:            regR = regA + immS32,            regCarry = carry-out of the addition
//   PPCREC_IML_OP_ADD_WITH_CARRY: regR = regA + immS32 + regCarry, regCarry = carry-out of the addition
// regCarry is expected to differ from regR and regA (checked in debug builds).
// Returns true if code was emitted, false (after cemu_assert_unimplemented) for any other operation.
bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);

	const auto& operands = imlInstruction->op_r_r_s32_carry;
	const auto dstReg = _reg32(operands.regR);
	const auto srcReg = _reg32(operands.regA);
	const sint32 addend = operands.immS32;
	const auto carryReg = _reg32(operands.regCarry);
	cemu_assert_debug(carryReg != dstReg && carryReg != srcReg);

	const bool isPlainAdd = imlInstruction->operation == PPCREC_IML_OP_ADD;
	const bool isAddWithCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY;
	if (!isPlainAdd && !isAddWithCarry)
	{
		// nothing is emitted for unsupported operations
		cemu_assert_unimplemented();
		return false;
	}

	auto& asmEmitter = *x64GenContext->emitter;
	// plain add: zero the whole carry register up front, since SETcc at the end only writes the low 8 bits
	if (isPlainAdd)
		asmEmitter.XOR_dd(carryReg, carryReg);
	// bring the source operand into the result register if they differ
	if (dstReg != srcReg)
		asmEmitter.MOV_dd(dstReg, srcReg);
	if (isPlainAdd)
	{
		asmEmitter.ADD_di32(dstReg, addend);
	}
	else
	{
		// assumes that carry is already correctly initialized as 0 or 1
		asmEmitter.BT_du8(carryReg, 0); // bit 0 of the carry register becomes the x86 carry flag
		asmEmitter.ADC_di32(dstReg, addend);
	}
	// store the resulting x86 carry flag into the low byte of the carry register
	asmEmitter.SETcc_b(X86_CONDITION_B, _reg8_from_reg32(carryReg));
	return true;
}
bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction)
{
if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE )
@ -1925,7 +1772,11 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
}
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
}
else if (name == PPCREC_NAME_XER_CA)
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
}
else
assert_dbg();
@ -1957,7 +1808,11 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
}
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex);
}
else if (name == PPCREC_NAME_XER_CA)
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex)));
}
else
assert_dbg();
@ -2016,37 +1871,37 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
else if( imlInstruction->type == PPCREC_IML_TYPE_R_R )
{
if( PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false )
{
codeGenerationFailed = true;
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)
{
if (PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
{
codeGenerationFailed = true;
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
if (PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
{
codeGenerationFailed = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_S32 )
else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
{
if( PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false )
{
if (PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
codeGenerationFailed = true;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY)
{
if (PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
codeGenerationFailed = true;
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
{
if (PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
{
codeGenerationFailed = true;
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY)
{
if (PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
codeGenerationFailed = true;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE)
{
@ -2063,6 +1918,13 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
codeGenerationFailed = true;
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP)
{
if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
{
codeGenerationFailed = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP )
{
if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false )

View File

@ -33,43 +33,6 @@ struct x64GenContext_t
std::vector<x64RelocEntry_t> relocateOffsetTable2;
};
// todo - these definitions are part of the x86_64 emitter. Not the backend itself. We should move them eventually
//#define X86_REG_EAX 0
//#define X86_REG_ECX 1
//#define X86_REG_EDX 2
//#define X86_REG_EBX 3
//#define X86_REG_ESP 4 // reserved for low half of hCPU pointer
//#define X86_REG_EBP 5
//#define X86_REG_ESI 6
//#define X86_REG_EDI 7
//#define X86_REG_NONE -1
//
//#define X86_REG_RAX 0
//#define X86_REG_RCX 1
//#define X86_REG_RDX 2
//#define X86_REG_RBX 3
//#define X86_REG_RSP 4 // reserved for hCPU pointer
//#define X86_REG_RBP 5
//#define X86_REG_RSI 6
//#define X86_REG_RDI 7
//#define X86_REG_R8 8
//#define X86_REG_R9 9
//#define X86_REG_R10 10
//#define X86_REG_R11 11
//#define X86_REG_R12 12
//#define X86_REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet)
//#define X86_REG_R14 14 // reserved as temporary register
//#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData
//
//#define X86_REG_AL 0
//#define X86_REG_CL 1
//#define X86_REG_DL 2
//#define X86_REG_BL 3
//#define X86_REG_AH 4 -> Adressable via non-REX only
//#define X86_REG_CH 5
//#define X86_REG_DH 6
//#define X86_REG_BH 7
// reserved registers
#define REG_RESV_TEMP (X86_REG_R14)
#define REG_RESV_HCPU (X86_REG_RSP)
@ -79,8 +42,7 @@ struct x64GenContext_t
// reserved floating-point registers
#define REG_RESV_FPR_TEMP (15)
#define reg32ToReg16(__x) (__x)
#define reg32ToReg16(__x) (__x) // deprecated
// deprecated condition flags
enum
@ -308,4 +270,8 @@ void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64G
void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister);
void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);
void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB);

View File

@ -68,6 +68,34 @@ void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist
x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
}
// SHRX reg32, reg32, reg32
// Writes the 5-byte VEX-encoded instruction (opcode 0xF7) in register-direct form.
// registerDst is placed in ModRM.reg, registerA in ModRM.rm and registerB in the third prefix byte.
void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	// second prefix byte: bits 0x80 / 0x20 are cleared when registerDst / registerA is one of the extended registers (8-15)
	sint32 prefixByte1 = 0xE2;
	if (registerDst >= 8)
		prefixByte1 -= 0x80;
	if (registerA >= 8)
		prefixByte1 -= 0x20;
	// third prefix byte: registerB is stored inverted, shifted left by 3
	const sint32 prefixByte2 = 0x7B - registerB * 8;
	// ModRM with mod=11 (register-direct), only the low 3 bits of each register index go here
	const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7);

	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, prefixByte1);
	x64Gen_writeU8(x64GenContext, prefixByte2);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, modRM);
}
// SARX reg64, reg64, reg64
// Writes the 5-byte VEX-encoded instruction (opcode 0xF7) in register-direct form.
// registerDst is placed in ModRM.reg, registerA in ModRM.rm and registerB in the third prefix byte.
void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	// second prefix byte: bits 0x80 / 0x20 are cleared when registerDst / registerA is one of the extended registers (8-15)
	sint32 prefixByte1 = 0xE2;
	if (registerDst >= 8)
		prefixByte1 -= 0x80;
	if (registerA >= 8)
		prefixByte1 -= 0x20;
	// third prefix byte: registerB is stored inverted, shifted left by 3 (top bit set selects the 64-bit form)
	const sint32 prefixByte2 = 0xFA - registerB * 8;
	// ModRM with mod=11 (register-direct), only the low 3 bits of each register index go here
	const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7);

	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, prefixByte1);
	x64Gen_writeU8(x64GenContext, prefixByte2);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, modRM);
}
// SARX reg32, reg32, reg32
// Writes the 5-byte VEX-encoded instruction (opcode 0xF7) in register-direct form.
// registerDst is placed in ModRM.reg, registerA in ModRM.rm and registerB in the third prefix byte.
void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	// second prefix byte: bits 0x80 / 0x20 are cleared when registerDst / registerA is one of the extended registers (8-15)
	sint32 prefixByte1 = 0xE2;
	if (registerDst >= 8)
		prefixByte1 -= 0x80;
	if (registerA >= 8)
		prefixByte1 -= 0x20;
	// third prefix byte: registerB is stored inverted, shifted left by 3
	const sint32 prefixByte2 = 0x7A - registerB * 8;
	// ModRM with mod=11 (register-direct), only the low 3 bits of each register index go here
	const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7);

	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, prefixByte1);
	x64Gen_writeU8(x64GenContext, prefixByte2);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, modRM);
}
void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
// SHLX reg64, reg64, reg64
@ -76,4 +104,13 @@ void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist
x64Gen_writeU8(x64GenContext, 0xF9 - registerB * 8);
x64Gen_writeU8(x64GenContext, 0xF7);
x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7));
}
// SHLX reg32, reg32, reg32
// Writes the 5-byte VEX-encoded instruction (opcode 0xF7) in register-direct form.
// registerDst is placed in ModRM.reg, registerA in ModRM.rm and registerB in the third prefix byte.
void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB)
{
	// second prefix byte: bits 0x80 / 0x20 are cleared when registerDst / registerA is one of the extended registers (8-15)
	sint32 prefixByte1 = 0xE2;
	if (registerDst >= 8)
		prefixByte1 -= 0x80;
	if (registerA >= 8)
		prefixByte1 -= 0x20;
	// third prefix byte: registerB is stored inverted, shifted left by 3
	const sint32 prefixByte2 = 0x79 - registerB * 8;
	// ModRM with mod=11 (register-direct), only the low 3 bits of each register index go here
	const sint32 modRM = 0xC0 + (registerDst & 7) * 8 + (registerA & 7);

	x64Gen_writeU8(x64GenContext, 0xC4);
	x64Gen_writeU8(x64GenContext, prefixByte1);
	x64Gen_writeU8(x64GenContext, prefixByte2);
	x64Gen_writeU8(x64GenContext, 0xF7);
	x64Gen_writeU8(x64GenContext, modRM);
}

View File

@ -623,11 +623,11 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
// store double low part
// store double low part
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0);
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP);
// store double high part
// store double high part
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4);
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP);

File diff suppressed because it is too large Load Diff

View File

@ -5,10 +5,12 @@
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
/*
* Initializes a single segment and returns true if it is a finite loop
* Analyzes a single segment and returns true if it is a finite loop
*/
bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment)
{
return false; // !!! DISABLED !!!
bool isTightFiniteLoop = false;
// base criteria, must jump to beginning of same segment
if (imlSegment->nextSegmentBranchTaken != imlSegment)
@ -42,9 +44,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment)
if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB))
continue;
instIt.CheckRegisterUsage(&registersUsed);
if(registersUsed.writtenNamedReg1 < 0)
continue;
list_modifiedRegisters.remove(registersUsed.writtenNamedReg1);
registersUsed.ForEachWrittenGPR([&](IMLReg r) { list_modifiedRegisters.remove(r); });
}
if (list_modifiedRegisters.count > 0)
{
@ -63,10 +63,6 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
return true; // ??
if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
return true; // ??
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_R_S32)
@ -79,6 +75,18 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R)
return true;
// new instructions
if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY)
return true;
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY)
return true;
return false;
}

View File

@ -14,10 +14,10 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
return "MOV";
else if (op == PPCREC_IML_OP_ADD)
return "ADD";
else if (op == PPCREC_IML_OP_ADD_WITH_CARRY)
return "ADC";
else if (op == PPCREC_IML_OP_SUB)
return "SUB";
else if (op == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY)
return "ADDCSC";
else if (op == PPCREC_IML_OP_OR)
return "OR";
else if (op == PPCREC_IML_OP_AND)
@ -26,8 +26,12 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
return "XOR";
else if (op == PPCREC_IML_OP_LEFT_SHIFT)
return "LSH";
else if (op == PPCREC_IML_OP_RIGHT_SHIFT)
else if (op == PPCREC_IML_OP_RIGHT_SHIFT_U)
return "RSH";
else if (op == PPCREC_IML_OP_RIGHT_SHIFT_S)
return "ARSH";
else if (op == PPCREC_IML_OP_LEFT_ROTATE)
return "LROT";
else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED)
return "MULS";
else if (op == PPCREC_IML_OP_DIVIDE_SIGNED)
@ -129,6 +133,14 @@ std::string IMLDebug_GetConditionName(IMLCondition cond)
return "EQ";
case IMLCondition::NEQ:
return "NEQ";
case IMLCondition::UNSIGNED_GT:
return "UGT";
case IMLCondition::UNSIGNED_LT:
return "ULT";
case IMLCondition::SIGNED_GT:
return "SGT";
case IMLCondition::SIGNED_LT:
return "SLT";
default:
cemu_assert_unimplemented();
}
@ -224,6 +236,16 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
strOutput.addFmt(" -> CR{}", inst.crRegister);
}
}
else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY)
{
strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR);
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA);
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB);
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true);
}
else if (inst.type == PPCREC_IML_TYPE_COMPARE)
{
strOutput.add("CMP ");
@ -270,6 +292,17 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
strOutput.addFmt(" -> CR{}", inst.crRegister);
}
}
else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY)
{
strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR);
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA);
IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32);
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true);
}
else if (inst.type == PPCREC_IML_TYPE_R_S32)
{
strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));

View File

@ -10,6 +10,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
registersUsed->readNamedReg2 = -1;
registersUsed->readNamedReg3 = -1;
registersUsed->writtenNamedReg1 = -1;
registersUsed->writtenNamedReg2 = -1;
registersUsed->readFPR1 = -1;
registersUsed->readFPR2 = -1;
registersUsed->readFPR3 = -1;
@ -34,10 +35,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
else if (
operation == PPCREC_IML_OP_OR ||
operation == PPCREC_IML_OP_AND ||
operation == PPCREC_IML_OP_XOR ||
operation == PPCREC_IML_OP_ADD_CARRY || // r_r carry stuff is deprecated
operation == PPCREC_IML_OP_ADD_CARRY_ME ||
operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY)
operation == PPCREC_IML_OP_XOR)
{
// result is read and written, operand is read
registersUsed->writtenNamedReg1 = op_r_r.registerResult;
@ -112,6 +110,24 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
registersUsed->readNamedReg1 = op_r_r_s32.registerA;
}
}
else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
{
registersUsed->writtenNamedReg1 = op_r_r_s32_carry.regR;
registersUsed->readNamedReg1 = op_r_r_s32_carry.regA;
// some operations read carry
switch (operation)
{
case PPCREC_IML_OP_ADD_WITH_CARRY:
registersUsed->readNamedReg2 = op_r_r_s32_carry.regCarry;
break;
case PPCREC_IML_OP_ADD:
break;
default:
cemu_assert_unimplemented();
}
// carry is always written
registersUsed->writtenNamedReg2 = op_r_r_s32_carry.regCarry;
}
else if (type == PPCREC_IML_TYPE_R_R_R)
{
// in all cases result is written and other operands are read only
@ -119,6 +135,25 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
registersUsed->readNamedReg1 = op_r_r_r.registerA;
registersUsed->readNamedReg2 = op_r_r_r.registerB;
}
else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
{
registersUsed->writtenNamedReg1 = op_r_r_r_carry.regR;
registersUsed->readNamedReg1 = op_r_r_r_carry.regA;
registersUsed->readNamedReg2 = op_r_r_r_carry.regB;
// some operations read carry
switch (operation)
{
case PPCREC_IML_OP_ADD_WITH_CARRY:
registersUsed->readNamedReg3 = op_r_r_r_carry.regCarry;
break;
case PPCREC_IML_OP_ADD:
break;
default:
cemu_assert_unimplemented();
}
// carry is always written
registersUsed->writtenNamedReg2 = op_r_r_r_carry.regCarry;
}
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
// no effect on registers
@ -155,6 +190,10 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
{
registersUsed->readNamedReg1 = op_conditionalJump2.registerBool;
}
else if (type == PPCREC_IML_TYPE_JUMP)
{
// no registers affected
}
else if (type == PPCREC_IML_TYPE_LOAD)
{
registersUsed->writtenNamedReg1 = op_storeLoad.registerData;
@ -215,6 +254,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0:
// PS1 remains the same
registersUsed->readFPR4 = op_storeLoad.registerData;
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
break;
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
@ -227,6 +267,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0:
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0:
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
break;
default:
cemu_assert_unimplemented();
@ -251,6 +292,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
break;
case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0:
// PS1 remains the same
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
registersUsed->readFPR4 = op_storeLoad.registerData;
break;
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
@ -263,6 +305,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0:
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
break;
default:
cemu_assert_unimplemented();
@ -283,6 +326,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
registersUsed->readNamedReg2 = op_storeLoad.registerGQR;
break;
default:
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
break;
}
}
@ -304,6 +348,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
registersUsed->readNamedReg3 = op_storeLoad.registerGQR;
break;
default:
cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER);
break;
}
}
@ -430,8 +475,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
#define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x))
// Translates a register index through translationTable and returns the mapped register.
// Every register passed in is expected to have an entry in the table; a missing entry is
// reported via cemu_assert_debug. If the assertion does not stop execution, reg is returned
// unchanged (the same fallback the array-based overload uses) instead of dereferencing the
// end iterator, which would be undefined behavior.
sint32 replaceRegisterMultiple(sint32 reg, const std::unordered_map<IMLReg, IMLReg>& translationTable)
{
	const auto it = translationTable.find(reg);
	const bool hasMapping = (it != translationTable.cend());
	cemu_assert_debug(hasMapping);
	if (!hasMapping)
		return reg;
	return it->second;
}
sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4])
{
// deprecated but still used for FPRs
for (sint32 i = 0; i < 4; i++)
{
if (match[i] < 0)
@ -444,56 +497,70 @@ sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4])
return reg;
}
void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4])
//void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4])
void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& translationTable)
{
if (type == PPCREC_IML_TYPE_R_NAME)
{
op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced);
op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, translationTable);
}
else if (type == PPCREC_IML_TYPE_NAME_R)
{
op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced);
op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R)
{
op_r_r.registerResult = replaceRegisterMultiple(op_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced);
op_r_r.registerA = replaceRegisterMultiple(op_r_r.registerA, gprRegisterSearched, gprRegisterReplaced);
op_r_r.registerResult = replaceRegisterMultiple(op_r_r.registerResult, translationTable);
op_r_r.registerA = replaceRegisterMultiple(op_r_r.registerA, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_S32)
{
op_r_immS32.registerIndex = replaceRegisterMultiple(op_r_immS32.registerIndex, gprRegisterSearched, gprRegisterReplaced);
op_r_immS32.registerIndex = replaceRegisterMultiple(op_r_immS32.registerIndex, translationTable);
}
else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
op_conditional_r_s32.registerIndex = replaceRegisterMultiple(op_conditional_r_s32.registerIndex, gprRegisterSearched, gprRegisterReplaced);
op_conditional_r_s32.registerIndex = replaceRegisterMultiple(op_conditional_r_s32.registerIndex, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R_S32)
{
op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, gprRegisterSearched, gprRegisterReplaced);
op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, gprRegisterSearched, gprRegisterReplaced);
op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, translationTable);
op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
{
op_r_r_s32_carry.regR = replaceRegisterMultiple(op_r_r_s32_carry.regR, translationTable);
op_r_r_s32_carry.regA = replaceRegisterMultiple(op_r_r_s32_carry.regA, translationTable);
op_r_r_s32_carry.regCarry = replaceRegisterMultiple(op_r_r_s32_carry.regCarry, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R_R)
{
op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced);
op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, gprRegisterSearched, gprRegisterReplaced);
op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, gprRegisterSearched, gprRegisterReplaced);
op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, translationTable);
op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, translationTable);
op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
{
op_r_r_r_carry.regR = replaceRegisterMultiple(op_r_r_r_carry.regR, translationTable);
op_r_r_r_carry.regA = replaceRegisterMultiple(op_r_r_r_carry.regA, translationTable);
op_r_r_r_carry.regB = replaceRegisterMultiple(op_r_r_r_carry.regB, translationTable);
op_r_r_r_carry.regCarry = replaceRegisterMultiple(op_r_r_r_carry.regCarry, translationTable);
}
else if (type == PPCREC_IML_TYPE_COMPARE)
{
op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, gprRegisterSearched, gprRegisterReplaced);
op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, gprRegisterSearched, gprRegisterReplaced);
op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, gprRegisterSearched, gprRegisterReplaced);
op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, translationTable);
op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, translationTable);
op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, translationTable);
}
else if (type == PPCREC_IML_TYPE_COMPARE_S32)
{
op_compare_s32.registerResult = replaceRegisterMultiple(op_compare_s32.registerResult, gprRegisterSearched, gprRegisterReplaced);
op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, gprRegisterSearched, gprRegisterReplaced);
op_compare_s32.registerResult = replaceRegisterMultiple(op_compare_s32.registerResult, translationTable);
op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, translationTable);
}
else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, gprRegisterSearched, gprRegisterReplaced);
op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, translationTable);
}
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP)
{
// no effect on registers
}
@ -509,7 +576,7 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste
}
else if (operation == PPCREC_IML_MACRO_B_TO_REG)
{
op_macro.param = replaceRegisterMultiple(op_macro.param, gprRegisterSearched, gprRegisterReplaced);
op_macro.param = replaceRegisterMultiple(op_macro.param, translationTable);
}
else
{
@ -518,33 +585,33 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste
}
else if (type == PPCREC_IML_TYPE_LOAD)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_LOAD_INDEXED)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable);
}
else if (type == PPCREC_IML_TYPE_STORE)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
}
else if (type == PPCREC_IML_TYPE_STORE_INDEXED)
{
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable);
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable);
}
else if (type == PPCREC_IML_TYPE_CR)
{
@ -562,52 +629,52 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste
{
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
{
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable);
}
if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_FPR_STORE)
{
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
{
if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable);
}
if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER)
{
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced);
op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_FPR_R_R)
@ -654,7 +721,7 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist
{
// not affected
}
else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || type == PPCREC_IML_TYPE_JUMP)
{
// not affected
}
@ -760,15 +827,15 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe
{
// not affected
}
else if (type == PPCREC_IML_TYPE_R_R_S32)
else if (type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_R_R_S32_CARRY)
{
// not affected
}
else if (type == PPCREC_IML_TYPE_R_R_R)
else if (type == PPCREC_IML_TYPE_R_R_R || type == PPCREC_IML_TYPE_R_R_R_CARRY)
{
// not affected
}
else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || type == PPCREC_IML_TYPE_JUMP)
{
// not affected
}

View File

@ -19,14 +19,13 @@ enum
PPCREC_IML_OP_XOR, // '^' operator
PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator
PPCREC_IML_OP_LEFT_SHIFT, // shift left operator
PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned)
PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned)
PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed)
// ppc
PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask)
PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag)
PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
PPCREC_IML_OP_CNTLZW,
PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry)
PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20
PPCREC_IML_OP_MFCR, // copy cr to gpr
PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask)
@ -83,7 +82,7 @@ enum
// R_R_S32 only
// R_R_R + R_R_S32
PPCREC_IML_OP_ADD,
PPCREC_IML_OP_ADD, // also R_R_R_CARRY
PPCREC_IML_OP_SUB,
// R_R only
@ -92,14 +91,10 @@ enum
PPCREC_IML_OP_ASSIGN_S16_TO_S32,
PPCREC_IML_OP_ASSIGN_S8_TO_S32,
// deprecated
PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit
PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit
PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit
PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag
PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag
// R_R_R_carry
PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1)
};
#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN)
enum
@ -116,7 +111,7 @@ enum
PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
};
enum
enum // deprecated condition codes
{
PPCREC_JUMP_CONDITION_NONE,
PPCREC_JUMP_CONDITION_E, // equal / zero
@ -158,7 +153,9 @@ enum
PPCREC_IML_TYPE_NO_OP, // no-op instruction
PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r)
PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r*
PPCREC_IML_TYPE_R_R_R_CARRY, // r* = r* (op) r* (reads and/or updates carry)
PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32*
PPCREC_IML_TYPE_R_R_S32_CARRY, // r* = r* (op) s32* (reads and/or updates carry)
PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*]
PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*]
PPCREC_IML_TYPE_STORE, // [r*+s32*] = r*
@ -174,6 +171,7 @@ enum
// new style of handling conditions and branches:
PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r*
PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm
PPCREC_IML_TYPE_JUMP, // replaces CJUMP. Jump always, no condition
PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. Jump condition is based on boolean register
// conditional
@ -199,6 +197,7 @@ enum
PPCREC_NAME_SPR0 = 3000,
PPCREC_NAME_FPR0 = 4000,
PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
PPCREC_NAME_XER_CA = 6000, // carry bit
};
// special cases for LOAD/STORE
@ -260,8 +259,8 @@ struct IMLUsedRegisters
sint16 readNamedReg2;
sint16 readNamedReg3;
sint16 writtenNamedReg1;
sint16 writtenNamedReg2;
};
sint16 gpr[4]; // 3 read + 1 write
};
// FPR
union
@ -275,10 +274,69 @@ struct IMLUsedRegisters
sint16 readFPR4;
sint16 writtenFPR1;
};
sint16 fpr[4];
//sint16 fpr[4];
};
// Returns true if imlReg matches one of the two written GPR slots of this usage record.
// imlReg must be a valid (non-negative) register index.
bool IsRegWritten(sint16 imlReg) const // GPRs
{
	cemu_assert_debug(imlReg >= 0);
	if (writtenNamedReg1 == imlReg)
		return true;
	return writtenNamedReg2 == imlReg;
}
// Calls F(reg) for every written GPR slot that is in use.
// A negative slot value marks an unused slot and is skipped.
// Slots are visited in order: writtenNamedReg1, writtenNamedReg2.
template<typename Fn>
void ForEachWrittenGPR(Fn F)
{
	auto visitIfUsed = [&F](auto& reg)
	{
		if (reg >= 0)
			F(reg);
	};
	visitIfUsed(writtenNamedReg1);
	visitIfUsed(writtenNamedReg2);
}
// Calls F(reg) for every read GPR slot that is in use.
// A negative slot value marks an unused slot and is skipped.
// Slots are visited in order: readNamedReg1, readNamedReg2, readNamedReg3.
template<typename Fn>
void ForEachReadGPR(Fn F)
{
	auto visitIfUsed = [&F](auto& reg)
	{
		if (reg >= 0)
			F(reg);
	};
	visitIfUsed(readNamedReg1);
	visitIfUsed(readNamedReg2);
	visitIfUsed(readNamedReg3);
}
// Calls F(reg, isWritten) for every GPR slot that is in use (negative slots are skipped).
// The three read slots are reported first with isWritten == false,
// followed by the two written slots with isWritten == true.
template<typename Fn>
void ForEachAccessedGPR(Fn F)
{
	auto reportRead = [&F](auto& reg)
	{
		if (reg >= 0)
			F(reg, false);
	};
	auto reportWritten = [&F](auto& reg)
	{
		if (reg >= 0)
			F(reg, true);
	};
	reportRead(readNamedReg1);
	reportRead(readNamedReg2);
	reportRead(readNamedReg3);
	reportWritten(writtenNamedReg1);
	reportWritten(writtenNamedReg2);
}
// Returns true if imlReg appears in any FPR slot of this usage record
// (one of the four read slots or the written slot).
// imlReg must be a valid (non-negative) register index.
bool HasFPRReg(sint16 imlReg) const
{
	cemu_assert_debug(imlReg >= 0);
	return readFPR1 == imlReg ||
		readFPR2 == imlReg ||
		readFPR3 == imlReg ||
		readFPR4 == imlReg ||
		writtenFPR1 == imlReg;
}
};
using IMLReg = uint8;
struct IMLInstruction
{
uint8 type;
@ -307,12 +365,25 @@ struct IMLInstruction
}op_r_r_r;
struct
{
// R = A (op) immS32 [update cr* in mode *]
IMLReg regR;
IMLReg regA;
IMLReg regB;
IMLReg regCarry;
}op_r_r_r_carry;
struct
{
uint8 registerResult;
uint8 registerA;
sint32 immS32;
}op_r_r_s32;
struct
{
IMLReg regR;
IMLReg regA;
sint32 immS32;
IMLReg regCarry;
}op_r_r_s32_carry;
struct
{
// R/F = NAME or NAME = R/F
uint8 registerIndex;
@ -426,6 +497,7 @@ struct IMLInstruction
type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB ||
type == PPCREC_IML_TYPE_CJUMP ||
type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ||
type == PPCREC_IML_TYPE_JUMP ||
type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
return true;
return false;
@ -496,6 +568,18 @@ struct IMLInstruction
this->op_r_r_r.registerB = registerB;
}
// Initializes this instruction as PPCREC_IML_TYPE_R_R_R_CARRY:
// registerResult = registerA (operation) registerB, with registerCarry naming the carry register.
void make_r_r_r_carry(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 registerCarry)
{
	// operand registers
	this->op_r_r_r_carry.regCarry = registerCarry;
	this->op_r_r_r_carry.regB = registerB;
	this->op_r_r_r_carry.regA = registerA;
	this->op_r_r_r_carry.regR = registerResult;
	// both CR fields are set to 0xFF for this instruction type
	this->crMode = 0xFF;
	this->crRegister = 0xFF;
	// instruction kind
	this->operation = operation;
	this->type = PPCREC_IML_TYPE_R_R_R_CARRY;
}
void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0)
{
// operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234")
@ -508,6 +592,18 @@ struct IMLInstruction
this->op_r_r_s32.immS32 = immS32;
}
// Initializes this instruction as PPCREC_IML_TYPE_R_R_S32_CARRY:
// registerResult = registerA (operation) immS32, with registerCarry naming the carry register.
void make_r_r_s32_carry(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 registerCarry)
{
	// operands
	this->op_r_r_s32_carry.regCarry = registerCarry;
	this->op_r_r_s32_carry.immS32 = immS32;
	this->op_r_r_s32_carry.regA = registerA;
	this->op_r_r_s32_carry.regR = registerResult;
	// both CR fields are set to 0xFF for this instruction type
	this->crMode = 0xFF;
	this->crRegister = 0xFF;
	// instruction kind
	this->operation = operation;
	this->type = PPCREC_IML_TYPE_R_R_S32_CARRY;
}
void make_compare(uint8 registerA, uint8 registerB, uint8 registerResult, IMLCondition cond)
{
this->type = PPCREC_IML_TYPE_COMPARE;
@ -542,6 +638,14 @@ struct IMLInstruction
this->op_conditionalJump2.mustBeTrue = mustBeTrue;
}
// Initializes this instruction as PPCREC_IML_TYPE_JUMP (jump always, no condition).
void make_jump_new()
{
	this->crMode = 0;
	this->crRegister = PPC_REC_INVALID_REGISTER;
	// NOTE(review): -999 looks like a placeholder since a jump carries no operation;
	// confirm how it converts to the declared type of `operation`
	this->operation = -999;
	this->type = PPCREC_IML_TYPE_JUMP;
}
// load from memory
void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
{
@ -572,7 +676,8 @@ struct IMLInstruction
void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;
void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]);
//void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]);
void RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& translationTable);
void ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]);
void ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced);
};

View File

@ -179,15 +179,7 @@ ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegist
if( (unloadLockedMask&(1<<i)) != 0 )
continue;
uint32 virtualReg = rCtx->currentMapping[i].virtualReg;
bool isReserved = false;
for (sint32 f = 0; f < 4; f++)
{
if (virtualReg == (sint32)instructionUsedRegisters->fpr[f])
{
isReserved = true;
break;
}
}
bool isReserved = instructionUsedRegisters->HasFPRReg(virtualReg);
if (isReserved)
continue;
if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse)
@ -373,7 +365,7 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG
imlInstruction->CheckRegisterUsage(&registersUsed);
if( registersUsed.readNamedReg1 == registerIndex || registersUsed.readNamedReg2 == registerIndex || registersUsed.readNamedReg3 == registerIndex )
return false;
if( registersUsed.writtenNamedReg1 == registerIndex )
if (registersUsed.IsRegWritten(registerIndex))
return true;
}
// todo: Scan next segment(s)
@ -411,7 +403,7 @@ bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcIml
IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
IMLUsedRegisters registersUsed;
imlInstruction->CheckRegisterUsage(&registersUsed);
if( registersUsed.writtenNamedReg1 == registerIndex )
if( registersUsed.IsRegWritten(registerIndex) )
{
if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME )
return true;
@ -620,84 +612,84 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext
}
}
/*
 * Returns true if any instruction of imlSegment within the inclusive index range
 * [startIndex, endIndex] writes to the GPR vreg.
 * vreg must be a valid (non-negative) register index.
 */
bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg)
{
	IMLUsedRegisters registersUsed;
	for (sint32 i = startIndex; i <= endIndex; i++)
	{
		IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
		imlInstruction->CheckRegisterUsage(&registersUsed);
		// an instruction can have two written GPRs, IsRegWritten checks both slots
		if (registersUsed.IsRegWritten(vreg))
			return true;
	}
	return false;
}
//bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg)
//{
// IMLUsedRegisters registersUsed;
// for (sint32 i = startIndex; i <= endIndex; i++)
// {
// IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
// imlInstruction->CheckRegisterUsage(&registersUsed);
// if (registersUsed.IsRegWritten(vreg))
// return true;
// }
// return false;
//}
/*
 * Scans backwards from startIndex in startSegment for a PPCREC_IML_TYPE_NAME_R instruction
 * whose op_r_name.name equals name. The scan continues into at most one previous segment,
 * and only if the current segment has exactly one predecessor.
 * Returns the register index of the found instruction if no instruction between it and the
 * start position writes that register. Returns -1 if nothing was found, if a scanned segment
 * is enterable, or if the register is modified in between.
 */
sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* startSegment, sint32 startIndex, sint32 name)
{
	// pass 1: locate the NAME_R instruction for the requested name
	sint32 currentIndex = startIndex;
	IMLSegment* currentSegment = startSegment;
	sint32 segmentIterateCount = 0;
	sint32 foundRegister = -1;
	while (true)
	{
		// stop scanning if segment is enterable
		if (currentSegment->isEnterable)
			return -1;
		while (currentIndex >= 0)
		{
			if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name)
			{
				foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex;
				break;
			}
			// previous instruction
			currentIndex--;
		}
		if (foundRegister >= 0)
			break;
		// continue at previous segment (if there is only one)
		if (segmentIterateCount >= 1)
			return -1;
		if (currentSegment->list_prevSegments.size() != 1)
			return -1;
		currentSegment = currentSegment->list_prevSegments[0];
		currentIndex = currentSegment->imlList.size() - 1;
		segmentIterateCount++;
	}
	// pass 2: walk the same path again to make sure the register is not modified in between
	currentIndex = startIndex;
	currentSegment = startSegment;
	segmentIterateCount = 0;
	IMLUsedRegisters registersUsed;
	while (true)
	{
		while (currentIndex >= 0)
		{
			// check if register is modified
			// instructions can have two written GPRs, IsRegWritten checks both slots
			// (foundRegister is guaranteed to be >= 0 here)
			currentSegment->imlList[currentIndex].CheckRegisterUsage(&registersUsed);
			if (registersUsed.IsRegWritten(foundRegister))
				return -1;
			// check if end of scan reached
			if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name)
			{
				return foundRegister;
			}
			// previous instruction
			currentIndex--;
		}
		// continue at previous segment (if there is only one)
		if (segmentIterateCount >= 1)
			return -1;
		if (currentSegment->list_prevSegments.size() != 1)
			return -1;
		currentSegment = currentSegment->list_prevSegments[0];
		currentIndex = currentSegment->imlList.size() - 1;
		segmentIterateCount++;
	}
	return -1;
}
//sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* startSegment, sint32 startIndex, sint32 name)
//{
// // current segment
// sint32 currentIndex = startIndex;
// IMLSegment* currentSegment = startSegment;
// sint32 segmentIterateCount = 0;
// sint32 foundRegister = -1;
// while (true)
// {
// // stop scanning if segment is enterable
// if (currentSegment->isEnterable)
// return -1;
// while (currentIndex >= 0)
// {
// if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name)
// {
// foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex;
// break;
// }
// // previous instruction
// currentIndex--;
// }
// if (foundRegister >= 0)
// break;
// // continue at previous segment (if there is only one)
// if (segmentIterateCount >= 1)
// return -1;
// if (currentSegment->list_prevSegments.size() != 1)
// return -1;
// currentSegment = currentSegment->list_prevSegments[0];
// currentIndex = currentSegment->imlList.size() - 1;
// segmentIterateCount++;
// }
// // scan again to make sure the register is not modified inbetween
// currentIndex = startIndex;
// currentSegment = startSegment;
// segmentIterateCount = 0;
// IMLUsedRegisters registersUsed;
// while (true)
// {
// while (currentIndex >= 0)
// {
// // check if register is modified
// currentSegment->imlList[currentIndex].CheckRegisterUsage(&registersUsed);
// if (registersUsed.IsRegWritten(foundRegister))
// return -1;
// // check if end of scan reached
// if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name)
// {
// return foundRegister;
// }
// // previous instruction
// currentIndex--;
// }
// // continue at previous segment (if there is only one)
// if (segmentIterateCount >= 1)
// return -1;
// if (currentSegment->list_prevSegments.size() != 1)
// return -1;
// currentSegment = currentSegment->list_prevSegments[0];
// currentIndex = currentSegment->imlList.size() - 1;
// segmentIterateCount++;
// }
// return -1;
//}
void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 fprIndex)
{
@ -830,7 +822,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp
{
break;
}
if (registersUsed.writtenNamedReg1 == gprIndex)
if (registersUsed.IsRegWritten(gprIndex))
return; // GPR overwritten, we don't need to byte swap anymore
}
if (foundMatch)
@ -933,6 +925,8 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0;
else if (formatType == 7)
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0;
if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0)
instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER;
}
else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1)
{
@ -946,6 +940,8 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1;
else if (formatType == 7)
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1;
if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1)
instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER;
}
}
else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
@ -978,6 +974,8 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0;
else if (formatType == 7)
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0;
if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0)
instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER;
}
else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
{
@ -991,127 +989,129 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1;
else if (formatType == 7)
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1;
if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER;
}
}
}
}
}
/*
 * Returns true if registerWrite overwrites any of the registers read by registerRead
 * All written GPR slots of registerWrite are considered; unused (negative) slots are ignored
 */
bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, IMLUsedRegisters* registerWrite)
{
	bool overwritesReadRegister = false;
	// ForEachWrittenGPR only reports slots that are in use, so no separate check for negative values is needed
	registerWrite->ForEachWrittenGPR([&](auto writtenReg) {
		if (writtenReg == registerRead->readNamedReg1 ||
			writtenReg == registerRead->readNamedReg2 ||
			writtenReg == registerRead->readNamedReg3)
		{
			overwritesReadRegister = true;
		}
	});
	return overwritesReadRegister;
}
///*
// * Returns true if registerWrite overwrites any of the registers read by registerRead
// */
//bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, IMLUsedRegisters* registerWrite)
//{
// if (registerWrite->writtenNamedReg1 < 0)
// return false;
//
// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg1)
// return true;
// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg2)
// return true;
// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg3)
// return true;
// return false;
//}
// If the segment ends with a CJUMP that depends on a CR register (index < 8), this looks for the
// earlier instruction that writes the CR bits read by that jump. When instructions classified as
// "unsafe" (risk of overwriting x64 eflags) sit between that CR setter and the jump, and no GPR
// dependency prevents it, a copy of the CR setter is inserted right after the unsafe instruction
// nearest to the jump and the original CR setter is turned into a no-op.
// Any case that is not straightforward (CR reads in between, other CR bits written, FPR usage) is left untouched.
void _reorderConditionModifyInstructions(IMLSegment* imlSegment)
{
IMLInstruction* lastInstruction = imlSegment->GetLastInstruction();
// last instruction is a conditional branch?
if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP)
return;
if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8)
return;
// get CR bitmask of bit required for conditional jump
PPCRecCRTracking_t crTracking;
IMLAnalyzer_GetCRTracking(lastInstruction, &crTracking);
uint32 requiredCRBits = crTracking.readCRBits;
// scan backwards until we find the instruction that sets the CR
sint32 crSetterInstructionIndex = -1;
// since the scan runs backwards, this ends up as the unsafe instruction closest to the jump
sint32 unsafeInstructionIndex = -1;
for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--)
{
IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
IMLAnalyzer_GetCRTracking(imlInstruction, &crTracking);
if (crTracking.readCRBits != 0)
return; // dont handle complex cases for now
if (crTracking.writtenCRBits != 0)
{
if ((crTracking.writtenCRBits&requiredCRBits) != 0)
{
crSetterInstructionIndex = i;
break;
}
else
{
return; // other CR bits overwritten (dont handle complex cases)
}
}
// is safe? (no risk of overwriting x64 eflags)
if ((imlInstruction->type == PPCREC_IML_TYPE_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_R_NAME || imlInstruction->type == PPCREC_IML_TYPE_NO_OP) ||
(imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) ||
(imlInstruction->type == PPCREC_IML_TYPE_R_S32 && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ||
(imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) )
continue;
// not safe
if (unsafeInstructionIndex == -1)
unsafeInstructionIndex = i;
}
if (crSetterInstructionIndex < 0)
return;
if (unsafeInstructionIndex < 0)
return; // no danger of overwriting eflags, don't reorder
// check if we can move the CR setter instruction to after unsafeInstructionIndex
// crTracking still holds the tracking data of the CR setter here (the loop breaks right after filling it)
// note: crTrackingSetter is not referenced again within this function
PPCRecCRTracking_t crTrackingSetter = crTracking;
IMLUsedRegisters regTrackingCRSetter;
imlSegment->imlList[crSetterInstructionIndex].CheckRegisterUsage(&regTrackingCRSetter);
if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0)
return; // we don't handle FPR dependency yet so just ignore FPR instructions
IMLUsedRegisters registerTracking;
// NOTE(review): the dependency checks below only look at writtenNamedReg1; writtenNamedReg2 is not considered here
if (regTrackingCRSetter.writtenNamedReg1 >= 0)
{
// CR setter does write GPR
for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++)
{
imlSegment->imlList[i].CheckRegisterUsage(&registerTracking);
// reads register written by CR setter?
if (PPCRecompilerAnalyzer_checkForGPROverwrite(&registerTracking, &regTrackingCRSetter))
{
return; // cant move CR setter because of dependency
}
// writes register read by CR setter?
if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking))
{
return; // cant move CR setter because of dependency
}
// overwrites register written by CR setter?
if (regTrackingCRSetter.writtenNamedReg1 == registerTracking.writtenNamedReg1)
return;
}
}
else
{
// CR setter does not write GPR
for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++)
{
imlSegment->imlList[i].CheckRegisterUsage(&registerTracking);
// writes register read by CR setter?
if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking))
{
return; // cant move CR setter because of dependency
}
}
}
// move CR setter instruction
#ifdef CEMU_DEBUG_ASSERT
if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex)
assert_dbg();
#endif
// the insert position lies behind the CR setter, so crSetterInstructionIndex is presumably still valid afterwards
// (assumes PPCRecompiler_insertInstruction only shifts entries at or after the given index - TODO confirm)
IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1);
memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction));
imlSegment->imlList[crSetterInstructionIndex].make_no_op();
// the block below appears to be a commented-out copy of the code above
// IMLInstruction* lastInstruction = imlSegment->GetLastInstruction();
// // last instruction is a conditional branch?
// if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP)
// return;
// if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8)
// return;
// // get CR bitmask of bit required for conditional jump
// PPCRecCRTracking_t crTracking;
// IMLAnalyzer_GetCRTracking(lastInstruction, &crTracking);
// uint32 requiredCRBits = crTracking.readCRBits;
//
// // scan backwards until we find the instruction that sets the CR
// sint32 crSetterInstructionIndex = -1;
// sint32 unsafeInstructionIndex = -1;
// for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--)
// {
// IMLInstruction* imlInstruction = imlSegment->imlList.data() + i;
// IMLAnalyzer_GetCRTracking(imlInstruction, &crTracking);
// if (crTracking.readCRBits != 0)
// return; // dont handle complex cases for now
// if (crTracking.writtenCRBits != 0)
// {
// if ((crTracking.writtenCRBits&requiredCRBits) != 0)
// {
// crSetterInstructionIndex = i;
// break;
// }
// else
// {
// return; // other CR bits overwritten (dont handle complex cases)
// }
// }
// // is safe? (no risk of overwriting x64 eflags)
// if ((imlInstruction->type == PPCREC_IML_TYPE_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_R_NAME || imlInstruction->type == PPCREC_IML_TYPE_NO_OP) ||
// (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) ||
// (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ||
// (imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) )
// continue;
// // not safe
// if (unsafeInstructionIndex == -1)
// unsafeInstructionIndex = i;
// }
// if (crSetterInstructionIndex < 0)
// return;
// if (unsafeInstructionIndex < 0)
// return; // no danger of overwriting eflags, don't reorder
// // check if we can move the CR setter instruction to after unsafeInstructionIndex
// PPCRecCRTracking_t crTrackingSetter = crTracking;
// IMLUsedRegisters regTrackingCRSetter;
// imlSegment->imlList[crSetterInstructionIndex].CheckRegisterUsage(&regTrackingCRSetter);
// if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0)
// return; // we don't handle FPR dependency yet so just ignore FPR instructions
// IMLUsedRegisters registerTracking;
// if (regTrackingCRSetter.writtenNamedReg1 >= 0)
// {
// // CR setter does write GPR
// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++)
// {
// imlSegment->imlList[i].CheckRegisterUsage(&registerTracking);
// // reads register written by CR setter?
// if (PPCRecompilerAnalyzer_checkForGPROverwrite(&registerTracking, &regTrackingCRSetter))
// {
// return; // cant move CR setter because of dependency
// }
// // writes register read by CR setter?
// if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking))
// {
// return; // cant move CR setter because of dependency
// }
// // overwrites register written by CR setter?
// if (regTrackingCRSetter.writtenNamedReg1 == registerTracking.writtenNamedReg1)
// return;
// }
// }
// else
// {
// // CR setter does not write GPR
// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++)
// {
// imlSegment->imlList[i].CheckRegisterUsage(&registerTracking);
// // writes register read by CR setter?
// if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking))
// {
// return; // cant move CR setter because of dependency
// }
// }
// }
//
// // move CR setter instruction
//#ifdef CEMU_DEBUG_ASSERT
// if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex)
// assert_dbg();
//#endif
// IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1);
// memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction));
// imlSegment->imlList[crSetterInstructionIndex].make_no_op();
}
/*

View File

@ -764,12 +764,11 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX];
for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++)
virtualReg2PhysReg[i] = -1;
std::unordered_map<IMLReg, IMLReg> virt2PhysRegMap; // key = virtual register, value = physical register
raLiveRangeInfo_t liveInfo;
liveInfo.liveRangesCount = 0;
sint32 index = 0;
sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0;
//sint32 suffixInstructionIndex = imlSegment->imlList.size() - suffixInstructionCount; // if no suffix instruction exists this matches instruction count
// load register ranges that are supplied from previous segments
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr)
@ -789,6 +788,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
assert_dbg();
#endif
virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister;
virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister);
}
// next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
@ -806,6 +806,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1)
assert_dbg();
virtualReg2PhysReg[liverange->range->virtualRegister] = -1;
virt2PhysRegMap.erase(liverange->range->virtualRegister);
// store GPR if required
// special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed
if (liverange->hasStore)
@ -844,37 +845,13 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
// update translation table
cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1);
virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister;
virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister);
}
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
}
// rewrite registers
// todo - this can be simplified by using a map or lookup table rather than a check + 4 slot translation table
if (index < imlSegment->imlList.size())
{
IMLUsedRegisters gprTracking;
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
sint32 inputGpr[4];
inputGpr[0] = gprTracking.gpr[0];
inputGpr[1] = gprTracking.gpr[1];
inputGpr[2] = gprTracking.gpr[2];
inputGpr[3] = gprTracking.gpr[3];
sint32 replaceGpr[4];
for (sint32 f = 0; f < 4; f++)
{
sint32 virtualRegister = gprTracking.gpr[f];
if (virtualRegister < 0)
{
replaceGpr[f] = -1;
continue;
}
if (virtualRegister >= IML_RA_VIRT_REG_COUNT_MAX)
assert_dbg();
replaceGpr[f] = virtualReg2PhysReg[virtualRegister];
cemu_assert_debug(replaceGpr[f] >= 0);
}
imlSegment->imlList[index].ReplaceGPR(inputGpr, replaceGpr);
}
imlSegment->imlList[index].RewriteGPR(virt2PhysRegMap);
// next iml instruction
index++;
}
@ -889,6 +866,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
// update translation table
cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1);
virtualReg2PhysReg[liverange->range->virtualRegister] = -1;
virt2PhysRegMap.erase(liverange->range->virtualRegister);
// store GPR
if (liverange->hasStore)
{
@ -929,6 +907,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
// update translation table
cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1);
virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister;
virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister);
}
// next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
@ -1039,21 +1018,12 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext,
IMLUsedRegisters gprTracking;
while (index < imlSegment->imlList.size())
{
// end loop at suffix instruction
//if (imlSegment->imlList[index].IsSuffixInstruction())
// break;
// get accessed GPRs
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
for (sint32 t = 0; t < 4; t++)
{
sint32 virtualRegister = gprTracking.gpr[t];
if (virtualRegister < 0)
continue;
cemu_assert_debug(virtualRegister < IML_RA_VIRT_REG_COUNT_MAX);
imlSegment->raDistances.reg[virtualRegister].usageStart = std::min<sint32>(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction
imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max<sint32>(imlSegment->raDistances.reg[virtualRegister].usageEnd, index + 1); // index after instruction
}
// next instruction
gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) {
cemu_assert_debug(gprId < IML_RA_VIRT_REG_COUNT_MAX);
imlSegment->raDistances.reg[gprId].usageStart = std::min<sint32>(imlSegment->raDistances.reg[gprId].usageStart, index); // index before/at instruction
imlSegment->raDistances.reg[gprId].usageEnd = std::max<sint32>(imlSegment->raDistances.reg[gprId].usageEnd, index + 1); // index after instruction
});
index++;
}
}
@ -1141,29 +1111,17 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext,
IMLUsedRegisters gprTracking;
while (index < imlSegment->imlList.size())
{
// we parse suffix instructions too for any potential input registers (writes not allowed), but note that any spills/stores need to happen before the suffix instruction
//// end loop at suffix instruction
//if (imlSegment->imlList[index].IsSuffixInstruction())
// break;
// get accessed GPRs
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
// handle accessed GPR
for (sint32 t = 0; t < 4; t++)
{
sint32 virtualRegister = gprTracking.gpr[t];
if (virtualRegister < 0)
continue;
bool isWrite = (t == 3);
gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) {
// add location
PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite);
PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[gprId], index, !isWritten, isWritten);
#ifdef CEMU_DEBUG_ASSERT
if ((sint32)index < vGPR2Subrange[virtualRegister]->start.index)
assert_dbg();
if ((sint32)index + 1 > vGPR2Subrange[virtualRegister]->end.index)
assert_dbg();
if ((sint32)index < vGPR2Subrange[gprId]->start.index)
assert_dbg();
if ((sint32)index + 1 > vGPR2Subrange[gprId]->end.index)
assert_dbg();
#endif
}
// next instruction
});
index++;
}
}

View File

@ -167,13 +167,11 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
return nullptr;
}
uint32 ppcRecLowerAddr = LaunchSettings::GetPPCRecLowerAddr();
uint32 ppcRecUpperAddr = LaunchSettings::GetPPCRecUpperAddr();
if (ppcRecLowerAddr != 0 && ppcRecUpperAddr != 0)
{
if (ppcRecFunc->ppcAddress < ppcRecLowerAddr || ppcRecFunc->ppcAddress > ppcRecUpperAddr)
{
delete ppcRecFunc;
@ -188,11 +186,16 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
return nullptr;
}
//if (ppcRecFunc->ppcAddress == 0x12345678)
//if (ppcRecFunc->ppcAddress == 0x11223344)
//{
// debug_printf("----------------------------------------\n");
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
// //debug_printf("----------------------------------------\n");
// //IMLDebug_Dump(&ppcImlGenContext);
// //__debugbreak();
//}
//else
//{
// delete ppcRecFunc;
// return nullptr;
//}
// Large functions for testing (botw):

View File

@ -109,6 +109,14 @@ struct ppcImlGenContext_t
segmentList2.insert(segmentList2.begin() + index, 1, newSeg);
return newSeg;
}
std::span<IMLSegment*> InsertSegments(size_t index, size_t count)
{
	// Open up `count` empty slots starting at `index`, then give every slot its own newly allocated segment.
	// The returned span views the inserted entries directly inside segmentList2's storage.
	segmentList2.insert(segmentList2.begin() + index, count, nullptr);
	std::span<IMLSegment*> insertedSegments(segmentList2.data() + index, count);
	for (IMLSegment*& slot : insertedSegments)
		slot = new IMLSegment();
	return insertedSegments;
}
};
typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)();

View File

@ -179,6 +179,39 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex
imlInstruction->op_storeLoad.flags2.signExtend = signExtend;
}
// create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards
//
// Three new segments are inserted directly behind the current write segment, in list order:
// branch-not-taken, branch-taken, merge.
//   ppcImlGenContext         - context whose segment list is extended and whose currentOutputSegment is switched while generating
//   basicBlockInfo           - supplies the segment currently being appended to; its appendSegment is redirected to the merge segment
//   genSegmentBranchTaken    - callable invoked with ppcImlGenContext to emit the instructions of the branch-taken path
//   genSegmentBranchNotTaken - callable invoked with ppcImlGenContext to emit the instructions of the branch-not-taken path
// Neither callable may switch ppcImlGenContext.currentOutputSegment (checked by cemu_assert_debug after each call).
// NOTE(review): no conditional jump is emitted here; presumably the caller must emit it as the last instruction
// of the current write segment before calling this function - confirm.
template<typename F1n, typename F2n>
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, F1n genSegmentBranchTaken, F2n genSegmentBranchNotTaken)
{
IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
std::span<IMLSegment*> segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, 3);
IMLSegment* segBranchNotTaken = segments[0];
IMLSegment* segBranchTaken = segments[1];
IMLSegment* segMerge = segments[2];
// link the segments
// the merge segment takes over the outgoing links of the current segment; this has to happen before those links are replaced below
segMerge->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken());
segMerge->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken());
currentWriteSegment->SetLinkBranchTaken(segBranchTaken);
currentWriteSegment->SetLinkBranchNotTaken(segBranchNotTaken);
// both paths continue in the merge segment: the taken path through its not-taken link, the not-taken path through its taken link
segBranchTaken->SetLinkBranchNotTaken(segMerge);
segBranchNotTaken->SetLinkBranchTaken(segMerge);
// generate code for branch taken segment
ppcImlGenContext.currentOutputSegment = segBranchTaken;
genSegmentBranchTaken(ppcImlGenContext);
cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchTaken);
// generate code for branch not taken segment
ppcImlGenContext.currentOutputSegment = segBranchNotTaken;
genSegmentBranchNotTaken(ppcImlGenContext);
cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken);
// the not-taken segment is terminated with a jump instruction
// (presumably to skip over segBranchTaken, which sits between it and segMerge in the segment list - confirm)
ppcImlGenContext.emitInst().make_jump_new();
// make merge segment the new write segment
ppcImlGenContext.currentOutputSegment = segMerge;
basicBlockInfo.appendSegment = segMerge;
}
uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
if( mappedName == PPCREC_NAME_NONE )
@ -782,96 +815,24 @@ bool PPCRecompilerImlGen_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
return true;
}
// ADDC lowered to a single PPCREC_IML_OP_ADD_UPDATE_CARRY instruction (rD = rA + rB).
// When the Rc bit of the opcode is set, a CR0 update on rD is emitted as well. Always returns true.
// NOTE(review): a second definition of this function using make_r_r_r_carry appears further down in this listing;
// this one looks like the pre-rework version - confirm which one is live.
bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
//hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB]; -> Update carry
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD);
return true;
}
// ADDE lowered to a single PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY instruction (rD = rA + rB + ca).
// Note that the operands are passed as (rD, rB, rA). When the Rc bit is set, CR0 is updated from rD. Always returns true.
// NOTE(review): a second definition of this function using PPCREC_IML_OP_ADD_WITH_CARRY appears further down in this listing;
// this one looks like the pre-rework version - confirm which one is live.
bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
// hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB] + ca;
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD);
return true;
}
// ADDZE lowered as: copy rA into rD (only if they are different IML registers), then apply PPCREC_IML_OP_ADD_CARRY to rD.
// Asserts that the rB field of the opcode is zero. When the Rc bit is set, CR0 is updated from rD. Always returns true.
// NOTE(review): a second definition of this function using PPCREC_IML_OP_ADD_WITH_CARRY appears further down in this listing;
// this one looks like the pre-rework version - confirm which one is live.
bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
PPC_ASSERT(rB == 0);
//uint32 a = hCPU->gpr[rA];
//uint32 ca = hCPU->xer_ca;
//hCPU->gpr[rD] = a + ca;
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
// move rA to rD
if( registerRA != registerRD )
{
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA);
}
// the carry op receives rD as both result and operand since rD already holds the value of rA at this point
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD);
return true;
}
// ADDME lowered as: copy rA into rD (only if they are different IML registers), then apply PPCREC_IML_OP_ADD_CARRY_ME to rD.
// Asserts that the rB field of the opcode is zero. When the Rc bit is set, CR0 is updated from rD. Always returns true.
// NOTE(review): a second definition of this function using PPCREC_IML_OP_ADD_WITH_CARRY appears further down in this listing;
// this one looks like the pre-rework version - confirm which one is live.
bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
PPC_ASSERT(rB == 0);
//uint32 a = hCPU->gpr[rA];
//uint32 ca = hCPU->xer_ca;
//hCPU->gpr[rD] = a + ca + -1;
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
// move rA to rD
if( registerRA != registerRD )
{
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA);
}
// the carry op receives rD as both result and operand since rD already holds the value of rA at this point
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD);
return true;
}
bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
//hCPU->gpr[rD] = (rA ? (int)hCPU->gpr[rA] : 0) + (int)imm;
if( rA != 0 )
if (rA != 0)
{
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
// check if rD is already loaded, else use new temporary register
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, imm);
}
else
{
// rA not used, instruction is value assignment
// rD = imm
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, imm);
}
// never updates any cr
@ -883,48 +844,93 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
int rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm);
if( rA != 0 )
if (rA != 0)
{
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
// check if rD is already loaded, else use new temporary register
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, (sint32)imm);
}
else
{
// rA not used, instruction turns into simple value assignment
// rD = imm
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
}
// never updates any cr
return true;
}
// NOTE(review): this span carries two signatures back to back and two complete bodies interleaved:
//  - an ADDIC body (D-form decode, PPCREC_IML_OP_ADD_UPDATE_CARRY with an immediate, no CR update)
//  - an ADDC body (XO-form decode, PPCREC_IML_OP_ADD through make_r_r_r_carry with an explicit carry register, CR0 update if Rc is set)
// It reads as diff residue rather than one compilable definition; presumably only the ADDC lines are current - confirm before editing.
bool PPCRecompilerImlGen_ADDIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
// rD = rA + imm;
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm);
// never updates any cr
// r = a + b -> update carry
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
IMLReg regRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false);
IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
// the carry register is requested through loadOverwriteRegister, i.e. as a pure output of the add
IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD, regRD, regRA, regRB, regCa);
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD);
return true;
}
// NOTE(review): this span carries two signatures back to back and two bodies interleaved:
//  - a two-parameter version emitting PPCREC_IML_OP_ADD_UPDATE_CARRY and always updating CR0
//  - a three-parameter version (extra updateCR0 flag) emitting PPCREC_IML_OP_ADD through make_r_r_s32_carry with an explicit carry register
// It reads as diff residue rather than one compilable definition; presumably only the three-parameter lines are current - confirm before editing.
bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool updateCR0)
{
// this opcode is identical to ADDIC but additionally it updates CR0
sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
// rD = rA + imm;
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm);
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
// the carry register is requested through loadOverwriteRegister, i.e. as a pure output of the add
IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm, regCa);
// CR0 is only updated when the caller asks for it
if(updateCR0)
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD);
return true;
}
bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
	// ADDE: rD = rA + rB + carry, and the carry register receives the new carry
	sint32 rD, rA, rB;
	PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
	const bool updateCR0 = (opcode & PPC_OPC_RC) != 0;
	// registers are looked up in the order rA, rB, rD, carry
	IMLReg srcA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
	IMLReg srcB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false);
	IMLReg dst = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
	// carry is an input here, so it is loaded rather than requested as overwrite-only
	IMLReg carry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
	ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, dst, srcA, srcB, carry);
	if (updateCR0)
		PPCImlGen_UpdateCR0Logical(ppcImlGenContext, dst);
	return true;
}
bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
	// ADDZE: rD = rA + carry, and the carry register receives the new carry
	sint32 rD, rA, rB;
	PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
	const bool updateCR0 = (opcode & PPC_OPC_RC) != 0;
	// registers are looked up in the order rA, rD, carry
	IMLReg src = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
	IMLReg dst = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
	// carry is an input here, so it is loaded rather than requested as overwrite-only
	IMLReg carry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
	// expressed as an add-with-carry against the immediate zero
	ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, dst, src, 0, carry);
	if (updateCR0)
		PPCImlGen_UpdateCR0Logical(ppcImlGenContext, dst);
	return true;
}
bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
	// ADDME: rD = rA + 0xFFFFFFFF + carry, and the carry register receives the new carry
	sint32 rD, rA, rB;
	PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
	const bool updateCR0 = (opcode & PPC_OPC_RC) != 0;
	// registers are looked up in the order rA, rD, carry
	IMLReg src = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
	IMLReg dst = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
	// carry is an input here, so it is loaded rather than requested as overwrite-only
	IMLReg carry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
	// expressed as an add-with-carry against the immediate -1
	ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, dst, src, -1, carry);
	if (updateCR0)
		PPCImlGen_UpdateCR0Logical(ppcImlGenContext, dst);
	return true;
}
@ -932,71 +938,80 @@ bool PPCRecompilerImlGen_SUBF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
{
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
// hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1;
// rD = rB - rA
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA);
// rD = ~rA + rB + 1
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false);
IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, regD, regB, regA);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD);
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD);
return true;
}
// NOTE(review): this span interleaves two bodies of SUBFE:
//  - one using registerRA/registerRB/registerRD and a single PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY instruction
//  - one using regA/regB/regD that computes tmp = NOT a and then d = tmp + b + carry through PPCREC_IML_OP_ADD_WITH_CARRY
// It reads as diff residue rather than one compilable definition; presumably only the regA/regB/regD lines are current - confirm before editing.
bool PPCRecompilerImlGen_SUBFE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
// d = ~a + b + ca;
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
// hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + ca;
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
// temporary 0 holds ~a so that neither input register is modified
IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
// carry is an input here, so it is loaded rather than requested as overwrite-only
IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA);
ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD);
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD);
return true;
}
// NOTE(review): this span interleaves two bodies of SUBFZE:
//  - one that breaks into the debugger when rB != 0 and emits a two-operand PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY
//  - one that computes tmp = NOT a and then d = tmp + 0 + carry through PPCREC_IML_OP_ADD_WITH_CARRY
// It reads as diff residue rather than one compilable definition; presumably only the regA/regD lines are current - confirm before editing.
bool PPCRecompilerImlGen_SUBFZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
// d = ~a + ca;
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
if( rB != 0 )
debugBreakpoint();
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
// temporary 0 holds ~a so that the input register is not modified
IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
// carry is an input here, so it is loaded rather than requested as overwrite-only
IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA);
ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, 0, regCa);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD);
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD);
return true;
}
// NOTE(review): this span interleaves two bodies of SUBFC:
//  - one emitting a single PPCREC_IML_OP_SUBFC instruction on registerRD/registerRA/registerRB
//  - one that computes tmp = NOT a, presets the carry register to 1 and then emits d = tmp + b + carry through PPCREC_IML_OP_ADD_WITH_CARRY
// It reads as diff residue rather than one compilable definition; presumably only the regA/regB/regD lines are current - confirm before editing.
bool PPCRecompilerImlGen_SUBFC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
// d = ~a + b + 1;
sint32 rD, rA, rB;
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB);
// hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1;
// rD = rB - rA
uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUBFC, registerRD, registerRA, registerRB);
if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false);
IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
// temporary 0 holds ~a so that neither input register is modified
IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
// the previous carry value is not an input of SUBFC, hence the overwrite-only request
IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA);
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCa, 1); // set input carry to simulate offset of 1
ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD);
return true;
}
// SUBFIC: rD = ~rA + imm + 1, with the carry-out of that sum written to the carry register.
// CR0 is never affected by this instruction. Always returns true.
bool PPCRecompilerImlGen_SUBFIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
	// d = ~a + imm + 1
	sint32 rD, rA;
	uint32 imm;
	PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
	IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false);
	IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
	// the previous carry value is not an input of SUBFIC, hence the overwrite-only request
	IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
	// temporary 0 holds ~a so that the input register is not modified
	IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
	ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA);
	// The +1 must take part in the carry computation. Folding it into the immediate (imm + 1) loses the carry
	// for imm == -1: the addend wraps to 0 and the add reports no carry, although ~a + 0xFFFFFFFF + 1 always carries.
	// Feeding the +1 through the carry input (same approach as SUBFC) keeps the carry-out exact for every immediate.
	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCa, 1); // set input carry to simulate offset of 1
	ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, (sint32)imm, regCa);
	// never affects CR0
	return true;
}
@ -1102,7 +1117,7 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
else if( SH == (32-MB) && ME == 31 )
{
// SRWI
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, registerRA, registerRS, MB);
}
else
{
@ -1152,14 +1167,45 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// SRAW: rA = rS >> (rB & 0x3F) as an arithmetic shift; the carry register is set when rS is negative and
// at least one 1-bit is shifted out. The code is split into two paths depending on whether the shift amount
// is below 32 or not, and continues in a merge segment afterwards. Always returns true.
bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
	// unlike SRAWI, for SRAW the shift range is 0-63 (6 bits)
	// but only shifts up to register bitwidth-1 are well defined in IML so this requires special handling for shifts >= 32
	sint32 rS, rA, rB;
	PPC_OPC_TEMPL_X(opcode, rS, rA, rB);
	uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
	uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false);
	uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
	uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
	uint32 registerTmpShiftAmount = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
	uint32 registerTmpCondBool = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1);
	uint32 registerTmp1 = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2);
	uint32 registerTmp2 = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3);
	// load masked shift factor into temporary register
	ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmpShiftAmount, registerRB, 0x3F);
	// take the branch for every shift amount above 31; comparing against 32 would send a shift of exactly 32
	// down the "below 32" path, which relies on shifts by the amount being well defined
	ppcImlGenContext->emitInst().make_compare_s32(registerTmpShiftAmount, 31, registerTmpCondBool, IMLCondition::UNSIGNED_GT);
	ppcImlGenContext->emitInst().make_conditional_jump_new(registerTmpCondBool, true);
	PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock,
		[&](ppcImlGenContext_t& genCtx)
		{
			/* branch taken, shift size is 32 or above */
			// every result bit is a copy of the sign bit; shifting by the constant 31 produces exactly that
			// without using an out-of-range shift amount
			genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, 31);
			genCtx.emitInst().make_compare_s32(registerRA, 0, registerCarry, IMLCondition::NEQ); // if the sign bit is still set it also means it was shifted out and we can set carry
		},
		[&](ppcImlGenContext_t& genCtx)
		{
			/* branch not taken, shift size below 32 */
			genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp1, registerRS, 31); // signMask = input >> 31 (arithmetic shift)
			genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerTmp2, 1); // shiftMask = ((1<<SH)-1)
			genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_SHIFT, registerTmp2, registerTmp2, registerTmpShiftAmount);
			genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, registerTmp2, registerTmp2, 1);
			genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp1, registerTmp1, registerTmp2); // signMask & shiftMask & input
			genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp1, registerTmp1, registerRS);
			genCtx.emitInst().make_compare_s32(registerTmp1, 0, registerCarry, IMLCondition::NEQ);
			// the result is written last since rA may be the same register as rS
			genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, registerTmpShiftAmount);
		}
	);
	// both paths have written rA by now and output continues in the merge segment, so this is the
	// point where the record form can derive CR0 from the result
	if ((opcode & PPC_OPC_RC))
		PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA);
	return true;
}
@ -1169,9 +1215,20 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
uint32 SH;
PPC_OPC_TEMPL_X(opcode, rS, rA, SH);
cemu_assert_debug(SH < 32);
uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false);
uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH);
if (SH == 0)
return false; // becomes a no-op but also sets ca bit to 0?
uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS, false);
uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA);
uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA);
uint32 registerTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
// calculate CA first
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp, registerRS, 31); // signMask = input >> 31 (arithmetic shift)
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp, registerTmp, registerRS); // testValue = input & signMask & ((1<<SH)-1)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmp, registerTmp, ((1 << SH) - 1));
ppcImlGenContext->emitInst().make_compare_s32(registerTmp, 0, registerCarry, IMLCondition::NEQ); // ca = (testValue != 0)
// do the actual shift
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, (sint32)SH);
if ((opcode & PPC_OPC_RC))
PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA);
return true;
@ -1999,7 +2056,7 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpReg, dataRegister);
sint32 shiftAmount = (3 - b) * 8;
if (shiftAmount)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, tmpReg, tmpReg, shiftAmount);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, tmpReg, tmpReg, shiftAmount);
ppcImlGenContext->emitInst().make_memory_r(tmpReg, memReg, memOffset + b, 8, false);
nb--;
if (nb == 0)
@ -2791,7 +2848,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
PPCRecompilerImlGen_MULLI(ppcImlGenContext, opcode);
break;
case 8: // SUBFIC
PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode);
if( !PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode) )
unsupportedInstructionFound = true;
break;
case 10: // CMPLI
PPCRecompilerImlGen_CMPLI(ppcImlGenContext, opcode);
@ -2800,11 +2858,11 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode);
break;
case 12: // ADDIC
if (PPCRecompilerImlGen_ADDIC(ppcImlGenContext, opcode) == false)
if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, false) == false)
unsupportedInstructionFound = true;
break;
case 13: // ADDIC.
if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode) == false)
if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, true) == false)
unsupportedInstructionFound = true;
break;
case 14: // ADDI
@ -4010,36 +4068,6 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
// todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources)
}
// insert cycle counter instruction in every segment that has a cycle count greater zero
//for (IMLSegment* segIt : ppcImlGenContext.segmentList2)
//{
// if( segIt->ppcAddrMin == 0 )
// continue;
// // count number of PPC instructions in segment
// // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions
// uint32 lastPPCInstAddr = 0;
// uint32 ppcCount2 = 0;
// for (sint32 i = 0; i < segIt->imlList.size(); i++)
// {
// if (segIt->imlList[i].associatedPPCAddress == 0)
// continue;
// if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr)
// continue;
// lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress;
// ppcCount2++;
// }
// //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions
// uint32 cycleCount = ppcCount2;// ppcCount / 4;
// if( cycleCount > 0 )
// {
// PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1);
// segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO;
// segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
// segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES;
// segIt->imlList[0].op_macro.param = cycleCount;
// }
//}
return true;
}

View File

@ -49,7 +49,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t*
imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian;
}
void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory1, uint8 registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = 0)
void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory1, uint8 registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER)
{
// store to memory
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);