PPCRec: Rework CR bit handling

CR bits are now resident in registers instead of being baked into the instruction definitions. The same applies to XER SO and to the LWARX reservation EA and value.

Reworked LWARX/STWCX, CRxx ops, compare and branch instructions, as well as RC bit handling. Not all CR-related instructions are reimplemented yet.

Introduced an atomic_cmp_store operation to allow implementing STWCX in architecture-agnostic IML.

Removed legacy CR-based compare and jump operations
This commit is contained in:
Exzap 2023-01-03 00:51:27 +01:00
parent ff09940d03
commit c4b9fff24a
21 changed files with 1115 additions and 1232 deletions

View File

@ -3,12 +3,12 @@ static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow)
{
if (hasOverflow)
{
hCPU->spr.XER |= XER_SO;
hCPU->spr.XER |= XER_OV;
hCPU->xer_so = 1;
hCPU->xer_ov = 1;
}
else
{
hCPU->spr.XER &= ~XER_OV;
hCPU->xer_ov = 0;
}
}
@ -246,7 +246,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode)
uint32 a = hCPU->gpr[rA];
uint32 b = hCPU->gpr[rB];
hCPU->gpr[rD] = ~a + b + 1;
// update xer
// update carry
if (ppc_carry_3(~a, b, 1))
hCPU->xer_ca = 1;
else
@ -848,8 +848,7 @@ static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
if ((hCPU->spr.XER & XER_SO) != 0)
hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
@ -871,8 +870,7 @@ static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
if ((hCPU->spr.XER & XER_SO) != 0)
hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
@ -895,8 +893,7 @@ static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
if (hCPU->spr.XER & XER_SO)
hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}
@ -919,8 +916,7 @@ static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode)
hCPU->cr[cr * 4 + CR_BIT_GT] = 1;
else
hCPU->cr[cr * 4 + CR_BIT_EQ] = 1;
if (hCPU->spr.XER & XER_SO)
hCPU->cr[cr * 4 + CR_BIT_SO] = 1;
hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so;
PPCInterpreter_nextInstruction(hCPU);
}

View File

@ -50,9 +50,9 @@
#define CR_BIT_EQ 2
#define CR_BIT_SO 3
#define XER_SO (1<<31) // summary overflow bit
#define XER_OV (1<<30) // overflow bit
#define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8
#define XER_BIT_SO (31) // summary overflow, counterpart to CR SO
#define XER_BIT_OV (30)
// FPSCR
#define FPSCR_VXSNAN (1<<24)
@ -118,7 +118,8 @@
static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r)
{
hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER&XER_SO) ? 1 : 0;
cemu_assert_debug(hCPU->xer_so <= 1);
hCPU->cr[CR_BIT_SO] = hCPU->xer_so;
hCPU->cr[CR_BIT_LT] = ((r != 0) ? 1 : 0) & ((r & 0x80000000) ? 1 : 0);
hCPU->cr[CR_BIT_EQ] = (r == 0);
hCPU->cr[CR_BIT_GT] = hCPU->cr[CR_BIT_EQ] ^ hCPU->cr[CR_BIT_LT] ^ 1; // this works because EQ and LT can never be set at the same time. So the only case where GT becomes 1 is when LT=0 and EQ=0

View File

@ -85,7 +85,8 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode)
ppc_setCRBit(hCPU, CR_BIT_GT, 0);
ppc_setCRBit(hCPU, CR_BIT_EQ, 1);
}
ppc_setCRBit(hCPU, CR_BIT_SO, (hCPU->spr.XER&XER_SO) != 0 ? 1 : 0);
cemu_assert_debug(hCPU->xer_so <= 1);
ppc_setCRBit(hCPU, CR_BIT_SO, hCPU->xer_so);
// remove reservation
hCPU->reservedMemAddr = 0;
hCPU->reservedMemValue = 0;

View File

@ -63,16 +63,24 @@ void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue)
// Returns the full XER value for hCPU.
// The result starts from hCPU->spr.XER; the CA, SO and OV bit positions are then cleared and
// re-populated from the separately stored flags hCPU->xer_ca, hCPU->xer_so and hCPU->xer_ov,
// so whatever spr.XER holds at those three positions does not reach the caller.
uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU)
{
uint32 xerValue = hCPU->spr.XER;
// NOTE(review): the next three lines appear to be the pre-change form of the CA handling, shown
// by this diff view next to its reformatted replacement below. Both forms compute the same CA bit.
xerValue &= ~(1<<XER_BIT_CA);
if( hCPU->xer_ca )
xerValue |= (1<<XER_BIT_CA);
// drop the CA/SO/OV bits taken from spr.XER
xerValue &= ~(1 << XER_BIT_CA);
xerValue &= ~(1 << XER_BIT_SO);
xerValue &= ~(1 << XER_BIT_OV);
// merge in the flags that are tracked outside of spr.XER
if (hCPU->xer_ca)
xerValue |= (1 << XER_BIT_CA);
if (hCPU->xer_so)
xerValue |= (1 << XER_BIT_SO);
if (hCPU->xer_ov)
xerValue |= (1 << XER_BIT_OV);
return xerValue;
}
// Sets XER for hCPU from the 32-bit value v.
// The raw value is stored in hCPU->spr.XER, and the CA, SO and OV bits are additionally
// extracted into hCPU->xer_ca, hCPU->xer_so and hCPU->xer_ov (each ends up as 0 or 1).
void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v)
{
hCPU->spr.XER = v;
// NOTE(review): xer_ca is assigned twice with the same expression; the first line appears to be
// the pre-change form shown by this diff view next to its reformatted replacement.
hCPU->xer_ca = (v>>XER_BIT_CA)&1;
hCPU->xer_ca = (v >> XER_BIT_CA) & 1;
hCPU->xer_so = (v >> XER_BIT_SO) & 1;
hCPU->xer_ov = (v >> XER_BIT_OV) & 1;
}
uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU)

View File

@ -49,6 +49,8 @@ struct PPCInterpreter_t
uint32 fpscr;
uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..)
uint8 xer_ca; // carry from xer
uint8 xer_so;
uint8 xer_ov;
uint8 LSQE;
uint8 PSE;
// thread remaining cycles
@ -67,7 +69,7 @@ struct PPCInterpreter_t
uint32 reservedMemValue;
// temporary storage for recompiler
FPR_t temporaryFPR[8];
uint32 temporaryGPR[4]; // deprecated, refactor away backend dependency on this
uint32 temporaryGPR[4]; // deprecated, refactor backend dependency on this away
uint32 temporaryGPR_reg[4];
// values below this are not used by Cafe OS usermode
struct

View File

@ -28,6 +28,10 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
return (x86Assembler64::GPR8_REX)regId;
}
// Reinterprets a 32-bit register id as the 64-bit register id with the same numeric value.
// This is a plain cast and performs no range checking.
// NOTE(review): presumably GPR32 and GPR64 share the same register numbering - confirm in x86Assembler64.
static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId)
{
return (x86Assembler64::GPR64)regId;
}
X86Cond _x86Cond(IMLCondition imlCond)
{
@ -41,6 +45,10 @@ X86Cond _x86Cond(IMLCondition imlCond)
return X86_CONDITION_NBE;
case IMLCondition::UNSIGNED_LT:
return X86_CONDITION_B;
case IMLCondition::SIGNED_GT:
return X86_CONDITION_NLE;
case IMLCondition::SIGNED_LT:
return X86_CONDITION_L;
default:
break;
}
@ -88,18 +96,6 @@ void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, si
}
}
// Emits up to three SETcc byte stores that derive the LT, GT and EQ bits of CR field
// imlInstruction->crRegister from the x86 flags that are live at this point in the generated code.
// Each bit is written to its own byte of PPCInterpreter_t::cr (index crRegister*4 + bit).
// A bit is skipped when its position is set in imlInstruction->crIgnoreMask.
// The SO bit is never written here (see the todo at the end).
// PPCRecFunction and ppcImlGenContext are not used by this function.
// NOTE(review): RSP is used as the base register for PPCInterpreter_t offsets, so the generated
// code presumably keeps the hCPU pointer in RSP - confirm against the recompiler's register setup.
void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
sint32 crRegister = imlInstruction->crRegister;
// LT: taken from the sign flag
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by TEST
// GT: signed greater
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT));
// EQ: equal / zero
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ));
// todo: Set CR SO if XER SO bit is set
}
void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId)
{
void* prevRSPTemp = hCPU->rspTemp;
@ -424,7 +420,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
}
else if( imlInstruction->op_storeLoad.copyWidth == 8 )
{
// todo: Optimize by using only MOVZX/MOVSX
if( indexed )
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
if( signExtend )
@ -434,22 +429,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
if( indexed && realRegisterMem != realRegisterData )
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER )
{
if( imlInstruction->op_storeLoad.immS32 != 0 )
assert_dbg(); // not supported
if( indexed )
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation
x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
if( indexed && realRegisterMem != realRegisterData )
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
if( switchEndian )
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation
// LWARX instruction costs extra cycles (this speeds up busy loops)
x64Gen_sub_mem32reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20);
}
else
return false;
return true;
@ -529,106 +508,62 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
{
if (imlInstruction->op_storeLoad.immS32 != 0)
assert_dbg(); // todo
// reset cr0 LT, GT and EQ
sint32 crRegister = 0;
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0);
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0);
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0);
// calculate effective address
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
if (swapEndian)
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
// realRegisterMem now holds EA
x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr));
sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0);
// EA matches reservation
// backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten)
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
// backup REG_RESV_MEMBASE
x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE);
// add mem register to REG_RESV_MEMBASE
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem);
// load reserved value in EAX
x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue));
// bswap EAX
x64Gen_bswap_reg64Lower32bit(x64GenContext, X86_REG_EAX);
x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP);
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ));
// reset reservation
x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0);
x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0);
// restore EAX
x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]));
// restore REG_RESV_MEMBASE
x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]));
// copy XER SO to CR0 SO
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31);
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO));
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->emitter->GetWriteIndex());
}
else
return false;
return true;
}
// Emits x86-64 code for the IML atomic compare-and-store operation (op_atomic_compare_store).
// The emitted sequence is: save RAX, load the compare value into EAX, zero regBoolOut,
// LOCK CMPXCHG the write value against memory addressed via REG_RESV_MEMBASE and regEA,
// SETZ into regBoolOut, restore RAX. Per x86 CMPXCHG semantics, ZF (and therefore regBoolOut)
// is 1 when the memory value matched EAX and the write value was stored, otherwise 0.
// PPCRecFunction and ppcImlGenContext are not used by this function.
// Returns false without emitting anything if any operand register is EAX; returns true otherwise.
bool PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
auto regBoolOut = _reg32_from_reg8(_reg8(imlInstruction->op_atomic_compare_store.regBoolOut));
auto regEA = _reg32(imlInstruction->op_atomic_compare_store.regEA);
auto regVal = _reg32(imlInstruction->op_atomic_compare_store.regWriteValue);
auto regCmp = _reg32(imlInstruction->op_atomic_compare_store.regCompareValue);
// make sure none of the regs are in EAX (EAX is the implicit compare operand of CMPXCHG and is overwritten below)
if (regEA == X86_REG_EAX ||
regBoolOut == X86_REG_EAX ||
regVal == X86_REG_EAX ||
regCmp == X86_REG_EAX)
{
printf("x86: atomic_cmp_store cannot emit due to EAX already being in use\n");
return false;
}
// park the current RAX in REG_RESV_TEMP so it can be restored at the end
x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX);
x64GenContext->emitter->MOV_dd(X86_REG_EAX, regCmp);
// NOTE(review): regBoolOut is zeroed before CMPXCHG reads regEA and regVal; if the register allocator
// can assign regBoolOut to the same register as either of them, that input is lost - confirm this cannot happen.
// NOTE(review): regBoolOut already went through _reg32_from_reg8 above; confirm the second conversion here is intended.
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regBoolOut), _reg32_from_reg8(regBoolOut)); // zero bytes unaffected by SETcc
x64GenContext->emitter->LockPrefix();
// NOTE(review): arguments look like (base, offset, index, scale, source), i.e. [REG_RESV_MEMBASE + regEA*1 + 0] - confirm against the emitter
x64GenContext->emitter->CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(regEA), 1, regVal);
x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regBoolOut);
// restore the original RAX
x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX);
return true;
}
bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
{
// registerResult = registerA
if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
{
if(imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA)
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL)
{
// since MOV doesn't set eflags we need another test instruction
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult);
// set cr bits
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction);
}
else
{
assert_dbg();
}
}
else
{
if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA)
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
}
}
else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP)
{
if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult)
assert_dbg();
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); // if movbe is available we can move and swap in a single instruction?
x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.registerResult);
}
else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
}
else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA));
}
else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if( imlInstruction->operation == PPCREC_IML_OP_OR )
{
// registerResult |= registerA
@ -647,7 +582,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
}
else if( imlInstruction->operation == PPCREC_IML_OP_NOT )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// copy register content if different registers
if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
@ -655,7 +589,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
}
else if (imlInstruction->operation == PPCREC_IML_OP_NEG)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// copy register content if different registers
if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA)
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
@ -663,9 +596,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
}
else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// count leading zeros
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5])
if(g_CPUFeatures.x86.lzcnt)
{
@ -686,47 +617,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED )
{
// registerA CMP registerB (arithmetic compare)
if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER )
{
return false; // a NO-OP instruction
}
if( imlInstruction->crRegister >= 8 )
{
return false;
}
// create compare instruction
x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
// set cr bits
sint32 crRegister = imlInstruction->crRegister;
if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED )
{
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT));
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT));
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ));
// todo: Also set summary overflow if xer bit is set
}
else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED )
{
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT));
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT));
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ));
// todo: Also set summary overflow if xer bit is set
}
else
assert_dbg();
}
else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
{
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA);
@ -758,98 +650,50 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
{
if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN )
{
// registerResult = immS32
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_AND )
{
// registerResult &= immS32
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_OR )
{
// registerResult |= immS32
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_XOR )
{
// registerResult ^= immS32
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// registerResult <<<= immS32
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if( (imlInstruction->op_r_immS32.immS32&0x80) )
assert_dbg(); // should not happen
x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint8)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED )
{
// registerResult CMP immS32 (arithmetic compare)
if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER )
{
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): No-Op CMP found\n");
return true; // a NO-OP instruction
}
if( imlInstruction->crRegister >= 8 )
{
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported CMP with crRegister = 8\n");
return false;
}
// create compare instruction
x64Gen_cmp_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32);
// set cr bits
uint32 crRegister = imlInstruction->crRegister;
if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED )
{
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT));
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT));
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ));
}
else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED )
{
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT));
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT));
if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 )
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ));
}
else
assert_dbg();
// todo: Also set summary overflow if xer bit is set?
}
else if( imlInstruction->operation == PPCREC_IML_OP_MFCR )
{
uint32 destRegister = imlInstruction->op_r_immS32.registerIndex;
x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister);
for(sint32 f=0; f<32; f++)
{
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0);
x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister);
}
__debugbreak();
//uint32 destRegister = imlInstruction->op_r_immS32.registerIndex;
//x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister);
//for(sint32 f=0; f<32; f++)
//{
// x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0);
// x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister);
//}
}
else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF)
{
uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex;
uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
for (sint32 f = 0; f < 32; f++)
{
if(((crBitMask >> f) & 1) == 0)
continue;
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0);
x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f);
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f));
}
__debugbreak();
//uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex;
//uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
//for (sint32 f = 0; f < 32; f++)
//{
// if(((crBitMask >> f) & 1) == 0)
// continue;
// x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0);
// x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f);
// x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f));
//}
}
else
{
@ -861,30 +705,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
{
// registerResult = immS32 (conditional)
if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
{
assert_dbg();
}
cemu_assert_unimplemented();
//if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
//{
// // registerResult = immS32 (conditional)
// if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
// {
// assert_dbg();
// }
x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32);
uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex;
x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0);
if (imlInstruction->op_conditional_r_s32.bitMustBeSet)
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
else
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true;
}
// x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32);
// uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex;
// x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0);
// if (imlInstruction->op_conditional_r_s32.bitMustBeSet)
// x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
// else
// x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
// return true;
//}
return false;
}
bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if (imlInstruction->operation == PPCREC_IML_OP_ADD)
{
// registerResult = registerOperand1 + registerOperand2
@ -908,7 +751,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
{
// registerResult = registerOperand1 - registerOperand2
sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@ -940,7 +782,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegA = imlInstruction->op_r_r_r.registerA;
sint32 rRegB = imlInstruction->op_r_r_r.registerB;
@ -1140,7 +981,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@ -1175,7 +1015,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@ -1310,15 +1149,12 @@ bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction,
bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
sint32 regResult = imlInstruction->op_r_r_s32.registerResult;
sint32 regOperand = imlInstruction->op_r_r_s32.registerA;
uint32 immS32 = imlInstruction->op_r_r_s32.immS32;
if( imlInstruction->operation == PPCREC_IML_OP_ADD )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
@ -1328,7 +1164,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
}
else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if (regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32);
@ -1337,7 +1172,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
imlInstruction->operation == PPCREC_IML_OP_OR ||
imlInstruction->operation == PPCREC_IML_OP_XOR)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if (regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
if (imlInstruction->operation == PPCREC_IML_OP_AND)
@ -1355,8 +1189,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
uint32 me = (vImm>>8)&0xFF;
uint32 sh = (vImm>>16)&0xFF;
uint32 mask = ppc_mask(mb, me);
// save cr
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// copy rS to temporary register
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.registerA);
// rotate destination register
@ -1434,50 +1266,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFu
return true;
}
// Emits the x64 code for an IML conditional jump to the segment's branch-taken successor.
// Three forms are handled:
//   - condition NONE: unconditional jump
//   - temporary CR (index == PPCREC_CR_TEMPORARY or >= 8): branch on the host flags that are currently live
//   - regular CR bit: test the stored CR byte and branch on the resulting carry flag
// Always returns true.
bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction)
{
	// every form needs a branch-taken successor segment
	cemu_assert_debug(imlSegment->nextSegmentBranchTaken);

	const sint32 jumpCondition = imlInstruction->op_conditionalJump.condition;
	if (jumpCondition == PPCREC_JUMP_CONDITION_NONE)
	{
		// jump always; the 0 displacement is presumably patched later through the recorded relocation
		PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
		x64Gen_jmp_imm32(x64GenContext, 0);
		return true;
	}

	const auto crIndex = imlInstruction->op_conditionalJump.crRegisterIndex;
	if (crIndex == PPCREC_CR_TEMPORARY || crIndex >= 8)
	{
		// temporary cr: the comparison result only exists in the currently active eflags
		PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
		switch (jumpCondition)
		{
		case PPCREC_JUMP_CONDITION_E:
			x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
			break;
		case PPCREC_JUMP_CONDITION_NE:
			x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0);
			break;
		default:
			// only E / NE are supported for the temporary cr
			assert_dbg();
			break;
		}
		return true;
	}

	// regular cr bit: BT copies bit 0 of the stored CR byte into CF, then branch on CF
	uint8 crBitIndex = crIndex*4 + imlInstruction->op_conditionalJump.crBitIndex;
	x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0);
	cemu_assert_debug(imlSegment->GetBranchTaken());
	PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken());
	if (imlInstruction->op_conditionalJump.bitMustBeSet)
		x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0);
	else
		x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0);
	return true;
}
bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
// some tests (all performed on a i7-4790K)
@ -1492,49 +1280,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction
return true;
}
/*
* PPC condition register operation
*/
// Emits x64 code for an IML condition-register bit operation.
// Each CR bit is addressed as its own byte at offsetof(PPCInterpreter_t, cr) + bitIndex,
// relative to RSP.
//   CR_CLEAR / CR_SET : store 0 / 1 into bit crD
//   CR_OR / CR_AND    : crD = crB (op) crA, computed in REG_RESV_TEMP
//   CR_ORC / CR_ANDC  : not supported (the complementing variant was never tested)
// Returns true if code was emitted, false if the operation is unsupported.
bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
	const auto operation = imlInstruction->operation;
	const auto crBase = offsetof(PPCInterpreter_t, cr);

	if (operation == PPCREC_IML_OP_CR_CLEAR || operation == PPCREC_IML_OP_CR_SET)
	{
		// clear or set cr bit
		x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, crBase + sizeof(uint8)*imlInstruction->op_cr.crD, (operation == PPCREC_IML_OP_CR_SET) ? 1 : 0);
		return true;
	}

	if (operation == PPCREC_IML_OP_CR_ORC || operation == PPCREC_IML_OP_CR_ANDC)
	{
		// untested: the intended lowering complements the loaded crB (XOR with 1) before combining it with crA.
		// Reject before emitting anything so a failed translation leaves no partial instruction behind.
		return false;
	}

	if (operation == PPCREC_IML_OP_CR_OR || operation == PPCREC_IML_OP_CR_AND)
	{
		// temp = crB
		x64Emit_movZX_reg64_mem8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, crBase + sizeof(uint8)*imlInstruction->op_cr.crB);
		// temp = temp (op) crA
		if (operation == PPCREC_IML_OP_CR_OR)
			x64Gen_or_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, crBase + sizeof(uint8)*imlInstruction->op_cr.crA);
		else
			x64Gen_and_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, crBase + sizeof(uint8)*imlInstruction->op_cr.crA);
		// crD = temp
		x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, crBase + sizeof(uint8)*imlInstruction->op_cr.crD);
		return true;
	}

	// unknown CR operation
	assert_dbg();
	return false;
}
void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 name = imlInstruction->op_r_name.name;
@ -1567,6 +1312,22 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
}
else if (name == PPCREC_NAME_XER_SO)
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so));
}
else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
}
else if (name == PPCREC_NAME_CPU_MEMRES_EA)
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr));
}
else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue));
}
else
assert_dbg();
}
@ -1603,6 +1364,22 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex)));
}
else if (name == PPCREC_NAME_XER_SO)
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex)));
}
else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex)));
}
else if (name == PPCREC_NAME_CPU_MEMRES_EA)
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), imlInstruction->op_r_name.registerIndex);
}
else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), imlInstruction->op_r_name.registerIndex);
}
else
assert_dbg();
}
@ -1713,13 +1490,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
codeGenerationFailed = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP )
{
if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false )
{
codeGenerationFailed = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK )
{
PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
@ -1759,12 +1529,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
codeGenerationFailed = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_CR )
else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
if( PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false )
{
if (!PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction))
codeGenerationFailed = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP )
{
@ -1822,6 +1590,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
{
PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
}
else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE)
{
PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
}
else
{
debug_printf("PPCRecompiler_generateX64Code(): Unsupported iml type 0x%x\n", imlInstruction->type);

View File

@ -62,11 +62,6 @@ enum
X86_CONDITION_NONE, // no condition, jump always
};
#define PPCREC_CR_TEMPORARY (8) // never stored
// NOTE(review): the trailing comments on the next two macros contradict the macro names
// (UNSIGNED is described as "signed" and SIGNED as "unsigned") - confirm which one is intended.
#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for signed arithmetic operations (ADD, CMPI)
#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI)
#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI)
#define PPC_X64_GPR_USABLE_REGISTERS (16-4)
#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register
@ -86,6 +81,8 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction);
// ASM gen
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v);
void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v);

View File

@ -5,6 +5,31 @@
#include "asm/x64util.h" // for recompiler_fres / frsqrte
// Converts an IML register id to the uint32 register index taken by the x64Gen_* XMM emit helpers.
uint32 _regF64(IMLReg r)
{
	const uint32 xmmIndex = uint32(r);
	return xmmIndex;
}
// Reinterprets a physical register id as the emitter's 32-bit GPR type.
static x86Assembler64::GPR32 _reg32(sint8 physRegId)
{
	return static_cast<x86Assembler64::GPR32>(physRegId);
}
// Reinterprets a physical register id as the emitter's 8-bit (REX-addressable) GPR type.
static x86Assembler64::GPR8_REX _reg8(sint8 physRegId)
{
	return static_cast<x86Assembler64::GPR8_REX>(physRegId);
}
// Returns the 32-bit view of the given 8-bit register (same register id, different operand type).
static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
{
	return static_cast<x86Assembler64::GPR32>(regId);
}
// Returns the 8-bit view of the given 32-bit register (same register id, different operand type).
static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
{
	return static_cast<x86Assembler64::GPR8_REX>(regId);
}
void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 name = imlInstruction->op_r_name.name;
@ -690,18 +715,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
{
if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
// VPUNPCKHQDQ
if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand)
{
@ -725,170 +742,73 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand )
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 2);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// use unpckhpd here?
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 3);
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_divsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR)
{
if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
{
assert_dbg();
}
x64Gen_divpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, imlInstruction->op_fpr_r_r.registerOperand);
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
// move to FPR register
x64Gen_movq_xmmReg_reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_TEMP);
}
else if(imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM ||
imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP ||
imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM )
{
if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM)
x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP)
{
// temporarily switch top/bottom of both operands and compare
if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand)
{
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult);
x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult);
}
else
{
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult);
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand);
x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult);
_swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand);
}
}
else
x64Gen_comisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
// todo: handle FPSCR updates
// update cr
sint32 crRegister = imlInstruction->crRegister;
// if the parity bit is set (NaN) we need to manually set CR LT, GT and EQ to 0 (comisd/ucomisd sets the respective flags to 1 in case of NaN)
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_PARITY, 0);
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT));
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ));
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0);
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0);
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP )
{
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{
assert_dbg();
}
// move register to XMM15
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand);
@ -901,7 +821,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
}
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// move register to XMM15
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand);
@ -914,7 +833,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// copy register
if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand )
{
@ -925,7 +843,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// copy register
if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand )
{
@ -936,7 +853,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// calculate bottom half of result
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand);
if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR)
@ -968,10 +884,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
{
if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM)
{
if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
{
assert_dbg();
}
if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA)
{
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB);
@ -988,8 +900,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
}
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM)
{
// registerResult(fp0) = registerOperandA(fp0) + registerOperandB(fp0)
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// todo: Use AVX 3-operand VADDSD if available
if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA)
{
@ -1008,7 +918,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR)
{
// registerResult = registerOperandA - registerOperandB
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA )
{
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB);
@ -1031,7 +940,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA )
{
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB);
@ -1059,8 +967,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
{
if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// todo: Investigate if there are other optimizations possible if the operand registers overlap
// generic case
// 1) move frA bottom to frTemp bottom and top
@ -1074,7 +980,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// todo: Investigate if there are other optimizations possible if the operand registers overlap
// 1) move frA bottom to frTemp bottom and top
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA);
@ -1094,7 +999,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
@ -1110,7 +1014,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// select bottom
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex();
@ -1145,32 +1048,22 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
assert_dbg();
}
/*
* Single FPR operation
*/
void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// toggle sign bit
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// mask out sign bit
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// set sign bit
x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// convert to 32bit single
x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult);
// convert back to 64bit double
@ -1178,7 +1071,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR )
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// convert to 32bit singles
x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult);
// convert back to 64bit doubles
@ -1186,7 +1078,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction,
}
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64)
{
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// convert bottom to 64bit double
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult);
// copy to top half
@ -1197,3 +1088,44 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction,
cemu_assert_unimplemented();
}
}
// Emits x64 code for an IML floating-point compare.
// Compares regA with regB using UCOMISD and materializes the requested condition
// (op_fpr_compare.cond) as 0 or 1 in the general purpose register regR.
// Supported conditions: UNORDERED_GT, UNORDERED_U, UNORDERED_LT, UNORDERED_EQ.
// REG_RESV_TEMP is clobbered for the LT and EQ conditions.
void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
	auto regR = _reg8(imlInstruction->op_fpr_compare.regR);
	auto regA = _regF64(imlInstruction->op_fpr_compare.regA);
	auto regB = _regF64(imlInstruction->op_fpr_compare.regB);

	// zero the full result register up front: SETcc only writes the low byte, and XOR
	// modifies the flags so it has to be emitted before the compare
	x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR));
	x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, regA, regB);

	if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_GT)
	{
		// GT case can be covered with a single SETnbe which checks CF==0 && ZF==0 (unordered sets both)
		x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_NBE, regR);
		return;
	}
	else if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_U)
	{
		// unordered case can be checked via PF
		x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PE, regR);
		return;
	}

	// remember unordered state
	// by reversing the parity (tmp = 1 only when ordered) we can avoid having to XOR the value for masking the LT/EQ conditions
	auto regTmp = _reg8(REG_RESV_TEMP);
	x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PO, regTmp);

	switch (imlInstruction->op_fpr_compare.cond)
	{
	case IMLCondition::UNORDERED_LT:
		x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_B, regR);
		break;
	case IMLCondition::UNORDERED_EQ:
		x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regR);
		break;
	default:
		cemu_assert_unimplemented();
		break;
	}
	// if unordered (PF=1) then tmp is 0 and the LT/EQ result is forced to zero
	x64GenContext->emitter->AND_bb(regR, regTmp);
}

View File

@ -84,6 +84,7 @@ public:
using GPR64 = X86Reg;
using GPR32 = X86Reg;
using GPR8_REX = X86Reg;
// Emits the x86 LOCK prefix byte (0xF0); it applies to the instruction emitted next.
void LockPrefix()
{
	_emitU8(0xF0);
}
void ADD_bb(GPR8_REX dst, GPR8_REX src)
{
if ((src >= 4) || (dst >= 4))
@ -3194,6 +3195,124 @@ public:
if (mod == 1) _emitU8((u8)offset);
else if (mod == 2) _emitU32((u32)offset);
}
// XCHG r8, r8 (opcode 0x86 /r). dst goes into ModRM.reg, src into ModRM.rm.
void XCHG_bb(GPR8_REX dst, GPR8_REX src)
{
	// byte registers with id >= 4 need a REX prefix (REX.R extends dst, REX.B extends src)
	const bool needsRex = (dst >= 4) || (src >= 4);
	if (needsRex)
		_emitU8(0x40 | ((dst & 8) >> 1) | ((src & 8) >> 3));
	_emitU8(0x86);
	// mod=11b: register-direct operand
	_emitU8(0xC0 | ((dst & 7) << 3) | (src & 7));
}
// XCHG r8, m8 (opcode 0x86 /r) with memory operand [memReg + index + offset].
// dst    - byte register, encoded in ModRM.reg (extended by REX.R)
// memReg - base register, encoded in ModRM.rm or SIB.base (extended by REX.B)
// index  - optional index register (X86_REG_NONE for none), encoded in SIB.index (extended by REX.X)
// scaler - 0 means "no index"
// NOTE(review): the SIB scale field is always emitted as 0 (index*1) no matter what non-zero
// value scaler has - confirm callers only ever pass 0 or 1.
void XCHG_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
{
	// pick the displacement size; rm/base 101b with mod=0 has a special meaning, so rbp/r13 always get a disp8
	uint8 mod;
	if (offset == 0 && (memReg & 7) != 5) mod = 0;
	else if (offset == (s32)(s8)offset) mod = 1;
	else mod = 2;
	bool sib_use = (scaler != 0 && index != X86_REG_NONE);
	if ((memReg & 7) == 4)
	{
		// rsp/r12 as base can only be expressed through a SIB byte
		cemu_assert_debug(index == X86_REG_NONE);
		// SIB.index = 100b together with REX.X = 0 means "no index". Use the low three bits only,
		// otherwise an r12 base would set REX.X and encode r12 as index as well
		index = (GPR64)(memReg & 7);
		sib_use = true;
	}
	if (sib_use)
	{
		if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
			_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
	}
	else
	{
		// without SIB the base register lives in ModRM.rm and is extended by REX.B (bit 0), not REX.R
		if ((dst >= 4) || (memReg & 8))
			_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3));
	}
	_emitU8(0x86);
	_emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
	if (sib_use)
	{
		_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
	}
	if (mod == 1) _emitU8((u8)offset);
	else if (mod == 2) _emitU32((u32)offset);
}
// XCHG r32, r32 (opcode 0x87 /r). dst goes into ModRM.reg, src into ModRM.rm.
void XCHG_dd(GPR32 dst, GPR32 src)
{
	// a REX prefix is only needed when one of the operands is r8d-r15d
	const bool needsRex = ((dst & 8) != 0) || ((src & 8) != 0);
	if (needsRex)
		_emitU8(0x40 | ((dst & 8) >> 1) | ((src & 8) >> 3));
	_emitU8(0x87);
	// mod=11b: register-direct operand
	_emitU8(0xC0 | ((dst & 7) << 3) | (src & 7));
}
// XCHG r64, r64 (REX.W + opcode 0x87 /r). dst goes into ModRM.reg, src into ModRM.rm.
void XCHG_qq(GPR64 dst, GPR64 src)
{
	// REX.W is always required for the 64-bit form; REX.R extends dst, REX.B extends src
	const auto rexR = (dst & 8) >> 1;
	const auto rexB = (src & 8) >> 3;
	_emitU8(0x48 | rexB | rexR);
	_emitU8(0x87);
	// mod=11b: register-direct operand
	_emitU8(0xC0 | ((dst & 7) << 3) | (src & 7));
}
// XCHG r32, m32 (opcode 0x87 /r) with memory operand [memReg + index + offset].
// dst    - 32-bit register, encoded in ModRM.reg (extended by REX.R)
// memReg - base register, encoded in ModRM.rm or SIB.base (extended by REX.B)
// index  - optional index register (X86_REG_NONE for none), encoded in SIB.index (extended by REX.X)
// scaler - 0 means "no index"
// NOTE(review): the SIB scale field is always emitted as 0 (index*1) no matter what non-zero
// value scaler has - confirm callers only ever pass 0 or 1.
void XCHG_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
{
	// pick the displacement size; rm/base 101b with mod=0 has a special meaning, so rbp/r13 always get a disp8
	uint8 mod;
	if (offset == 0 && (memReg & 7) != 5) mod = 0;
	else if (offset == (s32)(s8)offset) mod = 1;
	else mod = 2;
	bool sib_use = (scaler != 0 && index != X86_REG_NONE);
	if ((memReg & 7) == 4)
	{
		// rsp/r12 as base can only be expressed through a SIB byte
		cemu_assert_debug(index == X86_REG_NONE);
		// SIB.index = 100b together with REX.X = 0 means "no index". Use the low three bits only,
		// otherwise an r12 base would set REX.X and encode r12 as index as well
		index = (GPR64)(memReg & 7);
		sib_use = true;
	}
	if (sib_use)
	{
		if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
			_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
	}
	else
	{
		// without SIB the base register lives in ModRM.rm and is extended by REX.B (bit 0), not REX.R
		if ((dst & 8) || (memReg & 8))
			_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3));
	}
	_emitU8(0x87);
	_emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
	if (sib_use)
	{
		_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
	}
	if (mod == 1) _emitU8((u8)offset);
	else if (mod == 2) _emitU32((u32)offset);
}
// Emits XCHG r64, qword ptr [memReg + index + offset] (REX.W + opcode 87 /r).
//   dst    - 64-bit register operand, encoded in ModRM.reg
//   memReg - base register of the memory operand
//   offset - signed displacement; encoded as disp8 when it fits, otherwise disp32
//   index  - optional index register, X86_REG_NONE when unused
//   scaler - only compared against zero to decide whether the index is used;
//            the SIB scale bits are always emitted as 0
// The REX prefix is always emitted because it carries the W bit.
// Fixes over the previous version:
//   * without a SIB byte the base register is extended through REX.B (bit 0);
//     the old code set REX.R instead, mis-encoding R8-R15 as base
//   * with R12 as base the "no index" SIB encoding no longer sets REX.X
void XCHG_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler)
{
	uint8 mod;
	// base encoding 5 with mod=0 selects a displacement-only form, so RBP/R13 always get a displacement
	if (offset == 0 && (memReg & 7) != 5) mod = 0;
	else if (offset == (s32)(s8)offset) mod = 1;
	else mod = 2;
	bool sib_use = (scaler != 0 && index != X86_REG_NONE);
	if ((memReg & 7) == 4)
	{
		// RSP/R12 cannot be encoded in ModRM.rm directly and require a SIB byte
		cemu_assert_debug(index == X86_REG_NONE);
		// SIB index 4 without REX.X means "no index"; drop bit 3 so an R12 base does not set REX.X
		index = (GPR64)(memReg & 7);
		sib_use = true;
	}
	if (sib_use)
	{
		_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
	}
	else
	{
		// the base lives in ModRM.rm here, which is extended by REX.B
		_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | 0x08);
	}
	_emitU8(0x87);
	_emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
	if (sib_use)
	{
		_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
	}
	if (mod == 1) _emitU8((u8)offset);
	else if (mod == 2) _emitU32((u32)offset);
}
void MOV_bb(GPR8_REX dst, GPR8_REX src)
{
if ((src >= 4) || (dst >= 4))
@ -4032,6 +4151,102 @@ public:
if (mod == 1) _emitU8((u8)offset);
else if (mod == 2) _emitU32((u32)offset);
}
// Emits CMPXCHG between two 32-bit registers (opcode 0F B1 /r, register-direct form).
// dst is the r/m operand (ModRM.rm) and src the register operand (ModRM.reg).
// A REX prefix is only emitted when one of the registers is numbered 8 or higher.
void CMPXCHG_dd(GPR32 dst, GPR32 src)
{
	const uint8 rexB = (uint8)((dst & 8) >> 3); // extends ModRM.rm
	const uint8 rexR = (uint8)((src & 8) >> 1); // extends ModRM.reg
	if ((dst & 8) || (src & 8))
	{
		_emitU8(0x40 | rexR | rexB);
	}
	_emitU8(0x0f);
	_emitU8(0xb1);
	// mod = 3 selects the register-direct form
	_emitU8(0xC0 | ((src & 7) << 3) | (dst & 7));
}
// Emits CMPXCHG between two 64-bit registers (REX.W + opcode 0F B1 /r, register-direct form).
// dst is the r/m operand (ModRM.rm) and src the register operand (ModRM.reg).
// The REX prefix is always emitted because it carries the W bit.
void CMPXCHG_qq(GPR64 dst, GPR64 src)
{
	const uint8 rexB = (uint8)((dst & 8) >> 3); // extends ModRM.rm
	const uint8 rexR = (uint8)((src & 8) >> 1); // extends ModRM.reg
	_emitU8(0x48 | rexR | rexB);
	_emitU8(0x0f);
	_emitU8(0xb1);
	// mod = 3 selects the register-direct form
	_emitU8(0xC0 | ((src & 7) << 3) | (dst & 7));
}
// Emits CMPXCHG dword ptr [memReg + index + offset], r32 (opcode 0F B1 /r).
// No LOCK prefix is emitted by this function.
//   memReg - base register of the memory operand
//   offset - signed displacement; encoded as disp8 when it fits, otherwise disp32
//   index  - optional index register, X86_REG_NONE when unused
//   scaler - only compared against zero to decide whether the index is used;
//            the SIB scale bits are always emitted as 0
//   src    - 32-bit register operand, encoded in ModRM.reg
// Fixes over the previous version:
//   * without a SIB byte the base register is extended through REX.B (bit 0);
//     the old code set REX.R instead, mis-encoding R8-R15 as base
//   * with R12 as base the "no index" SIB encoding no longer sets REX.X
void CMPXCHG_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src)
{
	uint8 mod;
	// base encoding 5 with mod=0 selects a displacement-only form, so RBP/R13 always get a displacement
	if (offset == 0 && (memReg & 7) != 5) mod = 0;
	else if (offset == (s32)(s8)offset) mod = 1;
	else mod = 2;
	bool sib_use = (scaler != 0 && index != X86_REG_NONE);
	if ((memReg & 7) == 4)
	{
		// RSP/R12 cannot be encoded in ModRM.rm directly and require a SIB byte
		cemu_assert_debug(index == X86_REG_NONE);
		// SIB index 4 without REX.X means "no index"; drop bit 3 so an R12 base does not set REX.X
		index = (GPR64)(memReg & 7);
		sib_use = true;
	}
	if (sib_use)
	{
		if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
			_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
	}
	else
	{
		// the base lives in ModRM.rm here, which is extended by REX.B
		if ((src & 8) || (memReg & 8))
			_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3));
	}
	_emitU8(0x0f);
	_emitU8(0xb1);
	_emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
	if (sib_use)
	{
		_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
	}
	if (mod == 1) _emitU8((u8)offset);
	else if (mod == 2) _emitU32((u32)offset);
}
// Emits CMPXCHG qword ptr [memReg + index + offset], r64 (REX.W + opcode 0F B1 /r).
// No LOCK prefix is emitted by this function.
//   memReg - base register of the memory operand
//   offset - signed displacement; encoded as disp8 when it fits, otherwise disp32
//   index  - optional index register, X86_REG_NONE when unused
//   scaler - only compared against zero to decide whether the index is used;
//            the SIB scale bits are always emitted as 0
//   src    - 64-bit register operand, encoded in ModRM.reg
// The REX prefix is always emitted because it carries the W bit.
// Fixes over the previous version:
//   * without a SIB byte the base register is extended through REX.B (bit 0);
//     the old code set REX.R instead, mis-encoding R8-R15 as base
//   * with R12 as base the "no index" SIB encoding no longer sets REX.X
void CMPXCHG_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src)
{
	uint8 mod;
	// base encoding 5 with mod=0 selects a displacement-only form, so RBP/R13 always get a displacement
	if (offset == 0 && (memReg & 7) != 5) mod = 0;
	else if (offset == (s32)(s8)offset) mod = 1;
	else mod = 2;
	bool sib_use = (scaler != 0 && index != X86_REG_NONE);
	if ((memReg & 7) == 4)
	{
		// RSP/R12 cannot be encoded in ModRM.rm directly and require a SIB byte
		cemu_assert_debug(index == X86_REG_NONE);
		// SIB index 4 without REX.X means "no index"; drop bit 3 so an R12 base does not set REX.X
		index = (GPR64)(memReg & 7);
		sib_use = true;
	}
	if (sib_use)
	{
		_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
	}
	else
	{
		// the base lives in ModRM.rm here, which is extended by REX.B
		_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08);
	}
	_emitU8(0x0f);
	_emitU8(0xb1);
	_emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
	if (sib_use)
	{
		_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
	}
	if (mod == 1) _emitU8((u8)offset);
	else if (mod == 2) _emitU32((u32)offset);
}
// Emits BSWAP on a 32-bit register (opcode 0F C8+rd).
// The low three bits of dst are folded into the opcode byte; bit 3 is carried
// by a REX.B prefix, which is only emitted when it is set.
void BSWAP_d(GPR32 dst)
{
	if (dst & 8)
	{
		// 0x40 | REX.B
		_emitU8(0x41);
	}
	_emitU8(0x0f);
	const uint8 opcodeWithReg = (uint8)(0xc8 | (dst & 7));
	_emitU8(opcodeWithReg);
}
// Emits BSWAP on a 64-bit register (REX.W + opcode 0F C8+rd).
// The REX prefix is always emitted because it carries the W bit; bit 3 of dst
// goes into REX.B and the low three bits are folded into the opcode byte.
void BSWAP_q(GPR64 dst)
{
	const uint8 rexB = (uint8)((dst & 8) >> 3);
	_emitU8(0x48 | rexB);
	_emitU8(0x0f);
	const uint8 opcodeWithReg = (uint8)(0xc8 | (dst & 7));
	_emitU8(opcodeWithReg);
}
void BT_du8(GPR32 dst, u8 imm)
{
if (((dst & 8) != 0))

View File

@ -18,7 +18,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment)
// loops using BDNZ are assumed to always be finite
for(const IMLInstruction& instIt : imlSegment->imlList)
{
if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB && instIt.crRegister == 8)
if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB)
{
return true;
}
@ -92,59 +92,60 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction)
// Determines which condition register (CR) bits a single IML instruction reads
// and writes, and stores the result as bit masks in crTracking->readCRBits and
// crTracking->writtenCRBits (one bit per CR bit index).
// Note: every path through the branch chain below falls through to the
// __debugbreak() call further down.
void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking)
{
crTracking->readCRBits = 0;
crTracking->writtenCRBits = 0;
if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP)
{
// a jump with a condition reads exactly one CR bit; PPCREC_JUMP_CONDITION_NONE reads none
if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
{
uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex);
crTracking->readCRBits = (crBitFlag);
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
// conditional operation reads the single CR bit it is predicated on
uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex);
crTracking->readCRBits = crBitFlag;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR)
{
// MFCR is marked as reading all 32 CR bits
crTracking->readCRBits = 0xFFFFFFFF;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF)
{
// written bits are derived from the MTCRF mask stored in the immediate
crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
}
else if (imlInstruction->type == PPCREC_IML_TYPE_CR)
{
if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR ||
imlInstruction->operation == PPCREC_IML_OP_CR_SET)
{
// clear/set only write the destination bit
uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
crTracking->writtenCRBits = crBitFlag;
}
else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR ||
imlInstruction->operation == PPCREC_IML_OP_CR_ORC ||
imlInstruction->operation == PPCREC_IML_OP_CR_AND ||
imlInstruction->operation == PPCREC_IML_OP_CR_ANDC)
{
// two-operand CR logic: writes crD, reads crA and crB
uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
crTracking->writtenCRBits = crBitFlag;
crBitFlag = 1 << (imlInstruction->op_cr.crA);
crTracking->readCRBits = crBitFlag;
crBitFlag = 1 << (imlInstruction->op_cr.crB);
crTracking->readCRBits |= crBitFlag;
}
else
assert_dbg();
}
else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7)
{
// instruction updates a whole CR field: mark all four bits of that field as written
crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4));
}
else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
{
// overwrites CR0
crTracking->writtenCRBits |= (0xF << 0);
}
// reached on every path; the commented-out lines that follow mirror the logic above
__debugbreak();
//crTracking->readCRBits = 0;
//crTracking->writtenCRBits = 0;
//if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP)
//{
// if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
// {
// uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex);
// crTracking->readCRBits = (crBitFlag);
// }
//}
//else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
//{
// uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex);
// crTracking->readCRBits = crBitFlag;
//}
//else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR)
//{
// crTracking->readCRBits = 0xFFFFFFFF;
//}
//else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF)
//{
// crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
//}
//else if (imlInstruction->type == PPCREC_IML_TYPE_CR)
//{
// if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR ||
// imlInstruction->operation == PPCREC_IML_OP_CR_SET)
// {
// uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
// crTracking->writtenCRBits = crBitFlag;
// }
// else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR ||
// imlInstruction->operation == PPCREC_IML_OP_CR_ORC ||
// imlInstruction->operation == PPCREC_IML_OP_CR_AND ||
// imlInstruction->operation == PPCREC_IML_OP_CR_ANDC)
// {
// uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
// crTracking->writtenCRBits = crBitFlag;
// crBitFlag = 1 << (imlInstruction->op_cr.crA);
// crTracking->readCRBits = crBitFlag;
// crBitFlag = 1 << (imlInstruction->op_cr.crB);
// crTracking->readCRBits |= crBitFlag;
// }
// else
// assert_dbg();
//}
//else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7)
//{
// crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4));
//}
//else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
//{
// // overwrites CR0
// crTracking->writtenCRBits |= (0xF << 0);
//}
}

View File

@ -206,6 +206,18 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
{
strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0);
}
else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST)
strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR);
else if (inst.op_r_name.name == PPCREC_NAME_XER_CA)
strOutput.add("xer.ca");
else if (inst.op_r_name.name == PPCREC_NAME_XER_SO)
strOutput.add("xer.so");
else if (inst.op_r_name.name == PPCREC_NAME_XER_OV)
strOutput.add("xer.ov");
else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA)
strOutput.add("cpuReservation.ea");
else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL)
strOutput.add("cpuReservation.value");
else
strOutput.add("ukn");
strOutput.add(")");
@ -217,11 +229,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerResult);
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerA, true);
if (inst.crRegister != PPC_REC_INVALID_REGISTER)
{
strOutput.addFmt(" -> CR{}", inst.crRegister);
}
}
else if (inst.type == PPCREC_IML_TYPE_R_R_R)
{
@ -231,10 +238,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerResult);
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerA);
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerB, true);
if (inst.crRegister != PPC_REC_INVALID_REGISTER)
{
strOutput.addFmt(" -> CR{}", inst.crRegister);
}
}
else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY)
{
@ -274,9 +277,13 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_conditionalJump2.registerBool, true);
if(!inst.op_conditionalJump2.mustBeTrue)
if (!inst.op_conditionalJump2.mustBeTrue)
strOutput.add("(inverted)");
}
else if (inst.type == PPCREC_IML_TYPE_JUMP)
{
strOutput.add("JUMP");
}
else if (inst.type == PPCREC_IML_TYPE_R_R_S32)
{
strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst));
@ -286,11 +293,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerResult);
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerA);
IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true);
if (inst.crRegister != PPC_REC_INVALID_REGISTER)
{
strOutput.addFmt(" -> CR{}", inst.crRegister);
}
}
else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY)
{
@ -311,55 +313,42 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.registerIndex);
IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true);
if (inst.crRegister != PPC_REC_INVALID_REGISTER)
{
strOutput.addFmt(" -> CR{}", inst.crRegister);
}
}
else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE ||
inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
{
if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED)
strOutput.add("LD_");
else
strOutput.add("ST_");
{
if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED)
strOutput.add("LD_");
else
strOutput.add("ST_");
if (inst.op_storeLoad.flags2.signExtend)
strOutput.add("S");
else
strOutput.add("U");
strOutput.addFmt("{}", inst.op_storeLoad.copyWidth);
if (inst.op_storeLoad.flags2.signExtend)
strOutput.add("S");
else
strOutput.add("U");
strOutput.addFmt("{}", inst.op_storeLoad.copyWidth);
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData);
if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2);
else
strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32);
}
else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
strOutput.add("ATOMIC_ST_U32");
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData);
if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2);
else
strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32);
}
else if (inst.type == PPCREC_IML_TYPE_CJUMP)
{
if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E)
strOutput.add("JE");
else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE)
strOutput.add("JNE");
else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G)
strOutput.add("JG");
else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE)
strOutput.add("JGE");
else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L)
strOutput.add("JL");
else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE)
strOutput.add("JLE");
else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE)
strOutput.add("JALW"); // jump always
else
cemu_assert_unimplemented();
strOutput.addFmt(" (cr{})", inst.crRegister);
IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA);
IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue);
IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue);
IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true);
}
else if (inst.type == PPCREC_IML_TYPE_NO_OP)
{
@ -487,10 +476,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
else
strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32);
strOutput.add(" (conditional)");
if (inst.crRegister != PPC_REC_INVALID_REGISTER)
{
strOutput.addFmt(" -> and update CR{}", inst.crRegister);
}
}
else
{

View File

@ -154,7 +154,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
// carry is always written
registersUsed->writtenNamedReg2 = op_r_r_r_carry.regCarry;
}
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
// no effect on registers
}
@ -222,9 +222,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
registersUsed->readNamedReg3 = op_storeLoad.registerMem2;
}
else if (type == PPCREC_IML_TYPE_CR)
else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
// only affects cr register
registersUsed->readNamedReg1 = op_atomic_compare_store.regEA;
registersUsed->readNamedReg2 = op_atomic_compare_store.regCompareValue;
registersUsed->readNamedReg3 = op_atomic_compare_store.regWriteValue;
registersUsed->writtenNamedReg1 = op_atomic_compare_store.regBoolOut;
}
else if (type == PPCREC_IML_TYPE_FPR_R_NAME)
{
@ -467,6 +470,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
else
cemu_assert_unimplemented();
}
else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
{
registersUsed->writtenNamedReg1 = op_fpr_compare.regR;
registersUsed->readFPR1 = op_fpr_compare.regA;
registersUsed->readFPR2 = op_fpr_compare.regB;
}
else
{
cemu_assert_unimplemented();
@ -560,7 +569,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& transl
{
op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, translationTable);
}
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP)
else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP)
{
// no effect on registers
}
@ -613,9 +622,12 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& transl
if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER)
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable);
}
else if (type == PPCREC_IML_TYPE_CR)
else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
// only affects cr register
op_atomic_compare_store.regEA = replaceRegisterMultiple(op_atomic_compare_store.regEA, translationTable);
op_atomic_compare_store.regCompareValue = replaceRegisterMultiple(op_atomic_compare_store.regCompareValue, translationTable);
op_atomic_compare_store.regWriteValue = replaceRegisterMultiple(op_atomic_compare_store.regWriteValue, translationTable);
op_atomic_compare_store.regBoolOut = replaceRegisterMultiple(op_atomic_compare_store.regBoolOut, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_R_NAME)
{
@ -689,6 +701,10 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLReg, IMLReg>& transl
else if (type == PPCREC_IML_TYPE_FPR_R)
{
}
else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
{
op_fpr_compare.regR = replaceRegisterMultiple(op_fpr_compare.regR, translationTable);
}
else
{
cemu_assert_unimplemented();
@ -725,7 +741,7 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist
{
// not affected
}
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
// not affected
}
@ -753,9 +769,9 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist
{
// not affected
}
else if (type == PPCREC_IML_TYPE_CR)
else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
// only affects cr register
;
}
else if (type == PPCREC_IML_TYPE_FPR_R_NAME)
{
@ -803,6 +819,11 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist
{
op_fpr_r.registerResult = replaceRegisterMultiple(op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced);
}
else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
{
op_fpr_compare.regA = replaceRegisterMultiple(op_fpr_compare.regA, fprRegisterSearched, fprRegisterReplaced);
op_fpr_compare.regB = replaceRegisterMultiple(op_fpr_compare.regB, fprRegisterSearched, fprRegisterReplaced);
}
else
{
cemu_assert_unimplemented();
@ -839,7 +860,7 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe
{
// not affected
}
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
// not affected
}
@ -867,9 +888,9 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe
{
// not affected
}
else if (type == PPCREC_IML_TYPE_CR)
else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
// only affects cr register
;
}
else if (type == PPCREC_IML_TYPE_FPR_R_NAME)
{

View File

@ -29,13 +29,6 @@ enum
PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20
PPCREC_IML_OP_MFCR, // copy cr to gpr
PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask)
// condition register
PPCREC_IML_OP_CR_CLEAR, // clear cr bit
PPCREC_IML_OP_CR_SET, // set cr bit
PPCREC_IML_OP_CR_OR, // OR cr bits
PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first
PPCREC_IML_OP_CR_AND, // AND cr bits
PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first
// FPU
PPCREC_IML_OP_FPR_ADD_BOTTOM,
PPCREC_IML_OP_FPR_ADD_PAIR,
@ -54,9 +47,9 @@ enum
PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half
PPCREC_IML_OP_FPR_FCMPO_BOTTOM,
PPCREC_IML_OP_FPR_FCMPU_BOTTOM,
PPCREC_IML_OP_FPR_FCMPU_TOP,
PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated
PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated
PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated
PPCREC_IML_OP_FPR_NEGATE_BOTTOM,
PPCREC_IML_OP_FPR_NEGATE_PAIR,
PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0)
@ -111,21 +104,6 @@ enum
PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak
};
enum // deprecated condition codes
{
PPCREC_JUMP_CONDITION_NONE,
PPCREC_JUMP_CONDITION_E, // equal / zero
PPCREC_JUMP_CONDITION_NE, // not equal / not zero
PPCREC_JUMP_CONDITION_LE, // less or equal
PPCREC_JUMP_CONDITION_L, // less
PPCREC_JUMP_CONDITION_GE, // greater or equal
PPCREC_JUMP_CONDITION_G, // greater
// special case:
PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling
PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow
};
enum class IMLCondition : uint8
{
EQ,
@ -137,14 +115,17 @@ enum class IMLCondition : uint8
SIGNED_OVERFLOW,
SIGNED_NOVERFLOW,
};
enum
{
PPCREC_CR_MODE_COMPARE_SIGNED,
PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare
// floating point conditions
UNORDERED_GT, // a > b, false if either is NaN
UNORDERED_LT, // a < b, false if either is NaN
UNORDERED_EQ, // a == b, false if either is NaN
UNORDERED_U, // unordered (true if either operand is NaN)
PPCREC_CR_MODE_LOGICAL,
ORDERED_GT,
ORDERED_LT,
ORDERED_EQ,
ORDERED_U
};
enum
@ -164,18 +145,20 @@ enum
PPCREC_IML_TYPE_NAME_R, // name* = r*
PPCREC_IML_TYPE_R_S32, // r* (op) imm
PPCREC_IML_TYPE_MACRO,
PPCREC_IML_TYPE_CJUMP, // conditional jump
PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0
PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands)
// new style of handling conditions and branches:
// conditions and branches
PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r*
PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm
PPCREC_IML_TYPE_JUMP, // replaces CJUMP. Jump always, no condition
PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. Jump condition is based on boolean register
PPCREC_IML_TYPE_JUMP, // jump always
PPCREC_IML_TYPE_CONDITIONAL_JUMP, // jump conditionally based on boolean value in register
// conditional
// atomic
PPCREC_IML_TYPE_ATOMIC_CMP_STORE,
// conditional (legacy)
PPCREC_IML_TYPE_CONDITIONAL_R_S32,
// FPR
PPCREC_IML_TYPE_FPR_R_NAME, // name = f*
PPCREC_IML_TYPE_FPR_NAME_R, // f* = name
@ -187,6 +170,8 @@ enum
PPCREC_IML_TYPE_FPR_R_R_R,
PPCREC_IML_TYPE_FPR_R_R_R_R,
PPCREC_IML_TYPE_FPR_R,
PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r*
};
enum
@ -197,15 +182,18 @@ enum
PPCREC_NAME_SPR0 = 3000,
PPCREC_NAME_FPR0 = 4000,
PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
PPCREC_NAME_XER_CA = 6000, // carry bit
PPCREC_NAME_XER_CA = 6000, // carry bit from XER
PPCREC_NAME_XER_OV = 6001, // overflow bit from XER
PPCREC_NAME_XER_SO = 6002, // summary overflow bit from XER
PPCREC_NAME_CR = 7000, // CR register bits (31 to 0)
PPCREC_NAME_CR_LAST = PPCREC_NAME_CR+31,
PPCREC_NAME_CPU_MEMRES_EA = 8000,
PPCREC_NAME_CPU_MEMRES_VAL = 8001
};
// special cases for LOAD/STORE
#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value)
#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid)
#define PPC_REC_INVALID_REGISTER 0xFF
#define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead
// deprecated, use Espresso namespace
#define PPCREC_CR_BIT_LT 0
#define PPCREC_CR_BIT_GT 1
#define PPCREC_CR_BIT_EQ 2
@ -337,13 +325,12 @@ struct IMLUsedRegisters
using IMLReg = uint8;
inline constexpr IMLReg IMLREG_INVALID = (IMLReg)-1;
struct IMLInstruction
{
uint8 type;
uint8 operation;
uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr.
uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior
uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated
union
{
struct
@ -352,13 +339,11 @@ struct IMLInstruction
}padding;
struct
{
// R (op) A [update cr* in mode *]
uint8 registerResult;
uint8 registerA;
}op_r_r;
struct
{
// R = A (op) B [update cr* in mode *]
uint8 registerResult;
uint8 registerA;
uint8 registerB;
@ -385,13 +370,11 @@ struct IMLInstruction
}op_r_r_s32_carry;
struct
{
// R/F = NAME or NAME = R/F
uint8 registerIndex;
uint32 name;
}op_r_name;
}op_r_name; // alias op_name_r
struct
{
// R (op) s32 [update cr* in mode *]
uint8 registerIndex;
sint32 immS32;
}op_r_immS32;
@ -402,13 +385,6 @@ struct IMLInstruction
uint16 paramU16;
}op_macro;
struct
{
uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup)
uint8 crRegisterIndex;
uint8 crBitIndex;
bool bitMustBeSet;
}op_conditionalJump; // legacy jump
struct
{
uint8 registerData;
uint8 registerMem;
@ -450,6 +426,13 @@ struct IMLInstruction
uint8 registerResult;
}op_fpr_r;
struct
{
IMLReg regR; // stores the boolean result of the comparison
IMLReg regA;
IMLReg regB;
IMLCondition cond;
}op_fpr_compare;
struct
{
uint8 crD; // crBitIndex (result)
uint8 crA; // crBitIndex
@ -474,6 +457,13 @@ struct IMLInstruction
uint8 registerBool;
bool mustBeTrue;
}op_conditionalJump2;
struct
{
IMLReg regEA;
IMLReg regCompareValue;
IMLReg regWriteValue;
IMLReg regBoolOut; // boolean 0/1
}op_atomic_compare_store;
// conditional operations (emitted if supported by target platform)
struct
{
@ -495,7 +485,6 @@ struct IMLInstruction
type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE ||
type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE ||
type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB ||
type == PPCREC_IML_TYPE_CJUMP ||
type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ||
type == PPCREC_IML_TYPE_JUMP ||
type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
@ -508,8 +497,6 @@ struct IMLInstruction
{
type = PPCREC_IML_TYPE_NO_OP;
operation = 0;
crRegister = PPC_REC_INVALID_REGISTER;
crMode = 0;
}
void make_debugbreak(uint32 currentPPCAddress = 0)
@ -530,7 +517,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK;
this->operation = 0;
this->crRegister = PPC_REC_INVALID_REGISTER;
}
@ -539,8 +525,6 @@ struct IMLInstruction
// operation with two register operands (e.g. "t0 = t1")
this->type = PPCREC_IML_TYPE_R_R;
this->operation = operation;
this->crRegister = crRegister;
this->crMode = crMode;
this->op_r_r.registerResult = registerResult;
this->op_r_r.registerA = registerA;
}
@ -550,8 +534,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_R_S32;
this->operation = operation;
this->crRegister = crRegister;
this->crMode = crMode;
this->op_r_immS32.registerIndex = registerIndex;
this->op_r_immS32.immS32 = immS32;
}
@ -561,8 +543,6 @@ struct IMLInstruction
// operation with three register operands (e.g. "t0 = t1 + t4")
this->type = PPCREC_IML_TYPE_R_R_R;
this->operation = operation;
this->crRegister = crRegister;
this->crMode = crMode;
this->op_r_r_r.registerResult = registerResult;
this->op_r_r_r.registerA = registerA;
this->op_r_r_r.registerB = registerB;
@ -572,8 +552,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_R_R_R_CARRY;
this->operation = operation;
this->crRegister = 0xFF;
this->crMode = 0xFF;
this->op_r_r_r_carry.regR = registerResult;
this->op_r_r_r_carry.regA = registerA;
this->op_r_r_r_carry.regB = registerB;
@ -585,8 +563,6 @@ struct IMLInstruction
// operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234")
this->type = PPCREC_IML_TYPE_R_R_S32;
this->operation = operation;
this->crRegister = crRegister;
this->crMode = crMode;
this->op_r_r_s32.registerResult = registerResult;
this->op_r_r_s32.registerA = registerA;
this->op_r_r_s32.immS32 = immS32;
@ -596,8 +572,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_R_R_S32_CARRY;
this->operation = operation;
this->crRegister = 0xFF;
this->crMode = 0xFF;
this->op_r_r_s32_carry.regR = registerResult;
this->op_r_r_s32_carry.regA = registerA;
this->op_r_r_s32_carry.immS32 = immS32;
@ -608,8 +582,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_COMPARE;
this->operation = -999;
this->crRegister = PPC_REC_INVALID_REGISTER;
this->crMode = 0;
this->op_compare.registerResult = registerResult;
this->op_compare.registerOperandA = registerA;
this->op_compare.registerOperandB = registerB;
@ -620,8 +592,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_COMPARE_S32;
this->operation = -999;
this->crRegister = PPC_REC_INVALID_REGISTER;
this->crMode = 0;
this->op_compare_s32.registerResult = registerResult;
this->op_compare_s32.registerOperandA = registerA;
this->op_compare_s32.immS32 = immS32;
@ -632,8 +602,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP;
this->operation = -999;
this->crRegister = PPC_REC_INVALID_REGISTER;
this->crMode = 0;
this->op_conditionalJump2.registerBool = registerBool;
this->op_conditionalJump2.mustBeTrue = mustBeTrue;
}
@ -642,8 +610,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_JUMP;
this->operation = -999;
this->crRegister = PPC_REC_INVALID_REGISTER;
this->crMode = 0;
}
// load from memory
@ -651,7 +617,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_LOAD;
this->operation = 0;
this->crRegister = PPC_REC_INVALID_REGISTER;
this->op_storeLoad.registerData = registerDestination;
this->op_storeLoad.registerMem = registerMemory;
this->op_storeLoad.immS32 = immS32;
@ -665,7 +630,6 @@ struct IMLInstruction
{
this->type = PPCREC_IML_TYPE_STORE;
this->operation = 0;
this->crRegister = PPC_REC_INVALID_REGISTER;
this->op_storeLoad.registerData = registerSource;
this->op_storeLoad.registerMem = registerMemory;
this->op_storeLoad.immS32 = immS32;
@ -674,6 +638,26 @@ struct IMLInstruction
this->op_storeLoad.flags2.signExtend = false;
}
// Turns this instruction into a PPCREC_IML_TYPE_ATOMIC_CMP_STORE.
//   regEA            - stored as op_atomic_compare_store.regEA
//   regCompareValue  - stored as op_atomic_compare_store.regCompareValue
//   regWriteValue    - stored as op_atomic_compare_store.regWriteValue
//   regSuccessOutput - stored as op_atomic_compare_store.regBoolOut (boolean 0/1 result register)
void make_atomic_cmp_store(IMLReg regEA, IMLReg regCompareValue, IMLReg regWriteValue, IMLReg regSuccessOutput)
{
	type = PPCREC_IML_TYPE_ATOMIC_CMP_STORE;
	operation = 0;
	auto& params = op_atomic_compare_store;
	params.regEA = regEA;
	params.regCompareValue = regCompareValue;
	params.regWriteValue = regWriteValue;
	params.regBoolOut = regSuccessOutput;
}
// Turns this instruction into a PPCREC_IML_TYPE_FPR_COMPARE.
//   regA, regB - the two operand registers of the comparison
//   regR       - register that stores the boolean result of the comparison
//   cond       - condition to evaluate
// The operation field is set to the same -999 placeholder that the other
// compare/jump builders of this struct use.
void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
{
	type = PPCREC_IML_TYPE_FPR_COMPARE;
	operation = -999;
	auto& params = op_fpr_compare;
	params.regR = regR;
	params.regA = regA;
	params.regB = regB;
	params.cond = cond;
}
void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;
//void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]);

View File

@ -511,6 +511,8 @@ uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, I
*/
uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
__debugbreak(); // deprecated
if (imlSegment->nextSegmentIsUncertain)
{
return 0;
@ -535,81 +537,83 @@ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IM
void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext)
{
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
for(IMLInstruction& instIt : segIt->imlList)
{
if (instIt.type == PPCREC_IML_TYPE_CJUMP)
{
if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
{
uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex);
segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written
segIt->crBitsRead |= (crBitFlag);
}
}
else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex);
segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written
segIt->crBitsRead |= (crBitFlag);
}
else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR)
{
segIt->crBitsRead |= 0xFFFFFFFF;
}
else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF)
{
segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32);
}
else if( instIt.type == PPCREC_IML_TYPE_CR )
{
if (instIt.operation == PPCREC_IML_OP_CR_CLEAR ||
instIt.operation == PPCREC_IML_OP_CR_SET)
{
uint32 crBitFlag = 1 << (instIt.op_cr.crD);
segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten);
}
else if (instIt.operation == PPCREC_IML_OP_CR_OR ||
instIt.operation == PPCREC_IML_OP_CR_ORC ||
instIt.operation == PPCREC_IML_OP_CR_AND ||
instIt.operation == PPCREC_IML_OP_CR_ANDC)
{
uint32 crBitFlag = 1 << (instIt.op_cr.crD);
segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten);
crBitFlag = 1 << (instIt.op_cr.crA);
segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead);
crBitFlag = 1 << (instIt.op_cr.crB);
segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead);
}
else
cemu_assert_unimplemented();
}
else if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7)
{
segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4));
}
else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER )
{
// overwrites CR0
segIt->crBitsWritten |= (0xF<<0);
}
}
}
// flag instructions that write to CR where we can ignore individual CR bits
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
for (IMLInstruction& instIt : segIt->imlList)
{
if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7)
{
uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4);
uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt);
uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead;
instIt.crIgnoreMask = crIgnoreMask;
}
}
}
__debugbreak(); // deprecated
//for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
//{
// for(IMLInstruction& instIt : segIt->imlList)
// {
// if (instIt.type == PPCREC_IML_TYPE_CJUMP)
// {
// if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
// {
// uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex);
// segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written
// segIt->crBitsRead |= (crBitFlag);
// }
// }
// else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
// {
// uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex);
// segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written
// segIt->crBitsRead |= (crBitFlag);
// }
// else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR)
// {
// segIt->crBitsRead |= 0xFFFFFFFF;
// }
// else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF)
// {
// segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32);
// }
// else if( instIt.type == PPCREC_IML_TYPE_CR )
// {
// if (instIt.operation == PPCREC_IML_OP_CR_CLEAR ||
// instIt.operation == PPCREC_IML_OP_CR_SET)
// {
// uint32 crBitFlag = 1 << (instIt.op_cr.crD);
// segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten);
// }
// else if (instIt.operation == PPCREC_IML_OP_CR_OR ||
// instIt.operation == PPCREC_IML_OP_CR_ORC ||
// instIt.operation == PPCREC_IML_OP_CR_AND ||
// instIt.operation == PPCREC_IML_OP_CR_ANDC)
// {
// uint32 crBitFlag = 1 << (instIt.op_cr.crD);
// segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten);
// crBitFlag = 1 << (instIt.op_cr.crA);
// segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead);
// crBitFlag = 1 << (instIt.op_cr.crB);
// segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead);
// }
// else
// cemu_assert_unimplemented();
// }
// else if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7)
// {
// segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4));
// }
// else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER )
// {
// // overwrites CR0
// segIt->crBitsWritten |= (0xF<<0);
// }
// }
//}
//// flag instructions that write to CR where we can ignore individual CR bits
//for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
//{
// for (IMLInstruction& instIt : segIt->imlList)
// {
// if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7)
// {
// uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4);
// uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt);
// uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead;
// instIt.crIgnoreMask = crIgnoreMask;
// }
// }
//}
}
//bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg)

View File

@ -1,7 +1,7 @@
#pragma once
#include "IMLInstruction.h"
#define IML_RA_VIRT_REG_COUNT_MAX 40 // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic
#define IML_RA_VIRT_REG_COUNT_MAX (40 + 32) // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic
struct IMLSegmentPoint
{

View File

@ -186,6 +186,14 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
return nullptr;
}
//if (ppcRecFunc->ppcAddress == 0x30DF5F8)
//{
// debug_printf("----------------------------------------\n");
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
//}
//if (ppcRecFunc->ppcAddress == 0x11223344)
//{
// //debug_printf("----------------------------------------\n");
@ -302,9 +310,8 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);
// remove redundant name load and store instructions
PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext);
//PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
//PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext);
return true;
}

View File

@ -6,7 +6,7 @@
#define PPC_REC_ALIGN_TO_4MB(__v) (((__v)+4*1024*1024-1)&~(4*1024*1024-1))
#define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2)
#define PPC_REC_MAX_VIRTUAL_GPR (40 + 32) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2); NOTE(review): the additional 32 presumably reserves one virtual register per CR bit - confirm
struct ppcRecRange_t
{

View File

@ -23,13 +23,7 @@ uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext,
uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
// IML instruction generation
void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction);
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
// IML instruction generation (new style, can generate new instructions but also overwrite existing ones)
void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER);
// IML generation - FPU

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +1,16 @@
#include "Cafe/HW/Espresso/EspressoISA.h"
#include "../Interpreter/PPCInterpreterInternal.h"
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
#include "Cafe/GameProfile/GameProfile.h"
IMLReg _GetCRReg(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit);
void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER)
{
// load from memory
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD;
imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->operation = 0;
imlInstruction->op_storeLoad.registerData = registerDestination;
imlInstruction->op_storeLoad.registerMem = registerMemory;
@ -23,7 +25,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenCo
// load from memory
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED;
imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->operation = 0;
imlInstruction->op_storeLoad.registerData = registerDestination;
imlInstruction->op_storeLoad.registerMem = registerMemory1;
@ -39,7 +40,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t*
// store to memory
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE;
imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->operation = 0;
imlInstruction->op_storeLoad.registerData = registerSource;
imlInstruction->op_storeLoad.registerMem = registerMemory;
@ -54,7 +54,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenCo
// store to memory
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED;
imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->operation = 0;
imlInstruction->op_storeLoad.registerData = registerSource;
imlInstruction->op_storeLoad.registerMem = registerMemory1;
@ -73,7 +72,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcI
imlInstruction->operation = operation;
imlInstruction->op_fpr_r_r.registerResult = registerResult;
imlInstruction->op_fpr_r_r.registerOperand = registerOperand;
imlInstruction->crRegister = crRegister;
imlInstruction->op_fpr_r_r.flags = 0;
}
@ -86,7 +84,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* pp
imlInstruction->op_fpr_r_r_r.registerResult = registerResult;
imlInstruction->op_fpr_r_r_r.registerOperandA = registerOperand1;
imlInstruction->op_fpr_r_r_r.registerOperandB = registerOperand2;
imlInstruction->crRegister = crRegister;
imlInstruction->op_fpr_r_r_r.flags = 0;
}
@ -100,7 +97,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t*
imlInstruction->op_fpr_r_r_r_r.registerOperandA = registerOperandA;
imlInstruction->op_fpr_r_r_r_r.registerOperandB = registerOperandB;
imlInstruction->op_fpr_r_r_r_r.registerOperandC = registerOperandC;
imlInstruction->crRegister = crRegister;
imlInstruction->op_fpr_r_r_r_r.flags = 0;
}
@ -112,7 +108,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcIml
imlInstruction->type = PPCREC_IML_TYPE_FPR_R;
imlInstruction->operation = operation;
imlInstruction->op_fpr_r.registerResult = registerResult;
imlInstruction->crRegister = crRegister;
}
/*
@ -916,12 +911,33 @@ bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op
bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
sint32 crfD, frA, frB;
PPC_OPC_TEMPL_X(opcode, crfD, frA, frB);
crfD >>= 2;
uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA);
uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD);
printf("FCMPO: Not implemented\n");
return false;
//sint32 crfD, frA, frB;
//PPC_OPC_TEMPL_X(opcode, crfD, frA, frB);
//crfD >>= 2;
//IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA);
//IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB);
//IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT);
//IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT);
//IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
//IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO);
//ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT);
//ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT);
//ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ);
//ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U);
// todo - set fpscr
//sint32 crfD, frA, frB;
//PPC_OPC_TEMPL_X(opcode, crfD, frA, frB);
//crfD >>= 2;
//uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA);
//uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
//PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD);
return true;
}
@ -930,9 +946,21 @@ bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
sint32 crfD, frA, frB;
PPC_OPC_TEMPL_X(opcode, crfD, frA, frB);
crfD >>= 2;
uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA);
uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD);
IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA);
IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB);
IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT);
IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT);
IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO);
ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT);
ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT);
ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ);
ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U);
// todo: set fpscr
return true;
}
@ -1837,6 +1865,9 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32
bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
printf("PS_CMPO0: Not implemented\n");
return false;
sint32 crfD, frA, frB;
uint32 c=0;
frB = (opcode>>11)&0x1F;
@ -1851,6 +1882,9 @@ bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 o
bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
printf("PS_CMPU0: Not implemented\n");
return false;
sint32 crfD, frA, frB;
frB = (opcode >> 11) & 0x1F;
frA = (opcode >> 16) & 0x1F;
@ -1863,6 +1897,9 @@ bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 o
bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
printf("PS_CMPU1: Not implemented\n");
return false;
sint32 crfD, frA, frB;
frB = (opcode >> 11) & 0x1F;
frA = (opcode >> 16) & 0x1F;

View File

@ -81,7 +81,7 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont
entrySegment->enterPPCAddress = imlSegment->enterPPCAddress;
// create jump instruction
PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1);
PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList.data() + 0);
entrySegment->imlList.data()[0].make_jump_new();
IMLSegment_SetLinkBranchTaken(entrySegment, imlSegment);
// remove enterable flag from original segment
imlSegment->isEnterable = false;