PPCRec: Make LSWI/STSWI more generic + GPR temporaries storage

This commit is contained in:
Exzap 2022-12-13 05:41:26 +01:00
parent 9dc820795f
commit d308252177
7 changed files with 161 additions and 319 deletions

View File

@ -415,7 +415,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
// todo: Optimize by using only MOVZX/MOVSX
if( indexed )
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
// todo: Use sign extend move from memory instead of separate sign-extend?
if( signExtend )
x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
else
@ -440,28 +439,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
// LWARX instruction costs extra cycles (this speeds up busy loops)
x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20);
}
else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_LSWI_3 )
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
if( switchEndian == false )
assert_dbg();
if( indexed )
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move)
if(g_CPUFeatures.x86.movbe)
{
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
if( indexed && realRegisterMem != realRegisterData )
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else
{
x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
if( indexed && realRegisterMem != realRegisterData )
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
}
x64Gen_and_reg64Low32_imm32(x64GenContext, realRegisterData, 0xFFFFFF00);
}
else
return false;
return true;
@ -599,36 +576,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex);
}
else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_2)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); // store upper 2 bytes ..
x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // .. as big-endian
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_3)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 2, REG_RESV_TEMP);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 1, REG_RESV_TEMP);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 0, REG_RESV_TEMP);
if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else
return false;
return true;
@ -1943,40 +1890,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction)
{
if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment)
{
debug_printf("PPCRecompilerX64Gen_imlInstruction_conditionalJump(): Failed on deprecated jump method\n");
return false;
}
if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE )
{
// jump always
if (imlInstruction->op_conditionalJump.jumpAccordingToSegment)
{
// jump to segment
if (imlSegment->nextSegmentBranchTaken == nullptr)
assert_dbg();
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmp_imm32(x64GenContext, 0);
}
else
{
// deprecated (jump to jumpmark)
__debugbreak(); // deprecated
//PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
//x64Gen_jmp_imm32(x64GenContext, 0);
}
cemu_assert_debug(imlSegment->nextSegmentBranchTaken);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmp_imm32(x64GenContext, 0);
}
else
{
if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment)
{
debug_printf("Unsupported deprecated cjump to ppc address\n");
return false;
}
cemu_assert_debug(imlSegment->nextSegmentBranchTaken);
// generate jump update marker
if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 )
{
@ -2159,6 +2082,10 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
else
assert_dbg();
}
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{
x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
}
else
assert_dbg();
}
@ -2187,6 +2114,10 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
else
assert_dbg();
}
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{
x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), tempToRealRegister(imlInstruction->op_r_name.registerIndex));
}
else
assert_dbg();
}

View File

@ -163,24 +163,18 @@ enum
enum
{
PPCREC_NAME_NONE,
PPCREC_NAME_TEMPORARY,
PPCREC_NAME_R0 = 1000,
PPCREC_NAME_SPR0 = 2000,
PPCREC_NAME_FPR0 = 3000,
PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7
PPCREC_NAME_TEMPORARY = 1000,
PPCREC_NAME_R0 = 2000,
PPCREC_NAME_SPR0 = 3000,
PPCREC_NAME_FPR0 = 4000,
PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
};
// special cases for LOAD/STORE
#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value)
#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid)
#define PPC_REC_STORE_STSWI_1 (200) // stswi nb = 1
#define PPC_REC_STORE_STSWI_2 (201) // stswi nb = 2
#define PPC_REC_STORE_STSWI_3 (202) // stswi nb = 3
#define PPC_REC_STORE_LSWI_1 (200) // lswi nb = 1
#define PPC_REC_STORE_LSWI_2 (201) // lswi nb = 2
#define PPC_REC_STORE_LSWI_3 (202) // lswi nb = 3
#define PPC_REC_INVALID_REGISTER 0xFF
#define PPC_REC_INVALID_REGISTER 0xFF
#define PPCREC_CR_BIT_LT 0
#define PPCREC_CR_BIT_GT 1
@ -312,7 +306,6 @@ struct IMLInstruction
}op_macro;
struct
{
bool jumpAccordingToSegment; //IMLSegment* destinationSegment; // if set, this replaces jumpmarkAddress
uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup)
uint8 crRegisterIndex;
uint8 crBitIndex;
@ -413,18 +406,82 @@ struct IMLInstruction
void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16)
{
type = PPCREC_IML_TYPE_MACRO;
operation = macroId;
op_macro.param = param;
op_macro.param2 = param2;
op_macro.paramU16 = paramU16;
this->type = PPCREC_IML_TYPE_MACRO;
this->operation = macroId;
this->op_macro.param = param;
this->op_macro.param2 = param2;
this->op_macro.paramU16 = paramU16;
}
void make_cjump_cycle_check()
{
type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK;
operation = 0;
crRegister = PPC_REC_INVALID_REGISTER;
this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK;
this->operation = 0;
this->crRegister = PPC_REC_INVALID_REGISTER;
}
// Turns this instruction into a two-register operation (e.g. "t0 = t1").
// operation      - IML operation id stored in the instruction
// registerResult - register that receives the result
// registerA      - source register
// crRegister     - defaults to PPC_REC_INVALID_REGISTER
// crMode         - defaults to 0
void make_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0)
{
	// generic instruction fields (this-> is required where parameters share the member's name)
	this->type = PPCREC_IML_TYPE_R_R;
	this->operation = operation;
	this->crRegister = crRegister;
	this->crMode = crMode;
	// operands
	auto& operands = this->op_r_r;
	operands.registerResult = registerResult;
	operands.registerA = registerA;
}
// Turns this instruction into a three-register operation (e.g. "t0 = t1 + t4").
// operation      - IML operation id stored in the instruction
// registerResult - register that receives the result
// registerA/B    - the two source registers
// crRegister     - defaults to PPC_REC_INVALID_REGISTER
// crMode         - defaults to 0
void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0)
{
	// generic instruction fields (this-> is required where parameters share the member's name)
	this->type = PPCREC_IML_TYPE_R_R_R;
	this->operation = operation;
	this->crRegister = crRegister;
	this->crMode = crMode;
	// operands
	auto& operands = this->op_r_r_r;
	operands.registerResult = registerResult;
	operands.registerA = registerA;
	operands.registerB = registerB;
}
// Turns this instruction into an operation with two registers and a signed
// 32-bit immediate (e.g. "t0 = t1 + 1234").
// operation      - IML operation id stored in the instruction
// registerResult - register that receives the result
// registerA      - source register
// immS32         - signed immediate operand
// crRegister     - defaults to PPC_REC_INVALID_REGISTER
// crMode         - defaults to 0
void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0)
{
	// generic instruction fields (this-> is required where parameters share the member's name)
	this->type = PPCREC_IML_TYPE_R_R_S32;
	this->operation = operation;
	this->crRegister = crRegister;
	this->crMode = crMode;
	// operands
	auto& operands = this->op_r_r_s32;
	operands.registerResult = registerResult;
	operands.registerA = registerA;
	operands.immS32 = immS32;
}
// Turns this instruction into a memory load (PPCREC_IML_TYPE_LOAD).
// registerDestination - register that receives the loaded value
// registerMemory      - register holding the base address
// immS32              - signed offset stored alongside the base register
// copyWidth           - access width as understood by the backend (callers in this file pass bit counts such as 8/16/32)
// signExtend          - stored in flags2.signExtend
// switchEndian        - stored in flags2.swapEndian
void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
{
	// generic instruction fields; loads carry no operation id and no CR register
	this->type = PPCREC_IML_TYPE_LOAD;
	this->operation = 0;
	this->crRegister = PPC_REC_INVALID_REGISTER;
	// memory access description
	auto& access = this->op_storeLoad;
	access.registerData = registerDestination;
	access.registerMem = registerMemory;
	access.immS32 = immS32;
	access.copyWidth = copyWidth;
	access.flags2.swapEndian = switchEndian;
	access.flags2.signExtend = signExtend;
}
// Turns this instruction into a memory store (PPCREC_IML_TYPE_STORE).
// registerSource - register whose value is written to memory
// registerMemory - register holding the base address
// immS32         - signed offset stored alongside the base register
// copyWidth      - access width as understood by the backend (callers in this file pass bit counts such as 8/16/32)
// switchEndian   - stored in flags2.swapEndian
void make_memory_r(uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian)
{
	// generic instruction fields; stores carry no operation id and no CR register
	this->type = PPCREC_IML_TYPE_STORE;
	this->operation = 0;
	this->crRegister = PPC_REC_INVALID_REGISTER;
	// memory access description
	auto& access = this->op_storeLoad;
	access.registerData = registerSource;
	access.registerMem = registerMemory;
	access.immS32 = immS32;
	access.copyWidth = copyWidth;
	access.flags2.swapEndian = switchEndian;
	access.flags2.signExtend = false; // sign extension is always cleared for stores
}
void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;

View File

@ -1211,7 +1211,7 @@ void _reorderConditionModifyInstructions(IMLSegment* imlSegment)
#endif
IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1);
memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction));
PPCRecompilerImlGen_generateNewInstruction_noOp(nullptr, imlSegment->imlList.data() + crSetterInstructionIndex);
imlSegment->imlList[crSetterInstructionIndex].make_no_op();
}
/*

View File

@ -1203,7 +1203,7 @@ void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLS
{
if (routeDepth >= 64)
{
forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress);
forceLogDebug_printf("Recompiler RA route maximum depth exceeded\n");
return;
}
route[routeDepth] = currentSegment;

View File

@ -32,7 +32,6 @@ struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct pp
struct ppcImlGenContext_t
{
class PPCFunctionBoundaryTracker* boundaryTracker;
PPCRecFunction_t* functionRef;
uint32* currentInstruction;
uint32 ppcAddressOfCurrentInstruction;
IMLSegment* currentOutputSegment;

View File

@ -26,7 +26,6 @@ uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext,
uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
// IML instruction generation
void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress);
void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction);
void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode);
@ -37,8 +36,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGe
// IML instruction generation (new style, can generate new instructions but also overwrite existing ones)
void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction);
void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER);
// IML generation - FPU

View File

@ -50,24 +50,9 @@ uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenCont
// Appends a fresh instruction to the IML list of the current output segment
// and returns a pointer to it. The instruction is zero-filled, except for
// crRegister which is set to PPC_REC_INVALID_REGISTER so that no CR register
// is updated by default.
// NOTE(review): imlList looks like a std::vector, so the returned pointer is
// presumably invalidated by later appends - confirm against callers.
IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext)
{
	IMLInstruction* newInstruction = &ppcImlGenContext->currentOutputSegment->imlList.emplace_back();
	memset(newInstruction, 0x00, sizeof(IMLInstruction));
	newInstruction->crRegister = PPC_REC_INVALID_REGISTER;
	return newInstruction;
}
@ -86,28 +71,12 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGe
void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0)
{
// operation with three register operands (e.g. "t0 = t1 + t4")
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_R_R_R;
imlInstruction->operation = operation;
imlInstruction->crRegister = crRegister;
imlInstruction->crMode = crMode;
imlInstruction->op_r_r_r.registerResult = registerResult;
imlInstruction->op_r_r_r.registerA = registerA;
imlInstruction->op_r_r_r.registerB = registerB;
ppcImlGenContext->emitInst().make_r_r_r(operation, registerResult, registerA, registerB, crRegister, crMode);
}
void PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0)
{
// operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234")
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_R_R_S32;
imlInstruction->operation = operation;
imlInstruction->crRegister = crRegister;
imlInstruction->crMode = crMode;
imlInstruction->op_r_r_s32.registerResult = registerResult;
imlInstruction->op_r_r_s32.registerA = registerA;
imlInstruction->op_r_r_s32.immS32 = immS32;
ppcImlGenContext->emitInst().make_r_r_s32(operation, registerResult, registerA, immS32, crRegister, crMode);
}
void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name)
@ -153,25 +122,6 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte
}
// Stub for emitting an unconditional jump to a PPC address (jumpmark).
// The body only triggers __debugbreak(); none of the parameters are used.
// The former implementation is kept below as commented-out code for reference.
// NOTE(review): other parts of this file call the jumpmark-based jump method
// "deprecated" - presumably this function is meant to be removed; confirm.
void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress)
{
__debugbreak();
//// jump
//if (imlInstruction == NULL)
// imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
//else
// memset(imlInstruction, 0, sizeof(IMLInstruction));
//imlInstruction->type = PPCREC_IML_TYPE_CJUMP;
//imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
//imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress;
//imlInstruction->op_conditionalJump.jumpAccordingToSegment = false;
//imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE;
//imlInstruction->op_conditionalJump.crRegisterIndex = 0;
//imlInstruction->op_conditionalJump.crBitIndex = 0;
//imlInstruction->op_conditionalJump.bitMustBeSet = false;
}
// jump based on segment branches
void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction)
{
@ -180,18 +130,22 @@ void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t*
imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_CJUMP;
imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->op_conditionalJump.jumpAccordingToSegment = true;
imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE;
imlInstruction->op_conditionalJump.crRegisterIndex = 0;
imlInstruction->op_conditionalJump.crBitIndex = 0;
imlInstruction->op_conditionalJump.bitMustBeSet = false;
}
void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction)
void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet)
{
if (imlInstruction == NULL)
imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->make_no_op();
// conditional jump
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_CJUMP;
imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->op_conditionalJump.condition = jumpCondition;
imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex;
imlInstruction->op_conditionalJump.crBitIndex = crBitIndex;
imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet;
}
void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 crD, uint8 crA, uint8 crB)
@ -209,48 +163,9 @@ void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGen
imlInstruction->op_cr.crB = crB;
}
// Stub for emitting a conditional jump to a PPC address (jumpmark).
// The body only triggers __debugbreak(); none of the parameters are used.
// The former implementation is kept below as commented-out code for reference.
// NOTE(review): other parts of this file call the jumpmark-based jump method
// "deprecated" - presumably this function is meant to be removed; confirm.
void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpmarkAddress, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet)
{
__debugbreak();
//// conditional jump
//IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
//imlInstruction->type = PPCREC_IML_TYPE_CJUMP;
//imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
//imlInstruction->op_conditionalJump.jumpAccordingToSegment = false;
//imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress;
//imlInstruction->op_conditionalJump.condition = jumpCondition;
//imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex;
//imlInstruction->op_conditionalJump.crBitIndex = crBitIndex;
//imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet;
}
// Appends a conditional jump (PPCREC_IML_TYPE_CJUMP) to the current output
// segment. The jump is flagged as segment-based (jumpAccordingToSegment).
// jumpCondition   - stored in op_conditionalJump.condition
// crRegisterIndex - CR register index to test
// crBitIndex      - bit index within that CR register
// bitMustBeSet    - stored in op_conditionalJump.bitMustBeSet
void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet)
{
	IMLInstruction* jumpInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
	jumpInstruction->type = PPCREC_IML_TYPE_CJUMP;
	jumpInstruction->crRegister = PPC_REC_INVALID_REGISTER;
	// branch description
	auto& jump = jumpInstruction->op_conditionalJump;
	jump.jumpAccordingToSegment = true;
	jump.condition = jumpCondition;
	jump.crRegisterIndex = crRegisterIndex;
	jump.crBitIndex = crBitIndex;
	jump.bitMustBeSet = bitMustBeSet;
}
void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian)
{
// load from memory
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_LOAD;
imlInstruction->operation = 0;
imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->op_storeLoad.registerData = registerDestination;
imlInstruction->op_storeLoad.registerMem = registerMemory;
imlInstruction->op_storeLoad.immS32 = immS32;
imlInstruction->op_storeLoad.copyWidth = copyWidth;
imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian;
imlInstruction->op_storeLoad.flags2.signExtend = signExtend;
ppcImlGenContext->emitInst().make_r_memory(registerDestination, registerMemory, immS32, copyWidth, signExtend, switchEndian);
}
void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
@ -270,17 +185,7 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex
void PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian)
{
// store to memory
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
imlInstruction->type = PPCREC_IML_TYPE_STORE;
imlInstruction->operation = 0;
imlInstruction->crRegister = PPC_REC_INVALID_REGISTER;
imlInstruction->op_storeLoad.registerData = registerSource;
imlInstruction->op_storeLoad.registerMem = registerMemory;
imlInstruction->op_storeLoad.immS32 = immS32;
imlInstruction->op_storeLoad.copyWidth = copyWidth;
imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian;
imlInstruction->op_storeLoad.flags2.signExtend = false;
ppcImlGenContext->emitInst().make_memory_r(registerSource, registerMemory, immS32, copyWidth, switchEndian);
}
void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
@ -630,10 +535,11 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin
}
}
// add range
ppcRecRange_t recRange;
recRange.ppcAddress = startAddress;
recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR
ppcImlGenContext->functionRef->list_ranges.push_back(recRange);
cemu_assert_unimplemented();
//ppcRecRange_t recRange;
//recRange.ppcAddress = startAddress;
//recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR
//ppcImlGenContext->functionRef->list_ranges.push_back(recRange);
}
bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -653,14 +559,9 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
}
// is jump destination within recompiled function?
if( ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest) )
{
// jump to target within same function
PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, nullptr);
}
else
{
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch);
}
return true;
}
@ -778,10 +679,6 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
// far jump
debug_printf("PPCRecompilerImlGen_BC(): Far jump not supported yet");
return false;
PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, jumpCondition, crRegister, crBit, !conditionMustBeTrue);
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch);
//ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4);
}
}
}
@ -2190,70 +2087,40 @@ bool PPCRecompilerImlGen_LSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
PPC_OPC_TEMPL_X(opcode, rD, rA, nb);
if( nb == 0 )
nb = 32;
if( nb == 4 )
if (rA == 0)
{
// if nb == 4 this instruction immitates LWZ
if( rA == 0 )
{
#ifdef CEMU_DEBUG_ASSERT
assert_dbg(); // special form where gpr is ignored and only imm is used
#endif
return false;
}
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// check if destination register is already loaded
uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
if( destinationRegister == PPC_REC_INVALID_REGISTER )
destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
// load half
PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 32, false, true);
return true;
cemu_assert_unimplemented(); // special form where gpr is ignored and EA is 0
return false;
}
else if( nb == 2 )
// potential optimization: On x86 unaligned access is allowed and we could handle the case nb==4 with a single memory read, and nb==2 with a memory read and shift
uint32 memReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA);
uint32 tmpReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
uint32 memOffset = 0;
while (nb > 0)
{
// if nb == 2 this instruction immitates a LHZ but the result is shifted left by 16 bits
if( rA == 0 )
{
#ifdef CEMU_DEBUG_ASSERT
assert_dbg(); // special form where gpr is ignored and only imm is used
#endif
if (rD == rA)
return false;
}
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// check if destination register is already loaded
uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
if( destinationRegister == PPC_REC_INVALID_REGISTER )
destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
// load half
PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, false, true);
// shift
PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, destinationRegister, destinationRegister, 16);
return true;
}
else if( nb == 3 )
{
// if nb == 3 this instruction loads a 3-byte big-endian and the result is shifted left by 8 bits
if( rA == 0 )
cemu_assert(rD < 32);
uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
// load bytes one-by-one
for (sint32 b = 0; b < 4; b++)
{
#ifdef CEMU_DEBUG_ASSERT
assert_dbg(); // special form where gpr is ignored and only imm is used
#endif
return false;
ppcImlGenContext->emitInst().make_r_memory(tmpReg, memReg, memOffset + b, 8, false, false);
sint32 shiftAmount = (3 - b) * 8;
if(shiftAmount)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, tmpReg, tmpReg, shiftAmount);
ppcImlGenContext->emitInst().make_r_r(b == 0 ? PPCREC_IML_OP_ASSIGN : PPCREC_IML_OP_OR, destinationRegister, tmpReg);
nb--;
if (nb == 0)
break;
}
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// check if destination register is already loaded
uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD);
if( destinationRegister == PPC_REC_INVALID_REGISTER )
destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register
// load half
PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, PPC_REC_STORE_LSWI_3, false, true);
return true;
memOffset += 4;
rD++;
}
debug_printf("PPCRecompilerImlGen_LSWI(): Unsupported nb value %d\n", nb);
return false;
return true;
}
bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -2262,38 +2129,32 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
PPC_OPC_TEMPL_X(opcode, rS, rA, nb);
if( nb == 0 )
nb = 32;
if( nb == 4 )
uint32 memReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA);
uint32 tmpReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
uint32 memOffset = 0;
while (nb > 0)
{
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// load source register
uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister
// store word
PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, 32, true);
return true;
if (rS == rA)
return false;
cemu_assert(rS < 32);
uint32 dataRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS);
// store bytes one-by-one
for (sint32 b = 0; b < 4; b++)
{
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpReg, dataRegister);
sint32 shiftAmount = (3 - b) * 8;
if (shiftAmount)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, tmpReg, tmpReg, shiftAmount);
ppcImlGenContext->emitInst().make_memory_r(tmpReg, memReg, memOffset + b, 8, false);
nb--;
if (nb == 0)
break;
}
memOffset += 4;
rS++;
}
else if( nb == 2 )
{
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// load source register
uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister
// store half-word (shifted << 16)
PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_2, false);
return true;
}
else if( nb == 3 )
{
// load memory gpr into register
uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false);
// load source register
uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister
// store 3-byte-word (shifted << 8)
PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_3, false);
return true;
}
debug_printf("PPCRecompilerImlGen_STSWI(): Unsupported nb value %d\n", nb);
return false;
return true;
}
bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -4293,9 +4154,7 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction
bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set<uint32>& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker)
{
ppcImlGenContext.functionRef = ppcRecFunc; // todo - remove this and replace internally with boundary tracker
ppcImlGenContext.boundaryTracker = &boundaryTracker;
if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses))
return false;
@ -4343,8 +4202,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex;
uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex;
bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet;
PPCRecompilerImlGen_generateNewInstruction_noOp(&ppcImlGenContext, lastInstruction);
lastInstruction->make_no_op();
// append conditional moves based on branch condition
for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++)