PPCRec: FPRs now use the shared register allocator

This commit is contained in:
Exzap 2023-02-06 18:03:18 +01:00
parent b1c6646831
commit b4f2f91d87
15 changed files with 822 additions and 668 deletions

View File

@ -11,13 +11,26 @@
// Converts a physical IML register in I32 format into the x86Assembler64::GPR32 value
// with the same numeric ID. Debug builds assert the format and that the ID is below 16.
static x86Assembler64::GPR32 _reg32(IMLReg physReg)
{
	cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32);
	IMLRegID regId = physReg.GetRegID();
	// the range check has to run before the value is returned, otherwise it is dead code
	cemu_assert_debug(regId < 16);
	return (x86Assembler64::GPR32)regId;
}
// Returns the numeric ID of a physical IML register in I64 format.
// Debug builds assert the format and that the ID is below 16.
static uint32 _reg64(IMLReg physReg)
{
	cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64);
	IMLRegID regId = physReg.GetRegID();
	// the range check has to run before the value is returned, otherwise it is dead code
	cemu_assert_debug(regId < 16);
	return regId;
}
// Translates a physical IML register in F64 format into a zero-based index by removing
// IMLArchX86::PHYSREG_FPR_BASE from its ID. Debug builds assert the format and that the
// ID lies within the 16 IDs starting at PHYSREG_FPR_BASE.
uint32 _regF64(IMLReg physReg)
{
	cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::F64);
	const IMLRegID physId = physReg.GetRegID();
	cemu_assert_debug(physId >= IMLArchX86::PHYSREG_FPR_BASE && physId < IMLArchX86::PHYSREG_FPR_BASE+16);
	// keep the arithmetic in IMLRegID so the result converts to uint32 exactly as before
	IMLRegID fprIndex = physId;
	fprIndex -= IMLArchX86::PHYSREG_FPR_BASE;
	return fprIndex;
}
static x86Assembler64::GPR8_REX _reg8(IMLReg physReg)
@ -1233,111 +1246,192 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction
// Emits the x64 load for an R_NAME instruction: the value identified by op_r_name.name is
// read from its PPCInterpreter_t field (addressed as base register + offsetof) into the
// physical register op_r_name.regR.
// - Registers with base format I64 handle GPRs, LR/CTR/XER/UGQR0-7, the temporary GPRs,
//   XER CA/SO, CR bits and the memory reservation address/value.
// - Registers with base format F64 handle FPR0-31 and the 8 temporary FPRs.
// Any other name or register format trips a debug assert/break.
void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
	uint32 name = imlInstruction->op_r_name.name;
	if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
	{
		auto regR = _reg64(imlInstruction->op_r_name.regR);
		if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
		{
			x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0));
		}
		else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
		{
			sint32 sprIndex = (name - PPCREC_NAME_SPR0);
			if (sprIndex == SPR_LR)
				x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR));
			else if (sprIndex == SPR_CTR)
				x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR));
			else if (sprIndex == SPR_XER)
				x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER));
			else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
			{
				sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
				x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, memOffset);
			}
			else
				assert_dbg();
		}
		else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
		{
			x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
		}
		else if (name == PPCREC_NAME_XER_CA)
		{
			// single-byte fields are zero-extended into the full register
			x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
		}
		else if (name == PPCREC_NAME_XER_SO)
		{
			x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so));
		}
		else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
		{
			x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
		}
		else if (name == PPCREC_NAME_CPU_MEMRES_EA)
		{
			x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr));
		}
		else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
		{
			x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue));
		}
		else
			assert_dbg();
	}
	else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
	{
		auto regR = _regF64(imlInstruction->op_r_name.regR);
		if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
		{
			x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
		}
		// both bounds must hold; with '||' this test is always true, the assert below
		// becomes unreachable and an unknown name emits a load from an arbitrary offset
		else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
		{
			x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
		}
		else
		{
			cemu_assert_debug(false);
		}
	}
	else
		DEBUG_BREAK;
}
// Emits the x64 store for a NAME_R instruction: the physical register op_r_name.regR is
// written to the PPCInterpreter_t field (addressed as base register + offsetof) identified
// by op_r_name.name.
// - Registers with base format I64 handle GPRs, LR/CTR/XER/UGQR0-7, the temporary GPRs,
//   XER CA/SO, CR bits and the memory reservation address/value.
// - Registers with base format F64 handle FPR0-31 and the 8 temporary FPRs.
// Any other name or register format trips a debug assert/break.
void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
	uint32 name = imlInstruction->op_r_name.name;
	if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64)
	{
		auto regR = _reg64(imlInstruction->op_r_name.regR);
		if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32)
		{
			x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0), regR);
		}
		else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999)
		{
			uint32 sprIndex = (name - PPCREC_NAME_SPR0);
			if (sprIndex == SPR_LR)
				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), regR);
			else if (sprIndex == SPR_CTR)
				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), regR);
			else if (sprIndex == SPR_XER)
				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), regR);
			else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
			{
				sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
				x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, regR);
			}
			else
				assert_dbg();
		}
		else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
		{
			x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR);
		}
		else if (name == PPCREC_NAME_XER_CA)
		{
			// single-byte fields are stored through the 8-bit view of the register
			x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR));
		}
		else if (name == PPCREC_NAME_XER_SO)
		{
			x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR));
		}
		else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
		{
			x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR));
		}
		else if (name == PPCREC_NAME_CPU_MEMRES_EA)
		{
			x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), regR);
		}
		else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
		{
			x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), regR);
		}
		else
			assert_dbg();
	}
	else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
	{
		// 'name' from the top of the function is reused; no shadowing redeclaration needed
		auto regR = _regF64(imlInstruction->op_r_name.regR);
		if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
		{
			x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
		}
		else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
		{
			x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
		}
		else
		{
			cemu_assert_debug(false);
		}
	}
	else
		DEBUG_BREAK;
}
//void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
//{
// uint32 name = imlInstruction->op_r_name.name;
// uint32 fprReg = _regF64(imlInstruction->op_r_name.regR);
// if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
// {
// x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
// }
// else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
// {
// x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
// }
// else
// {
// cemu_assert_debug(false);
// }
//}
//
//void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
//{
// uint32 name = imlInstruction->op_r_name.name;
// uint32 fprReg = _regF64(imlInstruction->op_r_name.regR);
// if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
// {
// x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
// }
// else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
// {
// x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
// }
// else
// {
// cemu_assert_debug(false);
// }
//}
// File-scope state for a block of code memory; starts out empty (null pointer, zero index, zero size).
// NOTE(review): presumably the buffer that receives generated machine code, with
// codeMemoryBlockIndex as the current write position — confirm against the code that uses these.
uint8* codeMemoryBlock = nullptr;
sint32 codeMemoryBlockIndex = 0;
sint32 codeMemoryBlockSize = 0;

View File

@ -5,11 +5,7 @@
#include "asm/x64util.h" // for recompiler_fres / frsqrte
// Forward declaration only: the x64 backend defines _regF64 (F64 physical register ID minus
// IMLArchX86::PHYSREG_FPR_BASE). A second local definition that returns the raw register ID
// would both clash with that definition and skip the base adjustment.
uint32 _regF64(IMLReg physReg);
uint32 _regI32(IMLReg r)
{

View File

@ -15,8 +15,8 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction);
// optimizer passes
// Each pass takes the IML generation context of the function being recompiled.
// todo - rename
// NOTE(review): the two FPR register passes below are declared and also listed again in
// commented-out form; presumably they are being retired now that FPRs go through the shared
// register allocator — confirm and keep only one form.
bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext);
bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext);
//bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext);
//bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext);

View File

@ -41,22 +41,36 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
return _tempOpcodename;
}
// Builds the textual name of an IML register for debug output: a prefix chosen by the
// register format followed by the register ID.
// Prefixes: F32 -> "f", F64 -> "fd", I32 -> "i", I64 -> "r".
// An unknown format triggers a debug break and yields the bare ID.
std::string IMLDebug_GetRegName(IMLReg r)
{
	std::string regName;
	uint32 regId = r.GetRegID();
	switch (r.GetRegFormat())
	{
	case IMLRegFormat::F32:
		regName.append("f");
		break;
	case IMLRegFormat::F64:
		regName.append("fd");
		break;
	case IMLRegFormat::I32:
		regName.append("i");
		break;
	case IMLRegFormat::I64:
		regName.append("r");
		break;
	default:
		// use the project macro instead of the MSVC-only __debugbreak() intrinsic
		DEBUG_BREAK;
		break;
	}
	regName.append(fmt::format("{}", regId));
	return regName;
}
// Appends one register operand to a debug output line.
// The operand is printed with its format-aware name from IMLDebug_GetRegName and is
// followed by ", " unless isLast marks it as the final operand.
void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false)
{
	// the legacy "t<id>" printing and its unconditional DEBUG_BREAK are gone; keeping them
	// would break on every call and print each non-final operand twice
	strOutput.add(IMLDebug_GetRegName(virtualRegister));
	if (!isLast)
		strOutput.add(", ");
}
void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false)
@ -149,12 +163,6 @@ std::string IMLDebug_GetConditionName(IMLCondition cond)
return "ukn";
}
// Stub variant of IMLDebug_GetRegName: calls cemu_assert_unimplemented() and returns an empty string.
// NOTE(review): a full implementation with the same signature appears earlier in this file;
// presumably this stub is the superseded version and should be removed — confirm.
std::string IMLDebug_GetRegName(IMLReg r)
{
cemu_assert_unimplemented();
return "";
}
void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo)
{
StringBuf strOutput(1024);
@ -197,19 +205,24 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R)
{
if (inst.type == PPCREC_IML_TYPE_R_NAME)
strOutput.add("LD_NAME");
strOutput.add("R_NAME");
else
strOutput.add("ST_NAME");
strOutput.add("NAME_R");
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR);
if(inst.type == PPCREC_IML_TYPE_R_NAME)
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR);
strOutput.addFmt("name_{} (", inst.op_r_name.regR.GetRegID());
strOutput.add("name_");
if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999))
{
strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0);
}
else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999))
{
strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0);
}
else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999))
{
strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0);
@ -227,8 +240,15 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL)
strOutput.add("cpuReservation.value");
else
strOutput.add("ukn");
strOutput.add(")");
{
strOutput.addFmt("name_ukn{}", inst.op_r_name.name);
}
if (inst.type != PPCREC_IML_TYPE_R_NAME)
{
strOutput.add(", ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true);
}
}
else if (inst.type == PPCREC_IML_TYPE_R_R)
{
@ -281,7 +301,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
}
else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
strOutput.add("CJUMP2 ");
strOutput.add("CJUMP ");
while ((sint32)strOutput.getLen() < lineOffsetParameters)
strOutput.add(" ");
IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true);
@ -342,9 +362,9 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData);
if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED)
strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.registerMem2.GetRegID());
strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2));
else
strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32);
strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32);
}
else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
@ -366,7 +386,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
{
if (inst.operation == PPCREC_IML_MACRO_B_TO_REG)
{
strOutput.addFmt("MACRO B_TO_REG t{}", inst.op_macro.param);
strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg));
}
else if (inst.operation == PPCREC_IML_MACRO_BL)
{

View File

@ -638,78 +638,64 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& tr
}
else if (type == PPCREC_IML_TYPE_FPR_R_NAME)
{
op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_NAME_R)
{
op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD)
{
if (op_storeLoad.registerMem.IsValid())
{
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerGQR.IsValid())
{
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
{
if (op_storeLoad.registerMem.IsValid())
{
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerMem2.IsValid())
{
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
}
if (op_storeLoad.registerGQR.IsValid())
{
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_STORE)
{
if (op_storeLoad.registerMem.IsValid())
{
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerGQR.IsValid())
{
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
{
if (op_storeLoad.registerMem.IsValid())
{
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
}
if (op_storeLoad.registerMem2.IsValid())
{
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
}
if (op_storeLoad.registerGQR.IsValid())
{
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
}
else if (type == PPCREC_IML_TYPE_FPR_R_R)
{
}
else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
{
}
else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
{
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_R)
{
op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_R_R)
{
op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, translationTable);
op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_R_R_R)
{
op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, translationTable);
op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, translationTable);
op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
{
op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, translationTable);
op_fpr_r_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, translationTable);
op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, translationTable);
op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_COMPARE)
{
op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, translationTable);
op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, translationTable);
op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable);
}
else

View File

@ -388,6 +388,7 @@ struct IMLUsedRegisters
template<typename Fn>
void ForEachAccessedGPR(Fn F) const
{
// GPRs
if (readGPR1.IsValid())
F(readGPR1, false);
if (readGPR2.IsValid())
@ -398,22 +399,33 @@ struct IMLUsedRegisters
F(writtenGPR1, true);
if (writtenGPR2.IsValid())
F(writtenGPR2, true);
// FPRs
if (readFPR1.IsValid())
F(readFPR1, false);
if (readFPR2.IsValid())
F(readFPR2, false);
if (readFPR3.IsValid())
F(readFPR3, false);
if (readFPR4.IsValid())
F(readFPR4, false);
if (writtenFPR1.IsValid())
F(writtenFPR1, true);
}
// Reports whether any FPR operand slot of this instruction (readFPR1-4 or writtenFPR1)
// is valid and has the given register ID.
bool HasSameBaseFPRRegId(IMLRegID regId) const
{
	const IMLReg* const fprOperands[] = { &readFPR1, &readFPR2, &readFPR3, &readFPR4, &writtenFPR1 };
	for (const IMLReg* operand : fprOperands)
	{
		// invalid slots are skipped before their ID is looked at
		if (operand->IsValid() && operand->GetRegID() == regId)
			return true;
	}
	return false;
}
//bool HasSameBaseFPRRegId(IMLRegID regId) const
//{
// if (readFPR1.IsValid() && readFPR1.GetRegID() == regId)
// return true;
// if (readFPR2.IsValid() && readFPR2.GetRegID() == regId)
// return true;
// if (readFPR3.IsValid() && readFPR3.GetRegID() == regId)
// return true;
// if (readFPR4.IsValid() && readFPR4.GetRegID() == regId)
// return true;
// if (writtenFPR1.IsValid() && writtenFPR1.GetRegID() == regId)
// return true;
// return false;
//}
};
struct IMLInstruction
@ -765,4 +777,11 @@ struct IMLInstruction
void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]);
void ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegisterReplaced);
};
// architecture specific constants
// Physical register IDs for the x86 backend share one ID space:
// PHYSREG_GPR_BASE is the first general purpose register ID and
// PHYSREG_FPR_BASE is the first floating point register ID.
namespace IMLArchX86
{
	static constexpr int PHYSREG_GPR_BASE = 0;
	static constexpr int PHYSREG_FPR_BASE = 16;
}

View File

@ -6,369 +6,369 @@
#include "../PPCRecompilerIml.h"
#include "../BackendX64/BackendX64.h"
// True when r is a valid register whose ID is at or beyond PPC_X64_FPR_USABLE_REGISTERS.
// Invalid registers never count as exceeding the space.
bool _RegExceedsFPRSpace(IMLReg r)
{
	const bool isUsableOperand = !r.IsInvalid();
	return isUsableOperand && r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS;
}
//bool _RegExceedsFPRSpace(IMLReg r)
//{
// if (r.IsInvalid())
// return false;
// if (r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS)
// return true;
// return false;
//}
// Wraps a register ID in an IMLReg constructed with IMLRegFormat::F64 for both format arguments.
IMLReg _FPRRegFromID(IMLRegID regId)
{
	constexpr IMLRegFormat fprFormat = IMLRegFormat::F64;
	IMLReg fprReg(fprFormat, fprFormat, 0, regId);
	return fprReg;
}
// Rewrites all segments so that no instruction references an FPR register ID at or above
// PPC_X64_FPR_USABLE_REGISTERS.
// Pass 1 turns FPR name load/store instructions on such registers into no-ops.
// Pass 2 replaces each out-of-range register with an in-range register the instruction does
// not use, and surrounds the instruction with name store/load instructions that save and
// restore the borrowed register.
// Always returns true.
bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext)
{
// only xmm0 to xmm14 may be used, xmm15 is reserved
// this method will reduce the number of fpr registers used
// inefficient algorithm for optimizing away excess registers
// we simply load, use and store excess registers into other unused registers when we need to
// first we remove all name load and store instructions that involve out-of-bounds registers
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
size_t imlIndex = 0;
while( imlIndex < segIt->imlList.size() )
{
IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex];
if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R )
{
if(_RegExceedsFPRSpace(imlInstructionItr.op_r_name.regR))
{
imlInstructionItr.make_no_op();
}
}
imlIndex++;
}
}
// replace registers
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
size_t imlIndex = 0;
while( imlIndex < segIt->imlList.size() )
{
IMLUsedRegisters registersUsed;
// the same instruction index is re-checked until it no longer uses an out-of-range register;
// each iteration replaces one register
while( true )
{
segIt->imlList[imlIndex].CheckRegisterUsage(&registersUsed);
if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) || _RegExceedsFPRSpace(registersUsed.writtenFPR1) )
{
// get index of register to replace
sint32 fprToReplace = -1;
if(_RegExceedsFPRSpace(registersUsed.readFPR1) )
fprToReplace = registersUsed.readFPR1.GetRegID();
else if(_RegExceedsFPRSpace(registersUsed.readFPR2) )
fprToReplace = registersUsed.readFPR2.GetRegID();
else if (_RegExceedsFPRSpace(registersUsed.readFPR3))
fprToReplace = registersUsed.readFPR3.GetRegID();
else if (_RegExceedsFPRSpace(registersUsed.readFPR4))
fprToReplace = registersUsed.readFPR4.GetRegID();
else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) )
fprToReplace = registersUsed.writtenFPR1.GetRegID();
if (fprToReplace >= 0)
{
// generate mask of useable registers
// NOTE(review): the mask is a uint8 starting at 0x7F, so only registers 0-6 can ever be picked
// even though the search loop below runs up to PPC_X64_FPR_USABLE_REGISTERS — confirm intended.
// NOTE(review): the shifts use the raw register ID, including the out-of-range one; confirm IDs
// stay below the bit width of int.
uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0
if (registersUsed.readFPR1.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID()));
if (registersUsed.readFPR2.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID()));
if (registersUsed.readFPR3.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID()));
if (registersUsed.readFPR4.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID()));
if (registersUsed.writtenFPR1.IsValid())
useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID()));
// get highest unused register index (0-6 range)
sint32 unusedRegisterIndex = -1;
for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++)
{
if (useableRegisterMask & (1 << f))
{
unusedRegisterIndex = f;
}
}
// NOTE(review): if nothing is free only assert_dbg() is called; execution continues and
// unusedRegisterIndex (-1) is then used as an array index.
if (unusedRegisterIndex == -1)
assert_dbg();
// determine if the placeholder register is actually used (if not we must not load/store it)
uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex];
bool replacedRegisterIsUsed = true;
if (unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0 + 32))
{
replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName - PPCREC_NAME_FPR0];
}
// replace registers that are out of range
segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex);
// add load/store name after instruction
PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex + 1, 2);
// add load/store before current instruction
PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2);
// the slots filled below are imlIndex+0, +1, +3 and +4; slot +2 is not written here
// (presumably it now holds the original instruction — confirm against PPCRecompiler_pushBackIMLInstructions)
// name_unusedRegister = unusedRegister
IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0);
memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
if (replacedRegisterIsUsed)
{
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex];
}
else
imlInstructionItr->make_no_op();
// unusedRegister = name_fprToReplace (load the replaced register's value into the borrowed register)
imlInstructionItr = segIt->imlList.data() + (imlIndex + 1);
memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace];
// name_gprToReplace = unusedRegister
imlInstructionItr = segIt->imlList.data() + (imlIndex + 3);
memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace];
// unusedRegister = name_unusedRegister
imlInstructionItr = segIt->imlList.data() + (imlIndex + 4);
memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
if (replacedRegisterIsUsed)
{
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME;
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex];
}
else
imlInstructionItr->make_no_op();
}
}
else
break;
}
imlIndex++;
}
}
return true;
}
// State of one register slot tracked by the FPR manager.
struct ppcRecRegisterMapping_t
{
	bool isActive;       // slot currently holds a register
	uint32 virtualReg;   // register assigned to the slot
	sint32 lastUseIndex; // use counter value recorded for the slot
};
// Bookkeeping for the FPR manager: one mapping slot per usable register, a 64-entry
// lookup table of sint32 indices and a running use counter.
struct ppcRecManageRegisters_t
{
	ppcRecRegisterMapping_t currentMapping[PPC_X64_FPR_USABLE_REGISTERS];
	sint32 ppcRegToMapping[64];
	sint32 currentUseIndex;
};
// Claims the first inactive mapping slot: marks it active, resets its virtual register to -1,
// stamps it with the current use index and returns it.
// Returns nullptr when every slot is already active. instructionUsedRegisters is not consulted.
ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters)
{
	ppcRecRegisterMapping_t* const slotsBegin = rCtx->currentMapping;
	ppcRecRegisterMapping_t* const slotsEnd = slotsBegin + PPC_X64_FPR_USABLE_REGISTERS;
	for (ppcRecRegisterMapping_t* slot = slotsBegin; slot != slotsEnd; ++slot)
	{
		if (slot->isActive)
			continue;
		slot->isActive = true;
		slot->virtualReg = -1;
		slot->lastUseIndex = rCtx->currentUseIndex;
		return slot;
	}
	// no free slot left
	return nullptr;
}
// Selects the slot that should be unloaded next: the active slot with the
// smallest lastUseIndex (the first such slot wins on ties).
// Slots are skipped when
//   - they are inactive,
//   - their bit is set in unloadLockedMask, or
//   - instructionUsedRegisters reports (via HasSameBaseFPRRegId) that the
//     virtual register they hold is used by the current instruction.
// Asserts that a candidate exists and returns a pointer to it.
ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters, uint32 unloadLockedMask)
{
	sint32 victimSlot = -1;
	sint32 oldestUse = 0x7FFFFFFF;
	for (sint32 slot = 0; slot < PPC_X64_FPR_USABLE_REGISTERS; slot++)
	{
		ppcRecRegisterMapping_t& candidate = rCtx->currentMapping[slot];
		const bool isLocked = (unloadLockedMask & (1 << slot)) != 0;
		if (!candidate.isActive || isLocked)
			continue;
		// registers needed by the current instruction must stay loaded
		IMLRegID heldVirtualReg = candidate.virtualReg;
		if (instructionUsedRegisters->HasSameBaseFPRRegId(heldVirtualReg))
			continue;
		// keep only strictly older candidates
		if (candidate.lastUseIndex >= oldestUse)
			continue;
		oldestUse = candidate.lastUseIndex;
		victimSlot = slot;
	}
	cemu_assert(victimSlot != -1);
	return rCtx->currentMapping + victimSlot;
}
// Legacy per-segment FPR manager.
// Walks the instructions of segment `segmentIndex` (stopping at the first suffix
// instruction) and maps every virtual FPR operand onto one of the
// PPC_X64_FPR_USABLE_REGISTERS slots tracked in rCtx:
//   - a virtual FPR that is not held by any slot gets a PPCREC_IML_TYPE_FPR_R_NAME
//     (load by name) instruction inserted in front of the current instruction
//   - if no slot is free, the least recently used slot that is not needed by the
//     current instruction is written back with a PPCREC_IML_TYPE_FPR_NAME_R
//     (store by name) instruction first
//   - the operands of the instruction are then rewritten to the slot registers
// After the walk, a store is emitted for every slot that is still active.
// Parameters:
//   ppcImlGenContext - provides segmentList2 and the mappedFPRRegister name table
//   segmentIndex     - index into ppcImlGenContext->segmentList2
// Returns: always true (there is no failure path in this function).
bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenContext, sint32 segmentIndex)
{
ppcRecManageRegisters_t rCtx = { 0 };
// mark every virtual FPR as "not held by any slot"
for (sint32 i = 0; i < 64; i++)
rCtx.ppcRegToMapping[i] = -1;
IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex];
size_t idx = 0;
// NOTE(review): this local is never read; the counter that is actually used is rCtx.currentUseIndex
sint32 currentUseIndex = 0;
IMLUsedRegisters registersUsed;
while (idx < imlSegment->imlList.size())
{
// idxInst is only used before any instruction is inserted below;
// after an insertion the current instruction is re-fetched via imlList[idx]
IMLInstruction& idxInst = imlSegment->imlList[idx];
// suffix instructions end the walk; the final stores are placed in front of them
if (idxInst.IsSuffixInstruction())
break;
idxInst.CheckRegisterUsage(&registersUsed);
// parallel arrays: fprMatch[t] is a virtual FPR of this instruction, fprReplace[t] the slot register it is rewritten to
IMLReg fprMatch[4];
IMLReg fprReplace[4];
fprMatch[0] = IMLREG_INVALID;
fprMatch[1] = IMLREG_INVALID;
fprMatch[2] = IMLREG_INVALID;
fprMatch[3] = IMLREG_INVALID;
fprReplace[0] = IMLREG_INVALID;
fprReplace[1] = IMLREG_INVALID;
fprReplace[2] = IMLREG_INVALID;
fprReplace[3] = IMLREG_INVALID;
// generate a mask of registers that we may not free
// (bit i is set once slot i has been unloaded while processing this instruction)
sint32 numReplacedOperands = 0;
uint32 unloadLockedMask = 0;
// visit the four read operands followed by the written operand
for (sint32 f = 0; f < 5; f++)
{
IMLReg virtualFpr;
if (f == 0)
virtualFpr = registersUsed.readFPR1;
else if (f == 1)
virtualFpr = registersUsed.readFPR2;
else if (f == 2)
virtualFpr = registersUsed.readFPR3;
else if (f == 3)
virtualFpr = registersUsed.readFPR4;
else if (f == 4)
virtualFpr = registersUsed.writtenFPR1;
if(virtualFpr.IsInvalid())
continue;
cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64);
cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64);
// ppcRegToMapping has 64 entries, so the id must stay below that
cemu_assert_debug(virtualFpr.GetRegID() < 64);
// check if this virtual FPR is already loaded in any real register
ppcRecRegisterMapping_t* regMapping;
if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1)
{
// not loaded
// find available register
while (true)
{
regMapping = PPCRecompiler_findAvailableRegisterDepr(&rCtx, &registersUsed);
if (regMapping == NULL)
{
// unload least recently used register and try again
ppcRecRegisterMapping_t* unloadRegMapping = PPCRecompiler_findUnloadableRegister(&rCtx, &registersUsed, unloadLockedMask);
// mark as locked
unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping));
// create unload instruction
// NOTE(review): PPCRecompiler_pushBackIMLInstructions is defined elsewhere; judging by its use here it
// presumably opens a gap of one instruction at idx and shifts the following instructions back - confirm
PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1);
IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx;
memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction));
imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R;
imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping));
imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg];
// step past the inserted store so idx refers to the current instruction again
idx++;
// update mapping
unloadRegMapping->isActive = false;
rCtx.ppcRegToMapping[unloadRegMapping->virtualReg] = -1;
}
else
break;
}
// create load instruction
// (emitted for every operand that is not loaded yet, including the written operand)
PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1);
IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx;
memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction));
imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME;
imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping));
imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()];
// step past the inserted load so idx refers to the current instruction again
idx++;
// update mapping
regMapping->virtualReg = virtualFpr.GetRegID();
rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping);
regMapping->lastUseIndex = rCtx.currentUseIndex;
rCtx.currentUseIndex++;
}
else
{
// already loaded: only refresh the last-use stamp of the slot
regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()];
regMapping->lastUseIndex = rCtx.currentUseIndex;
rCtx.currentUseIndex++;
}
// replace FPR
// (the same virtual FPR can appear in more than one operand position; record it only once)
bool entryFound = false;
for (sint32 t = 0; t < numReplacedOperands; t++)
{
if (fprMatch[t].IsValid() && fprMatch[t].GetRegID() == virtualFpr.GetRegID())
{
cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping));
entryFound = true;
break;
}
}
if (entryFound == false)
{
// NOTE(review): five operand positions are visited but the tables hold four entries;
// this debug-only assert is the only guard against a fifth distinct register
cemu_assert_debug(numReplacedOperands != 4);
fprMatch[numReplacedOperands] = virtualFpr;
fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping);
numReplacedOperands++;
}
}
if (numReplacedOperands > 0)
{
// idx accounts for all inserted loads/stores, so this is the instruction analysed at the top of the loop
imlSegment->imlList[idx].ReplaceFPRs(fprMatch, fprReplace);
}
// next
idx++;
}
// count loaded registers
sint32 numLoadedRegisters = 0;
for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++)
{
if (rCtx.currentMapping[i].isActive)
numLoadedRegisters++;
}
// store all loaded registers
// (idx is either the position of the suffix instruction or the end of the list)
if (numLoadedRegisters > 0)
{
PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, numLoadedRegisters);
for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++)
{
if (rCtx.currentMapping[i].isActive == false)
continue;
IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx;
memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction));
imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R;
imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN;
imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i);
imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg];
idx++;
}
}
return true;
}
// Runs the per-segment FPR manager over every segment in
// ppcImlGenContext->segmentList2, in order.
// Returns false as soon as one segment fails, true otherwise.
bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext)
{
	sint32 segmentIndex = 0;
	while (segmentIndex < ppcImlGenContext->segmentList2.size())
	{
		if (!PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, segmentIndex))
			return false;
		segmentIndex++;
	}
	return true;
}
//bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext)
//{
// // only xmm0 to xmm14 may be used, xmm15 is reserved
// // this method will reduce the number of fpr registers used
// // inefficient algorithm for optimizing away excess registers
// // we simply load, use and store excess registers into other unused registers when we need to
// // first we remove all name load and store instructions that involve out-of-bounds registers
// for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
// {
// size_t imlIndex = 0;
// while( imlIndex < segIt->imlList.size() )
// {
// IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex];
// if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R )
// {
// if(_RegExceedsFPRSpace(imlInstructionItr.op_r_name.regR))
// {
// imlInstructionItr.make_no_op();
// }
// }
// imlIndex++;
// }
// }
// // replace registers
// for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
// {
// size_t imlIndex = 0;
// while( imlIndex < segIt->imlList.size() )
// {
// IMLUsedRegisters registersUsed;
// while( true )
// {
// segIt->imlList[imlIndex].CheckRegisterUsage(&registersUsed);
// if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) || _RegExceedsFPRSpace(registersUsed.writtenFPR1) )
// {
// // get index of register to replace
// sint32 fprToReplace = -1;
// if(_RegExceedsFPRSpace(registersUsed.readFPR1) )
// fprToReplace = registersUsed.readFPR1.GetRegID();
// else if(_RegExceedsFPRSpace(registersUsed.readFPR2) )
// fprToReplace = registersUsed.readFPR2.GetRegID();
// else if (_RegExceedsFPRSpace(registersUsed.readFPR3))
// fprToReplace = registersUsed.readFPR3.GetRegID();
// else if (_RegExceedsFPRSpace(registersUsed.readFPR4))
// fprToReplace = registersUsed.readFPR4.GetRegID();
// else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) )
// fprToReplace = registersUsed.writtenFPR1.GetRegID();
// if (fprToReplace >= 0)
// {
// // generate mask of useable registers
// uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0
// if (registersUsed.readFPR1.IsValid())
// useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID()));
// if (registersUsed.readFPR2.IsValid())
// useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID()));
// if (registersUsed.readFPR3.IsValid())
// useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID()));
// if (registersUsed.readFPR4.IsValid())
// useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID()));
// if (registersUsed.writtenFPR1.IsValid())
// useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID()));
// // get highest unused register index (0-6 range)
// sint32 unusedRegisterIndex = -1;
// for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++)
// {
// if (useableRegisterMask & (1 << f))
// {
// unusedRegisterIndex = f;
// }
// }
// if (unusedRegisterIndex == -1)
// assert_dbg();
// // determine if the placeholder register is actually used (if not we must not load/store it)
// uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex];
// bool replacedRegisterIsUsed = true;
// if (unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0 + 32))
// {
// replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName - PPCREC_NAME_FPR0];
// }
// // replace registers that are out of range
// segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex);
// // add load/store name after instruction
// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex + 1, 2);
// // add load/store before current instruction
// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2);
// // name_unusedRegister = unusedRegister
// IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0);
// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
// if (replacedRegisterIsUsed)
// {
// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R;
// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex];
// }
// else
// imlInstructionItr->make_no_op();
// imlInstructionItr = segIt->imlList.data() + (imlIndex + 1);
// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME;
// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace];
// // name_gprToReplace = unusedRegister
// imlInstructionItr = segIt->imlList.data() + (imlIndex + 3);
// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R;
// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace];
// // unusedRegister = name_unusedRegister
// imlInstructionItr = segIt->imlList.data() + (imlIndex + 4);
// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction));
// if (replacedRegisterIsUsed)
// {
// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME;
// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex);
// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex];
// }
// else
// imlInstructionItr->make_no_op();
// }
// }
// else
// break;
// }
// imlIndex++;
// }
// }
// return true;
//}
//
//typedef struct
//{
// bool isActive;
// uint32 virtualReg;
// sint32 lastUseIndex;
//}ppcRecRegisterMapping_t;
//
//typedef struct
//{
// ppcRecRegisterMapping_t currentMapping[PPC_X64_FPR_USABLE_REGISTERS];
// sint32 ppcRegToMapping[64];
// sint32 currentUseIndex;
//}ppcRecManageRegisters_t;
//
//ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters)
//{
// // find free register
// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++)
// {
// if (rCtx->currentMapping[i].isActive == false)
// {
// rCtx->currentMapping[i].isActive = true;
// rCtx->currentMapping[i].virtualReg = -1;
// rCtx->currentMapping[i].lastUseIndex = rCtx->currentUseIndex;
// return rCtx->currentMapping + i;
// }
// }
// // all registers are used
// return nullptr;
//}
//
//ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters, uint32 unloadLockedMask)
//{
// // find unloadable register (with lowest lastUseIndex)
// sint32 unloadIndex = -1;
// sint32 unloadIndexLastUse = 0x7FFFFFFF;
// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++)
// {
// if (rCtx->currentMapping[i].isActive == false)
// continue;
// if( (unloadLockedMask&(1<<i)) != 0 )
// continue;
// IMLRegID virtualReg = rCtx->currentMapping[i].virtualReg;
// bool isReserved = instructionUsedRegisters->HasSameBaseFPRRegId(virtualReg);
// if (isReserved)
// continue;
// if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse)
// {
// unloadIndexLastUse = rCtx->currentMapping[i].lastUseIndex;
// unloadIndex = i;
// }
// }
// cemu_assert(unloadIndex != -1);
// return rCtx->currentMapping + unloadIndex;
//}
//
//bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenContext, sint32 segmentIndex)
//{
// ppcRecManageRegisters_t rCtx = { 0 };
// for (sint32 i = 0; i < 64; i++)
// rCtx.ppcRegToMapping[i] = -1;
// IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex];
// size_t idx = 0;
// sint32 currentUseIndex = 0;
// IMLUsedRegisters registersUsed;
// while (idx < imlSegment->imlList.size())
// {
// IMLInstruction& idxInst = imlSegment->imlList[idx];
// if (idxInst.IsSuffixInstruction())
// break;
// idxInst.CheckRegisterUsage(&registersUsed);
// IMLReg fprMatch[4];
// IMLReg fprReplace[4];
// fprMatch[0] = IMLREG_INVALID;
// fprMatch[1] = IMLREG_INVALID;
// fprMatch[2] = IMLREG_INVALID;
// fprMatch[3] = IMLREG_INVALID;
// fprReplace[0] = IMLREG_INVALID;
// fprReplace[1] = IMLREG_INVALID;
// fprReplace[2] = IMLREG_INVALID;
// fprReplace[3] = IMLREG_INVALID;
// // generate a mask of registers that we may not free
// sint32 numReplacedOperands = 0;
// uint32 unloadLockedMask = 0;
// for (sint32 f = 0; f < 5; f++)
// {
// IMLReg virtualFpr;
// if (f == 0)
// virtualFpr = registersUsed.readFPR1;
// else if (f == 1)
// virtualFpr = registersUsed.readFPR2;
// else if (f == 2)
// virtualFpr = registersUsed.readFPR3;
// else if (f == 3)
// virtualFpr = registersUsed.readFPR4;
// else if (f == 4)
// virtualFpr = registersUsed.writtenFPR1;
// if(virtualFpr.IsInvalid())
// continue;
// cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64);
// cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64);
// cemu_assert_debug(virtualFpr.GetRegID() < 64);
// // check if this virtual FPR is already loaded in any real register
// ppcRecRegisterMapping_t* regMapping;
// if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1)
// {
// // not loaded
// // find available register
// while (true)
// {
// regMapping = PPCRecompiler_findAvailableRegisterDepr(&rCtx, &registersUsed);
// if (regMapping == NULL)
// {
// // unload least recently used register and try again
// ppcRecRegisterMapping_t* unloadRegMapping = PPCRecompiler_findUnloadableRegister(&rCtx, &registersUsed, unloadLockedMask);
// // mark as locked
// unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping));
// // create unload instruction
// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1);
// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx;
// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction));
// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R;
// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN;
// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping));
// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg];
// idx++;
// // update mapping
// unloadRegMapping->isActive = false;
// rCtx.ppcRegToMapping[unloadRegMapping->virtualReg] = -1;
// }
// else
// break;
// }
// // create load instruction
// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1);
// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx;
// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction));
// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME;
// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN;
// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping));
// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()];
// idx++;
// // update mapping
// regMapping->virtualReg = virtualFpr.GetRegID();
// rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping);
// regMapping->lastUseIndex = rCtx.currentUseIndex;
// rCtx.currentUseIndex++;
// }
// else
// {
// regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()];
// regMapping->lastUseIndex = rCtx.currentUseIndex;
// rCtx.currentUseIndex++;
// }
// // replace FPR
// bool entryFound = false;
// for (sint32 t = 0; t < numReplacedOperands; t++)
// {
// if (fprMatch[t].IsValid() && fprMatch[t].GetRegID() == virtualFpr.GetRegID())
// {
// cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping));
// entryFound = true;
// break;
// }
// }
// if (entryFound == false)
// {
// cemu_assert_debug(numReplacedOperands != 4);
// fprMatch[numReplacedOperands] = virtualFpr;
// fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping);
// numReplacedOperands++;
// }
// }
// if (numReplacedOperands > 0)
// {
// imlSegment->imlList[idx].ReplaceFPRs(fprMatch, fprReplace);
// }
// // next
// idx++;
// }
// // count loaded registers
// sint32 numLoadedRegisters = 0;
// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++)
// {
// if (rCtx.currentMapping[i].isActive)
// numLoadedRegisters++;
// }
// // store all loaded registers
// if (numLoadedRegisters > 0)
// {
// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, numLoadedRegisters);
// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++)
// {
// if (rCtx.currentMapping[i].isActive == false)
// continue;
// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx;
// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction));
// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R;
// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN;
// imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i);
// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg];
// idx++;
// }
// }
// return true;
//}
//
//bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext)
//{
// for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
// {
// if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false)
// return false;
// }
// return true;
//}
/*
@ -663,11 +663,13 @@ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenCont
}
}
IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg);
sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg)
{
if (gqrReg.IsInvalid())
return -1;
sint32 namedReg = ppcImlGenContext->mappedRegister[gqrReg.GetRegID()];
sint32 namedReg = PPCRecompilerImlGen_GetRegName(ppcImlGenContext, gqrReg);
if (namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + SPR_UGQR7))
{
return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0);

View File

@ -1100,7 +1100,7 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML
if(it.second.isProcessed)
continue;
IMLRegID regId = it.first;
raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.deprGenContext->mappedRegister[regId]);
raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.raParam->regIdToName.find(regId)->second);
PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, range);
}
// fill created ranges with read/write location indices

View File

@ -93,7 +93,8 @@ struct IMLRegisterAllocatorParameters
return perTypePhysPool[stdx::to_underlying(regFormat)];
}
IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)];// physicalRegisterPool;
IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)];
std::unordered_map<IMLRegID, IMLName> regIdToName;
};
void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam);

View File

@ -91,7 +91,7 @@ struct IMLSegment
bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
uint32 enterPPCAddress{}; // used if isEnterable is true
// PPC FPR use mask
bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR
//bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR
// CR use mask
uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten)
uint32 crBitsRead{}; // all bits that are read in this segment

View File

@ -206,8 +206,19 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
// return nullptr;
//}
// Large functions for testing (botw):
// 3B4049C
//if (ppcRecFunc->ppcAddress == 0x03C26844)
//{
// __debugbreak();
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
//}
// 31A8778
// Functions for testing (botw):
// 3B4049C (large with switch case)
// 30BF118 (has a bndz copy loop + some float instructions at the end)
// emit x64 code
bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
@ -217,8 +228,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
}
// collect list of PPC-->x64 entry points
cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size());
entryPointsOut.clear();
for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2)
{
@ -230,6 +239,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
entryPointsOut.emplace_back(ppcEnterOffset, x64Offset);
}
cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size());
return ppcRecFunc;
}
@ -242,72 +254,85 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
// if GQRs can be predicted, optimize PSQ load/stores
PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext);
// count number of used registers
uint32 numLoadedFPRRegisters = 0;
for (uint32 i = 0; i < 255; i++)
{
if (ppcImlGenContext.mappedFPRRegister[i])
numLoadedFPRRegisters++;
}
// insert name store instructions at the end of each segment but before branch instructions
for (IMLSegment* segIt : ppcImlGenContext.segmentList2)
{
if (segIt->imlList.size() == 0)
continue; // ignore empty segments
// analyze segment for register usage
IMLUsedRegisters registersUsed;
for (sint32 i = 0; i < segIt->imlList.size(); i++)
{
segIt->imlList[i].CheckRegisterUsage(&registersUsed);
IMLReg accessedTempReg[5];
// intermediate FPRs
accessedTempReg[0] = registersUsed.readFPR1;
accessedTempReg[1] = registersUsed.readFPR2;
accessedTempReg[2] = registersUsed.readFPR3;
accessedTempReg[3] = registersUsed.readFPR4;
accessedTempReg[4] = registersUsed.writtenFPR1;
for (sint32 f = 0; f < 5; f++)
{
if (accessedTempReg[f].IsInvalid())
continue;
uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()];
if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32)
{
segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true;
}
}
}
}
//for (IMLSegment* segIt : ppcImlGenContext.segmentList2)
//{
// if (segIt->imlList.size() == 0)
// continue; // ignore empty segments
// // analyze segment for register usage
// IMLUsedRegisters registersUsed;
// for (sint32 i = 0; i < segIt->imlList.size(); i++)
// {
// segIt->imlList[i].CheckRegisterUsage(&registersUsed);
// IMLReg accessedTempReg[5];
// // intermediate FPRs
// accessedTempReg[0] = registersUsed.readFPR1;
// accessedTempReg[1] = registersUsed.readFPR2;
// accessedTempReg[2] = registersUsed.readFPR3;
// accessedTempReg[3] = registersUsed.readFPR4;
// accessedTempReg[4] = registersUsed.writtenFPR1;
// for (sint32 f = 0; f < 5; f++)
// {
// if (accessedTempReg[f].IsInvalid())
// continue;
// uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()];
// if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32)
// {
// segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true;
// }
// }
// }
//}
// merge certain float load+store patterns (must happen before FPR register remapping)
PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext);
// delay byte swapping for certain load+store patterns
PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext);
if (numLoadedFPRRegisters > 0)
{
if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false)
{
return false;
}
}
//if (numLoadedFPRRegisters > 0)
//{
// if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false)
// {
// return false;
// }
//}
IMLRegisterAllocatorParameters raParam;
for (auto& it : ppcImlGenContext.mappedRegs)
raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first);
auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
gprPhysPool.SetAvailable(X86_REG_RAX);
gprPhysPool.SetAvailable(X86_REG_RDX);
gprPhysPool.SetAvailable(X86_REG_RBX);
gprPhysPool.SetAvailable(X86_REG_RBP);
gprPhysPool.SetAvailable(X86_REG_RSI);
gprPhysPool.SetAvailable(X86_REG_RDI);
gprPhysPool.SetAvailable(X86_REG_R8);
gprPhysPool.SetAvailable(X86_REG_R9);
gprPhysPool.SetAvailable(X86_REG_R10);
gprPhysPool.SetAvailable(X86_REG_R11);
gprPhysPool.SetAvailable(X86_REG_R12);
gprPhysPool.SetAvailable(X86_REG_RCX);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBX);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBP);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RSI);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDI);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R12);
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX);
// add XMM registers, except XMM15 which is the temporary register
auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 0);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 1);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 2);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 3);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 4);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 5);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 6);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 7);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 8);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 9);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 10);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 11);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 12);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 13);
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14);
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);

View File

@ -42,9 +42,12 @@ struct ppcImlGenContext_t
// cycle counter
uint32 cyclesSinceLastBranch; // used to track ppc cycles
// temporary general purpose registers
uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR];
//uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR];
// temporary floating point registers (single and double precision)
uint32 mappedFPRRegister[256];
//uint32 mappedFPRRegister[256];
std::unordered_map<IMLName, IMLReg> mappedRegs;
// list of segments
std::vector<IMLSegment*> segmentList2;
// code generation control

View File

@ -102,4 +102,3 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o
// IML general
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);

View File

@ -134,74 +134,73 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P
basicBlockInfo.appendSegment = segMerge;
}
uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat)
{
if( mappedName == PPCREC_NAME_NONE )
auto it = ppcImlGenContext->mappedRegs.find(mappedName);
if (it != ppcImlGenContext->mappedRegs.end())
return it->second;
// create new reg entry
IMLRegFormat baseFormat;
if (regFormat == IMLRegFormat::F64)
baseFormat = IMLRegFormat::F64;
else if (regFormat == IMLRegFormat::I32)
baseFormat = IMLRegFormat::I64;
else
{
debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(): Invalid mappedName parameter\n");
return PPC_REC_INVALID_REGISTER;
cemu_assert_suspicious();
}
for(uint32 i=0; i<(PPC_REC_MAX_VIRTUAL_GPR-1); i++)
{
if( ppcImlGenContext->mappedRegister[i] == PPCREC_NAME_NONE )
{
ppcImlGenContext->mappedRegister[i] = mappedName;
return i;
}
}
return 0;
IMLRegID newRegId = ppcImlGenContext->mappedRegs.size();
IMLReg newReg(baseFormat, regFormat, 0, newRegId);
ppcImlGenContext->mappedRegs.try_emplace(mappedName, newReg);
return newReg;
}
uint32 PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg)
{
for(uint32 i=0; i< PPC_REC_MAX_VIRTUAL_GPR; i++)
for (auto& it : ppcImlGenContext->mappedRegs)
{
if( ppcImlGenContext->mappedRegister[i] == mappedName )
{
return i;
}
if (it.second.GetRegID() == reg.GetRegID())
return it.first;
}
return PPC_REC_INVALID_REGISTER;
cemu_assert(false);
return 0;
}
// Allocation of a free slot in the FPR mapping table.
// NOTE(review): this span appears to be a rendered diff with the removed body followed by the added body:
//   - older body: rejects PPCREC_NAME_NONE (debug message + PPC_REC_INVALID_REGISTER), otherwise claims the
//     first mappedFPRRegister[] slot in 0..254 that holds PPCREC_NAME_NONE by storing mappedName there and
//     returns that slot index;
//   - newer body: calls __debugbreak() and returns 0; the former logic survives only as commented-out code.
// In this chunk the FPR load helpers call PPCRecompilerImlGen_LookupReg instead of this function.
uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
if( mappedName == PPCREC_NAME_NONE )
{
debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n");
return PPC_REC_INVALID_REGISTER;
}
for(uint32 i=0; i<255; i++)
{
if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE )
{
ppcImlGenContext->mappedFPRRegister[i] = mappedName;
return i;
}
}
// newer body starts here: any call is flagged with a breakpoint
__debugbreak();
//if( mappedName == PPCREC_NAME_NONE )
//{
// debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n");
// return PPC_REC_INVALID_REGISTER;
//}
//for(uint32 i=0; i<255; i++)
//{
// if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE )
// {
// ppcImlGenContext->mappedFPRRegister[i] = mappedName;
// return i;
// }
//}
return 0;
}
// Lookup of an FPR mapping table slot by mapped name.
// NOTE(review): this span appears to be a rendered diff with the removed body followed by the added body:
//   - older body: returns the first index in 0..254 whose mappedFPRRegister[] entry equals mappedName;
//   - newer body: calls __debugbreak() and returns PPC_REC_INVALID_REGISTER; the former scan survives only
//     as commented-out code.
uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
for(uint32 i=0; i<255; i++)
{
if( ppcImlGenContext->mappedFPRRegister[i] == mappedName )
{
return i;
}
}
// newer body starts here: any call is flagged with a breakpoint
__debugbreak();
//for(uint32 i=0; i<255; i++)
//{
// if( ppcImlGenContext->mappedFPRRegister[i] == mappedName )
// {
// return i;
// }
//}
return PPC_REC_INVALID_REGISTER;
}
// Returns the IML register associated with mappedName for integer use.
// NOTE(review): this span appears to be a rendered diff with removed lines followed by the added line:
//   - older body: looks the name up with PPCRecompilerImlGen_findRegisterByMappedName and, when that yields
//     PPC_REC_INVALID_REGISTER, allocates an index via PPCRecompilerImlGen_getAndLockFreeTemporaryGPR; either
//     way the index is wrapped as IMLReg(I64 base format, I32 format, 0, index);
//   - newer body: a single call to PPCRecompilerImlGen_LookupReg with IMLRegFormat::I32.
IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName);
if (loadedRegisterIndex != PPC_REC_INVALID_REGISTER)
return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, loadedRegisterIndex);
uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName);
return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, registerIndex);
// newer body: lookup and creation are both delegated to the shared helper
return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::I32);
}
IMLReg _GetRegGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
@ -243,14 +242,15 @@ IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
*/
// Returns the IML register associated with mappedName for floating-point use.
// NOTE(review): this span appears to be a rendered diff with removed lines followed by the added lines:
//   - older body: unless loadNew is set, reuses an existing slot found by
//     PPCRecompilerImlGen_findFPRRegisterByMappedName; otherwise allocates one through
//     PPCRecompilerImlGen_getAndLockFreeTemporaryFPR; the index is wrapped as IMLReg(F64, F64, 0, index);
//   - newer body: the former logic is commented out and the function returns
//     PPCRecompilerImlGen_LookupReg(..., IMLRegFormat::F64). The only live newer statement does not read
//     loadNew — confirm that callers no longer depend on that flag.
IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew)
{
if( loadNew == false )
{
uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName);
if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex);
}
uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName);
return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex);
//if( loadNew == false )
//{
// uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName);
// if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
// return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex);
//}
//uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName);
//return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex);
// newer body: lookup and creation are both delegated to the shared helper
return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64);
}
/*
@ -259,11 +259,12 @@ IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext,
*/
// Returns the floating-point IML register associated with mappedName (the "overwrite" flavour of the FPR load helper).
// NOTE(review): this span appears to be a rendered diff with removed lines followed by the added lines:
//   - older body: reuses the slot found by PPCRecompilerImlGen_findFPRRegisterByMappedName, or allocates one
//     through PPCRecompilerImlGen_getAndLockFreeTemporaryFPR; the index is wrapped as IMLReg(F64, F64, 0, index);
//   - newer body: the former logic is commented out and the function returns
//     PPCRecompilerImlGen_LookupReg(..., IMLRegFormat::F64), the same call the newer
//     PPCRecompilerImlGen_loadFPRRegister body makes.
IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
{
uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName);
if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex);
uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName);
return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex);
//uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName);
//if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER )
// return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex);
//uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName);
//return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex);
// newer body: lookup and creation are both delegated to the shared helper
return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64);
}
bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount)

View File

@ -44,7 +44,8 @@ public:
void add(std::string_view appendedStr)
{
size_t remainingLen = this->limit - this->length;
if (this->length + appendedStr.size() + 1 >= this->limit)
_reserve(std::max<uint32>(this->length + appendedStr.size() + 64, this->limit + this->limit / 2));
size_t copyLen = appendedStr.size();
if (remainingLen < copyLen)
copyLen = remainingLen;
@ -80,6 +81,13 @@ public:
}
private:
// Resizes the backing buffer and records newLimit as the new capacity.
// newLimit must be larger than the number of bytes currently stored (checked in debug builds).
// The allocation is newLimit + 4 bytes.
// NOTE(review): the 4 extra bytes look like slack past `limit` (e.g. for a terminator) — confirm.
// NOTE(review): the realloc result is stored without a null check, as before.
void _reserve(uint32 newLimit)
{
	cemu_assert_debug(length < newLimit);
	void* const resized = realloc(str, newLimit + 4);
	str = static_cast<uint8*>(resized);
	limit = newLimit;
}
uint8* str;
uint32 length; /* in bytes */
uint32 limit; /* in bytes */