PPCRec: Reworked IML builder to work with basic-blocks

Intermediate commit while I'm still fixing things but I didn't want to pile on too many changes in a single commit.
New:
Reworked PPC->IML converter to first create a graph of basic blocks and then turn those into IML segment(s). This was mainly done to decouple IML design from having PPC specific knowledge like branch target addresses. The previous design also didn't allow to preserve cycle counting properly in all cases since it was based on IML instruction counting.
The new solution supports functions with non-continuous body. A pretty common example for this is when functions end with a trailing B instruction to some other place.

Current limitations:
- BL inlining not implemented
- MFTB not implemented
- BCCTR and BCLR are only partially implemented

Undo vcpkg change
This commit is contained in:
Exzap 2022-12-12 08:50:29 +01:00
parent db60ea6535
commit ce8dc5526c
18 changed files with 1554 additions and 689 deletions

View File

@ -91,13 +91,15 @@ namespace Espresso
BCCTR = 528
};
enum class OPCODE_31
enum class Opcode31
{
TW = 4,
MFTB = 371,
};
inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); };
inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); };
inline Opcode31 GetGroup31Opcode(uint32 opcode) { return (Opcode31)((opcode >> 1) & 0x3FF); };
struct BOField
{
@ -132,6 +134,12 @@ namespace Espresso
uint8 bo;
};
// returns true if LK bit is set, only valid for branch instructions
inline bool DecodeLK(uint32 opcode)
{
return (opcode & 1) != 0;
}
inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK)
{
LI = opcode & 0x3fffffc;
@ -183,13 +191,7 @@ namespace Espresso
_decodeForm_D_branch(opcode, BD, BO, BI, AA, LK);
}
inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
{
// form XL (with BD field expected to be zero)
_decodeForm_XL(opcode, BO, BI, LK);
}
inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
inline void decodeOp_BCSPR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) // BCLR and BCSPR
{
// form XL (with BD field expected to be zero)
_decodeForm_XL(opcode, BO, BI, LK);

View File

@ -24,15 +24,7 @@ sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping
*/
void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, uint8 type, void* extraInfo = nullptr)
{
if( x64GenContext->relocateOffsetTableCount >= x64GenContext->relocateOffsetTableSize )
{
x64GenContext->relocateOffsetTableSize = std::max(4, x64GenContext->relocateOffsetTableSize*2);
x64GenContext->relocateOffsetTable = (x64RelocEntry_t*)realloc(x64GenContext->relocateOffsetTable, sizeof(x64RelocEntry_t)*x64GenContext->relocateOffsetTableSize);
}
x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].offset = x64GenContext->codeBufferIndex;
x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].type = type;
x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].extraInfo = extraInfo;
x64GenContext->relocateOffsetTableCount++;
x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, type, extraInfo);
}
/*
@ -306,6 +298,9 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_MACRO_MFTB )
{
// according to MS ABI the caller needs to save:
// RAX, RCX, RDX, R8, R9, R10, R11
uint32 ppcAddress = imlInstruction->op_macro.param;
uint32 sprId = imlInstruction->op_macro.param2&0xFFFF;
uint32 gprIndex = (imlInstruction->op_macro.param2>>16)&0x1F;
@ -321,7 +316,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
// reserve space on stack for call parameters
x64Gen_sub_reg64_imm32(x64GenContext, REG_RSP, 8*11 + 8);
x64Gen_mov_reg64_imm64(x64GenContext, REG_RBP, 0);
// call HLE function
// call function
if( sprId == SPR_TBL )
x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_getTBL);
else if( sprId == SPR_TBU )
@ -1971,6 +1966,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction)
{
if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment)
{
debug_printf("PPCRecompilerX64Gen_imlInstruction_conditionalJump(): Failed on deprecated jump method\n");
return false;
}
if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE )
{
// jump always
@ -1985,19 +1986,25 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
else
{
// deprecated (jump to jumpmark)
__debugbreak(); // deprecated
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
x64Gen_jmp_imm32(x64GenContext, 0);
}
}
else
{
if (imlInstruction->op_conditionalJump.jumpAccordingToSegment)
assert_dbg();
if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment)
{
debug_printf("Unsupported deprecated cjump to ppc address\n");
return false;
}
cemu_assert_debug(imlSegment->nextSegmentBranchTaken);
// generate jump update marker
if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 )
{
// temporary cr is used, which means we use the currently active eflags
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
sint32 condition = imlInstruction->op_conditionalJump.condition;
if( condition == PPCREC_JUMP_CONDITION_E )
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
@ -2015,19 +2022,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
{
if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT)
{
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0);
return true;
}
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ)
{
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0);
return true;
}
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT)
{
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0);
return true;
}
@ -2036,19 +2043,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
{
if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT)
{
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0);
return true;
}
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ)
{
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0);
return true;
}
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT)
{
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0);
return true;
}
@ -2057,26 +2064,28 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
{
if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT)
{
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0);
return true;
}
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ)
{
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0);
return true;
}
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT)
{
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0);
return true;
}
}
cemu_assert_debug(false); // should not reach?
}
x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
cemu_assert_debug(imlSegment->GetBranchTaken());
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, (void*)imlSegment->GetBranchTaken());
if( imlInstruction->op_conditionalJump.bitMustBeSet )
{
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0);
@ -2094,13 +2103,14 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
// some tests (all performed on a i7-4790K)
// 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write)
// 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write and direct dependency)
// 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC
// BT
x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress);
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0);
cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken());
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, x64GenContext->currentSegment->GetBranchTaken());
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0);
return true;
}
@ -2152,22 +2162,6 @@ bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppc
void PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
imlInstruction->op_ppcEnter.x64Offset = x64GenContext->codeBufferIndex;
// generate code
if( ppcImlGenContext->hasFPUInstruction )
{
// old FPU unavailable code
//PPCRecompilerX86_crConditionFlags_saveBeforeOverwrite(PPCRecFunction, ppcImlGenContext, x64GenContext);
//// skip if FP bit in MSR is set
//// #define MSR_FP (1<<13)
//x64Gen_bt_mem8(x64GenContext, REG_ESP, offsetof(PPCInterpreter_t, msr), 13);
//uint32 jmpCodeOffset = x64GenContext->codeBufferIndex;
//x64Gen_jmpc(x64GenContext, X86_CONDITION_CARRY, 0);
//x64Gen_mov_reg32_imm32(x64GenContext, REG_EAX, imlInstruction->op_ppcEnter.ppcAddress&0x7FFFFFFF);
//PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X86_RELOC_MAKE_RELATIVE);
//x64Gen_jmp_imm32(x64GenContext, (uint32)PPCRecompiler_recompilerCallEscapeAndCallFPUUnavailable);
//// patch jump
//*(uint32*)(x64GenContext->codeBuffer+jmpCodeOffset+2) = x64GenContext->codeBufferIndex-jmpCodeOffset-6;
}
}
void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
@ -2193,7 +2187,6 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
}
else
assert_dbg();
//x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr)+sizeof(uint32)*(name-PPCREC_NAME_SPR0));
}
else
assert_dbg();
@ -2256,7 +2249,7 @@ uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size)
bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext)
{
x64GenContext_t x64GenContext = {0};
x64GenContext_t x64GenContext{};
x64GenContext.codeBufferSize = 1024;
x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize);
x64GenContext.codeBufferIndex = 0;
@ -2266,6 +2259,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
bool codeGenerationFailed = false;
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
x64GenContext.currentSegment = segIt;
segIt->x64Offset = x64GenContext.codeBufferIndex;
for(size_t i=0; i<segIt->imlList.size(); i++)
{
@ -2442,48 +2436,43 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
if( codeGenerationFailed )
{
free(x64GenContext.codeBuffer);
if (x64GenContext.relocateOffsetTable)
free(x64GenContext.relocateOffsetTable);
return false;
}
// allocate executable memory
uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex);
size_t baseAddress = (size_t)executableMemory;
// fix relocs
for(sint32 i=0; i<x64GenContext.relocateOffsetTableCount; i++)
for(auto& relocIt : x64GenContext.relocateOffsetTable2)
{
if( x64GenContext.relocateOffsetTable[i].type == X86_RELOC_MAKE_RELATIVE )
{
assert_dbg(); // deprecated
}
else if(x64GenContext.relocateOffsetTable[i].type == X64_RELOC_LINK_TO_PPC || x64GenContext.relocateOffsetTable[i].type == X64_RELOC_LINK_TO_SEGMENT)
if(relocIt.type == X64_RELOC_LINK_TO_PPC || relocIt.type == X64_RELOC_LINK_TO_SEGMENT)
{
// if link to PPC, search for segment that starts with this offset
uint32 ppcOffset = (uint32)(size_t)x64GenContext.relocateOffsetTable[i].extraInfo;
uint32 ppcOffset = (uint32)(size_t)relocIt.extraInfo;
uint32 x64Offset = 0xFFFFFFFF;
if (x64GenContext.relocateOffsetTable[i].type == X64_RELOC_LINK_TO_PPC)
if (relocIt.type == X64_RELOC_LINK_TO_PPC)
{
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset)
{
x64Offset = segIt->x64Offset;
break;
}
}
if (x64Offset == 0xFFFFFFFF)
{
debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress);
// todo: Cleanup
return false;
}
cemu_assert_suspicious();
//for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
//{
// if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset)
// {
// x64Offset = segIt->x64Offset;
// break;
// }
//}
//if (x64Offset == 0xFFFFFFFF)
//{
// debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress);
// // todo: Cleanup
// return false;
//}
}
else
{
IMLSegment* destSegment = (IMLSegment*)x64GenContext.relocateOffsetTable[i].extraInfo;
IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo;
x64Offset = destSegment->x64Offset;
}
uint32 relocBase = x64GenContext.relocateOffsetTable[i].offset;
uint32 relocBase = relocIt.offset;
uint8* relocInstruction = x64GenContext.codeBuffer+relocBase;
if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) )
{
@ -2525,8 +2514,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex);
free(x64GenContext.codeBuffer);
x64GenContext.codeBuffer = nullptr;
if (x64GenContext.relocateOffsetTable)
free(x64GenContext.relocateOffsetTable);
// set code
PPCRecFunction->x86Code = executableMemory;
PPCRecFunction->x86Size = x64GenContext.codeBufferIndex;
@ -2535,7 +2522,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
void PPCRecompilerX64Gen_generateEnterRecompilerCode()
{
x64GenContext_t x64GenContext = {0};
x64GenContext_t x64GenContext{};
x64GenContext.codeBufferSize = 1024;
x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize);
x64GenContext.codeBufferIndex = 0;
@ -2615,7 +2602,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode()
void* PPCRecompilerX64Gen_generateLeaveRecompilerCode()
{
x64GenContext_t x64GenContext = {0};
x64GenContext_t x64GenContext{};
x64GenContext.codeBufferSize = 128;
x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize);
x64GenContext.codeBufferIndex = 0;

View File

@ -3,6 +3,8 @@
struct x64RelocEntry_t
{
x64RelocEntry_t(uint32 offset, uint8 type, void* extraInfo) : offset(offset), type(type), extraInfo(extraInfo) {};
uint32 offset;
uint8 type;
void* extraInfo;
@ -10,16 +12,16 @@ struct x64RelocEntry_t
struct x64GenContext_t
{
uint8* codeBuffer;
sint32 codeBufferIndex;
sint32 codeBufferSize;
IMLSegment* currentSegment{};
uint8* codeBuffer{};
sint32 codeBufferIndex{};
sint32 codeBufferSize{};
// cr state
sint32 activeCRRegister; // current x86 condition flags reflect this cr* register
sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned)
sint32 activeCRRegister{}; // current x86 condition flags reflect this cr* register
sint32 activeCRState{}; // describes the way in which x86 flags map to the cr register (signed / unsigned)
// relocate offsets
x64RelocEntry_t* relocateOffsetTable;
sint32 relocateOffsetTableSize;
sint32 relocateOffsetTableCount;
std::vector<x64RelocEntry_t> relocateOffsetTable2;
};
// Some of these are defined by winnt.h and gnu headers
@ -126,7 +128,6 @@ enum
#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI)
#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI)
#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction
#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset
#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment

View File

@ -5,11 +5,6 @@
// mulx, rorx, sarx, shlx, shrx
// PDEP, PEXT
void x64Gen_checkBuffer(x64GenContext_t* x64GenContext)
{
// todo
}
void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v)
{
if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize )

View File

@ -203,7 +203,6 @@ template<class opcodeBytes, typename TA, typename TB>
void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB)
{
static_assert(TA::getType() == MODRM_OPR_TYPE::REG);
x64Gen_checkBuffer(x64GenContext);
// REX prefix
// 0100 WRXB
if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG)

View File

@ -28,5 +28,5 @@ void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t*
void IMLRegisterAllocator_AllocateRegisters(struct ppcImlGenContext_t* ppcImlGenContext);
// debug
void IMLDebug_DumpSegment(struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);
void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);
void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext);

View File

@ -104,31 +104,48 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml
}
}
void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo)
std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg)
{
if (!ctx)
{
return "<NoNameWithoutCtx>";
}
// find segment index
for (size_t i = 0; i < ctx->segmentList2.size(); i++)
{
if (ctx->segmentList2[i] == seg)
{
return fmt::format("Seg{:04x}", i);
}
}
return "<SegmentNotInCtx>";
}
void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo)
{
StringBuf strOutput(1024);
strOutput.addFmt("SEGMENT 0x{:04x} 0x{:08x} PPC 0x{:08x} - 0x{:08x} Loop-depth {}", segmentIndex, imlSegment->ppcAddress, imlSegment->ppcAddrMin, imlSegment->ppcAddrMax, imlSegment->loopDepth);
strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth);
if (imlSegment->isEnterable)
{
strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress);
}
else if (imlSegment->isJumpDestination)
{
strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress);
}
//else if (imlSegment->isJumpDestination)
//{
// strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress);
//}
debug_printf("%s\n", strOutput.c_str());
strOutput.reset();
strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment);
debug_printf("%s", strOutput.c_str());
//strOutput.reset();
//strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment);
//debug_printf("%s", strOutput.c_str());
if (printLivenessRangeInfo)
{
IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START);
}
debug_printf("\n");
//debug_printf("\n");
sint32 lineOffsetParameters = 18;
@ -376,22 +393,22 @@ void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool prin
}
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R)
{
strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst));
strOutput.addFmt("fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand);
strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst));
strOutput.addFmt("fpr{:02}, fpr{:02}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand);
}
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R)
{
strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst));
strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC);
strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst));
strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC);
}
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R)
{
strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst));
strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB);
strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst));
strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB);
}
else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
{
strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", inst.op_conditionalJump.jumpmarkAddress);
strOutput.addFmt("CYCLE_CHECK\n");
}
else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
@ -451,14 +468,15 @@ void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool prin
{
if (i)
debug_printf(", ");
debug_printf("%p", (void*)imlSegment->list_prevSegments[i]);
debug_printf("%s", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str());
}
debug_printf("\n");
debug_printf("Links to: ");
if (imlSegment->nextSegmentBranchNotTaken)
debug_printf("%p (no branch), ", (void*)imlSegment->nextSegmentBranchNotTaken);
debug_printf("BranchNotTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str());
if (imlSegment->nextSegmentBranchTaken)
debug_printf("%p (branch)", (void*)imlSegment->nextSegmentBranchTaken);
debug_printf("BranchTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str());
if (imlSegment->nextSegmentIsUncertain)
debug_printf("Dynamic target\n");
debug_printf("\n");
}
@ -466,7 +484,7 @@ void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext)
{
for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++)
{
IMLDebug_DumpSegment(ppcImlGenContext->segmentList2[i], i);
IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], false);
debug_printf("\n");
}
}

View File

@ -152,7 +152,7 @@ enum
PPCREC_IML_TYPE_R_S32, // r* (op) imm
PPCREC_IML_TYPE_MACRO,
PPCREC_IML_TYPE_CJUMP, // conditional jump
PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0
PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0
PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable
PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands)
// conditional
@ -420,6 +420,11 @@ struct IMLInstruction
op_jumpmark.address = address;
}
void make_debugbreak(uint32 currentPPCAddress = 0)
{
make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0);
}
void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16)
{
type = PPCREC_IML_TYPE_MACRO;
@ -431,6 +436,7 @@ struct IMLInstruction
void make_ppcEnter(uint32 ppcAddress)
{
cemu_assert_suspicious(); // removed
type = PPCREC_IML_TYPE_PPC_ENTER;
operation = 0;
op_ppcEnter.ppcAddress = ppcAddress;

View File

@ -74,44 +74,44 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml
}
}
typedef struct
{
sint32 name;
sint32 virtualRegister;
sint32 physicalRegister;
bool isDirty;
}raRegisterState_t;
//typedef struct
//{
// sint32 name;
// sint32 virtualRegister;
// sint32 physicalRegister;
// bool isDirty;
//}raRegisterState_t;
const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS;
raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister)
{
for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++)
{
if (regState[i].virtualRegister == virtualRegister)
{
#ifdef CEMU_DEBUG_ASSERT
if (regState[i].physicalRegister < 0)
assert_dbg();
#endif
return regState + i;
}
}
return nullptr;
}
raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState)
{
for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++)
{
if (regState[i].physicalRegister < 0)
{
regState[i].physicalRegister = i;
return regState + i;
}
}
return nullptr;
}
//const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS;
//
//raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister)
//{
// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++)
// {
// if (regState[i].virtualRegister == virtualRegister)
// {
//#ifdef CEMU_DEBUG_ASSERT
// if (regState[i].physicalRegister < 0)
// assert_dbg();
//#endif
// return regState + i;
// }
// }
// return nullptr;
//}
//
//raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState)
//{
// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++)
// {
// if (regState[i].physicalRegister < 0)
// {
// regState[i].physicalRegister = i;
// return regState + i;
// }
// }
// return nullptr;
//}
typedef struct
{
@ -309,18 +309,32 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
#endif
}
void PPCRecRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
// this works as a pre-pass to actual register allocation. Assigning registers in advance based on fixed requirements (e.g. calling conventions and operations with fixed-reg input/output like x86 DIV/MUL)
// algorithm goes as follows:
// 1) Iterate all instructions from beginning to end and keep a list of covering ranges
// 2) If we encounter an instruction with a fixed-register we:
// 2.0) Check if there are any other ranges already using the same fixed-register and if yes, we split them and unassign the register for any follow-up instructions just prior to the current instruction
// 2.1) For inputs: Split the range that needs to be assigned a phys reg on the current instruction. Basically creating a 1-instruction long subrange that we can assign the physical register. RA will then schedule register allocation around that and avoid moves
// 2.2) For outputs: Split the range that needs to be assigned a phys reg on the current instruction
// Q: What if a specific fixed-register is used both for input and output and thus is destructive? A: Create temporary range
// Q: What if we have 3 different inputs that are all the same virtual register? A: Create temporary range
// Q: Assuming the above is implemented, do we even support overlapping two ranges of separate virtual regs on the same phys register? In theory the RA shouldn't care
// assume imlSegment->raInfo.linkedList_allSubranges is ordered ascending by start index already
// todo
}
bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
// sort subranges ascending by start index
//std::sort(imlSegment->raInfo.list_subranges.begin(), imlSegment->raInfo.list_subranges.end(), _sortSubrangesByStartIndexDepr);
_sortSegmentAllSubrangesLinkedList(imlSegment);
PPCRecRA_HandleFixedRegisters(ppcImlGenContext, imlSegment);
raLiveRangeInfo_t liveInfo;
liveInfo.liveRangesCount = 0;
//sint32 subrangeIndex = 0;
//for (auto& subrange : imlSegment->raInfo.list_subranges)
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr)
{
@ -365,7 +379,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
continue;
}
// find free register
// find free register for this segment
uint32 physRegisterMask = (1<<PPC_X64_GPR_USABLE_REGISTERS)-1;
for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
{
@ -379,6 +393,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
uint32 unusedRegisterMask = physRegisterMask; // mask of registers that are currently not used (does not include range checks)
if (physRegisterMask != 0)
{
// check globally
allowedPhysRegisterMask = PPCRecRA_getAllowedRegisterMaskForFullRange(subrangeItr->range);
physRegisterMask &= allowedPhysRegisterMask;
}
@ -761,7 +776,6 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0;
// load register ranges that are supplied from previous segments
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
//for (auto& subrange : imlSegment->raInfo.list_subranges)
while(subrangeItr)
{
if (subrangeItr->start.index == RA_INTER_RANGE_START)
@ -933,7 +947,7 @@ void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
void PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
{
// insert empty segments after every non-taken branch if the linked segment has more than one input
// this gives the register allocator more room to create efficient spill code
@ -985,7 +999,7 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen
void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext)
{
PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext);
PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext);
ppcImlGenContext->raInfo.list_ranges = std::vector<raLivenessRange_t*>();
@ -1243,7 +1257,6 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSe
if (remainingScanDist <= 0)
return; // can't reach end
// also dont forget: Extending is easier if we allow 'non symmetric' branches. E.g. register range one enters one branch
IMLSegment* route[64];
route[0] = currentSegment;
if (currentSegment->nextSegmentBranchNotTaken)

View File

@ -1,6 +1,13 @@
#include "IMLInstruction.h"
#include "IMLSegment.h"
void IMLSegment::SetEnterable(uint32 enterAddress)
{
cemu_assert_debug(!isEnterable || enterPPCAddress == enterAddress);
isEnterable = true;
enterPPCAddress = enterAddress;
}
bool IMLSegment::HasSuffixInstruction() const
{
if (imlList.empty())
@ -16,8 +23,30 @@ IMLInstruction* IMLSegment::GetLastInstruction()
return &imlList.back();
}
void IMLSegment::SetLinkBranchNotTaken(IMLSegment* imlSegmentDst)
{
if (nextSegmentBranchNotTaken)
nextSegmentBranchNotTaken->list_prevSegments.erase(std::find(nextSegmentBranchNotTaken->list_prevSegments.begin(), nextSegmentBranchNotTaken->list_prevSegments.end(), this));
nextSegmentBranchNotTaken = imlSegmentDst;
if(imlSegmentDst)
imlSegmentDst->list_prevSegments.push_back(this);
}
void IMLSegment::SetLinkBranchTaken(IMLSegment* imlSegmentDst)
{
if (nextSegmentBranchTaken)
nextSegmentBranchTaken->list_prevSegments.erase(std::find(nextSegmentBranchTaken->list_prevSegments.begin(), nextSegmentBranchTaken->list_prevSegments.end(), this));
nextSegmentBranchTaken = imlSegmentDst;
if (imlSegmentDst)
imlSegmentDst->list_prevSegments.push_back(this);
}
IMLInstruction* IMLSegment::AppendInstruction()
{
IMLInstruction& inst = imlList.emplace_back();
memset(&inst, 0, sizeof(IMLInstruction));
return &inst;
}
void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst)
{

View File

@ -103,8 +103,8 @@ struct IMLSegment
bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary)
uint32 enterPPCAddress{}; // used if isEnterable is true
// jump destination segments
bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps
uint32 jumpDestinationPPCAddress{};
//bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps
//uint32 jumpDestinationPPCAddress{};
// PPC FPR use mask
bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR
// CR use mask
@ -115,10 +115,30 @@ struct IMLSegment
PPCSegmentRegisterAllocatorInfo_t raInfo{};
PPCRecVGPRDistances_t raDistances{};
bool raRangeExtendProcessed{};
// segment points
IMLSegmentPoint* segmentPointList{};
// segment state API
void SetEnterable(uint32 enterAddress);
void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst);
void SetLinkBranchTaken(IMLSegment* imlSegmentDst);
IMLSegment* GetBranchTaken()
{
return nextSegmentBranchTaken;
}
IMLSegment* GetBranchNotTaken()
{
return nextSegmentBranchNotTaken;
}
// instruction API
IMLInstruction* AppendInstruction();
bool HasSuffixInstruction() const;
IMLInstruction* GetLastInstruction();
// segment points
IMLSegmentPoint* segmentPointList{};
};

View File

@ -21,6 +21,16 @@ public:
};
public:
~PPCFunctionBoundaryTracker()
{
while (!map_ranges.empty())
{
PPCRange_t* range = *map_ranges.begin();
delete range;
map_ranges.erase(map_ranges.begin());
}
}
void trackStartPoint(MPTR startAddress)
{
processRange(startAddress, nullptr, nullptr);
@ -40,10 +50,34 @@ public:
return false;
}
std::vector<PPCRange_t> GetRanges()
{
std::vector<PPCRange_t> r;
for (auto& it : map_ranges)
r.emplace_back(*it);
return r;
}
bool ContainsAddress(uint32 addr) const
{
for (auto& it : map_ranges)
{
if (addr >= it->startAddress && addr < it->getEndAddress())
return true;
}
return false;
}
const std::set<uint32>& GetBranchTargets() const
{
return map_branchTargetsAll;
}
private:
void addBranchDestination(PPCRange_t* sourceRange, MPTR address)
{
map_branchTargets.emplace(address);
map_queuedBranchTargets.emplace(address);
map_branchTargetsAll.emplace(address);
}
// process flow of instruction
@ -114,7 +148,7 @@ private:
Espresso::BOField BO;
uint32 BI;
bool LK;
Espresso::decodeOp_BCLR(opcode, BO, BI, LK);
Espresso::decodeOp_BCSPR(opcode, BO, BI, LK);
if (BO.branchAlways() && !LK)
{
// unconditional BLR
@ -218,7 +252,7 @@ private:
auto rangeItr = map_ranges.begin();
PPCRange_t* previousRange = nullptr;
for (std::set<uint32_t>::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); )
for (std::set<uint32_t>::const_iterator targetItr = map_queuedBranchTargets.begin() ; targetItr != map_queuedBranchTargets.end(); )
{
while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr))
{
@ -239,7 +273,7 @@ private:
(*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length))
{
// delete visited targets
targetItr = map_branchTargets.erase(targetItr);
targetItr = map_queuedBranchTargets.erase(targetItr);
continue;
}
@ -289,5 +323,6 @@ private:
};
std::set<PPCRange_t*, RangePtrCmp> map_ranges;
std::set<uint32> map_branchTargets;
std::set<uint32> map_queuedBranchTargets;
std::set<uint32> map_branchTargetsAll;
};

View File

@ -131,7 +131,7 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress)
}
bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext);
PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set<uint32>& entryAddresses, std::vector<std::pair<MPTR, uint32>>& entryPointsOut)
PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set<uint32>& entryAddresses, std::vector<std::pair<MPTR, uint32>>& entryPointsOut, PPCFunctionBoundaryTracker& boundaryTracker)
{
if (range.startAddress >= PPC_REC_CODE_AREA_END)
{
@ -156,10 +156,10 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
ppcRecFunc->ppcAddress = range.startAddress;
ppcRecFunc->ppcSize = range.length;
// generate intermediate code
ppcImlGenContext_t ppcImlGenContext = { 0 };
bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses);
bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses, boundaryTracker);
if (compiledSuccessfully == false)
{
delete ppcRecFunc;
@ -173,6 +173,16 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
return nullptr;
}
//if (ppcRecFunc->ppcAddress == 0x12345678)
//{
// debug_printf("----------------------------------------\n");
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
//}
// Large functions for testing (botw):
// 3B4049C
// emit x64 code
bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
if (x64GenerationSuccess == false)
@ -181,6 +191,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
}
// collect list of PPC-->x64 entry points
cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size());
cemu_assert_debug(ppcImlGenContext.imlListCount == 0);
entryPointsOut.clear();
for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2)
{
@ -359,7 +372,7 @@ void PPCRecompiler_recompileAtAddress(uint32 address)
PPCRecompilerState.recompilerSpinlock.unlock();
std::vector<std::pair<MPTR, uint32>> functionEntryPoints;
auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints);
auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints, funcBoundaries);
if (!func)
{

View File

@ -31,9 +31,12 @@ struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct pp
struct ppcImlGenContext_t
{
class PPCFunctionBoundaryTracker* boundaryTracker;
PPCRecFunction_t* functionRef;
uint32* currentInstruction;
uint32 ppcAddressOfCurrentInstruction;
IMLSegment* currentOutputSegment;
struct PPCBasicBlockInfo* currentBasicBlock{};
// fpr mode
bool LSQE{ true };
bool PSE{ true };
@ -82,6 +85,31 @@ struct ppcImlGenContext_t
{
return *PPCRecompilerImlGen_generateNewEmptyInstruction(this);
}
IMLSegment* NewSegment()
{
IMLSegment* seg = new IMLSegment();
segmentList2.emplace_back(seg);
return seg;
}
size_t GetSegmentIndex(IMLSegment* seg)
{
for (size_t i = 0; i < segmentList2.size(); i++)
{
if (segmentList2[i] == seg)
return i;
}
cemu_assert_error();
return 0;
}
IMLSegment* InsertSegment(size_t index)
{
IMLSegment* newSeg = new IMLSegment();
segmentList2.insert(segmentList2.begin() + index, 1, newSeg);
return newSeg;
}
};
typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)();

View File

@ -1,7 +1,12 @@
#define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example)
bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set<uint32>& entryAddresses);
bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set<uint32>& entryAddresses, class PPCFunctionBoundaryTracker& boundaryTracker);
IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo);
IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo);
void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext);
IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount);

File diff suppressed because it is too large Load Diff

View File

@ -3,63 +3,67 @@
IMLSegment* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset)
{
for(IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset )
{
return segIt;
}
}
debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset);
__debugbreak();
return nullptr;
//for(IMLSegment* segIt : ppcImlGenContext->segmentList2)
//{
// if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset )
// {
// return segIt;
// }
//}
//debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset);
//return nullptr;
}
void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext)
{
size_t segCount = ppcImlGenContext->segmentList2.size();
for(size_t s=0; s<segCount; s++)
{
IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
__debugbreak(); // outdated
bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size();
IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1];
// handle empty segment
if( imlSegment->imlList.empty())
{
if (isLastSegment == false)
IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment
else
imlSegment->nextSegmentIsUncertain = true;
continue;
}
// check last instruction of segment
IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1);
if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK )
{
// find destination segment by ppc jump address
IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress);
if( jumpDestSegment )
{
if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment);
IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment);
}
else
{
imlSegment->nextSegmentIsUncertain = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO )
{
// currently we assume that the next segment is unknown for all macros
imlSegment->nextSegmentIsUncertain = true;
}
else
{
// all other instruction types do not branch
IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment);
}
}
//size_t segCount = ppcImlGenContext->segmentList2.size();
//for(size_t s=0; s<segCount; s++)
//{
// IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
// bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size();
// IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1];
// // handle empty segment
// if( imlSegment->imlList.empty())
// {
// if (isLastSegment == false)
// IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment
// else
// imlSegment->nextSegmentIsUncertain = true;
// continue;
// }
// // check last instruction of segment
// IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1);
// if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK )
// {
// // find destination segment by ppc jump address
// IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress);
// if( jumpDestSegment )
// {
// if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
// IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment);
// IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment);
// }
// else
// {
// imlSegment->nextSegmentIsUncertain = true;
// }
// }
// else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO )
// {
// // currently we assume that the next segment is unknown for all macros
// imlSegment->nextSegmentIsUncertain = true;
// }
// else
// {
// // all other instruction types do not branch
// IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment);
// }
//}
}
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext)

View File

@ -39,7 +39,6 @@ enum class LogType : sint32
NN_SL = 26,
TextureReadback = 29,
ProcUi = 39,
nlibcurl = 41,
@ -47,6 +46,7 @@ enum class LogType : sint32
NFC = 41,
NTAG = 42,
Recompiler = 60,
};
template <>