diff --git a/bin/keys.txt b/bin/keys.txt deleted file mode 100644 index 8782dbfe..00000000 --- a/bin/keys.txt +++ /dev/null @@ -1,4 +0,0 @@ -# this file contains keys needed for decryption of disc file system data (WUD/WUX) -# 1 key per line, any text after a '#' character is considered a comment -# the emulator will automatically pick the right key -541b9889519b27d363cd21604b97c67a # example key (can be deleted) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 7fcf5d99..4e045b67 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -737,16 +737,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if ( rRegResult == rRegOperand2 ) { // result = operand1 - result - // NEG result x64Gen_neg_reg64Low32(x64GenContext, rRegResult); - // ADD result, operand1 x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); } else { - // copy operand1 to destination register before doing addition x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // sub operand2 x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 294fb0e3..c52878b7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -7,6 +7,8 @@ #include "../BackendX64/BackendX64.h" +#include <boost/container/small_vector.hpp> + struct IMLRegisterAllocatorContext { IMLRegisterAllocatorParameters* raParam; @@ -212,11 +214,67 @@ sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe return minDistance; } -typedef struct +struct IMLRALivenessTimeline { - raLivenessSubrange_t* liveRangeList[64]; - sint32 liveRangesCount; -}raLiveRangeInfo_t; +// 
IMLRALivenessTimeline(raLivenessSubrange_t* subrangeChain) +// { +//#ifdef CEMU_DEBUG_ASSERT +// raLivenessSubrange_t* it = subrangeChain; +// raLivenessSubrange_t* prevIt = it; +// while (it) +// { +// cemu_assert_debug(prevIt->start.index <= it->start.index); +// prevIt = it; +// it = it->link_segmentSubrangesGPR.next; +// } +//#endif +// } + + IMLRALivenessTimeline() + { + } + + // manually add an active range + void AddActiveRange(raLivenessSubrange_t* subrange) + { + activeRanges.emplace_back(subrange); + } + + // remove all ranges from activeRanges with end <= instructionIndex + void ExpireRanges(sint32 instructionIndex) + { + expiredRanges.clear(); + size_t count = activeRanges.size(); + for (size_t f = 0; f < count; f++) + { + raLivenessSubrange_t* liverange = activeRanges[f]; + if (liverange->end.index <= instructionIndex) + { +#ifdef CEMU_DEBUG_ASSERT + if (instructionIndex != RA_INTER_RANGE_END && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken)) + assert_dbg(); // infinite subranges should not expire +#endif + expiredRanges.emplace_back(liverange); + // remove entry + activeRanges[f] = activeRanges[count-1]; + f--; + count--; + } + } + if(count != activeRanges.size()) + activeRanges.resize(count); + } + + std::span<raLivenessSubrange_t*> GetExpiredRanges() + { + return { expiredRanges.data(), expiredRanges.size() }; + } + + boost::container::small_vector<raLivenessSubrange_t*, 64> activeRanges; + +private: + boost::container::small_vector<raLivenessSubrange_t*, 16> expiredRanges; +}; bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rangeB) { @@ -244,10 +302,6 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IML subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; continue; } - - //if (subrange->start.index < subrangeItr->end.index && subrange->end.index > subrangeItr->start.index || - // (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) || - // (subrange->end.index == RA_INTER_RANGE_END && 
subrange->end.index == subrangeItr->end.index) ) if(IsRangeOverlapping(subrange, subrangeItr)) { if (subrangeItr->range->physicalRegister >= 0) @@ -312,84 +366,95 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) #endif } -void PPCRecRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +raLivenessSubrange_t* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex) +{ + uint32 regId = regToSearch & 0xFF; + raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_perVirtualGPR[regId]; + while (subrangeItr) + { + if (subrangeItr->start.index <= instructionIndex && subrangeItr->end.index > instructionIndex) + return subrangeItr; + subrangeItr = subrangeItr->link_sameVirtualRegisterGPR.next; + } + return nullptr; +} + +void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessSubrange_t* subrange, sint32 instructionIndex) +{ + __debugbreak(); +} + +void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { // this works as a pre-pass to actual register allocation. Assigning registers in advance based on fixed requirements (e.g. calling conventions and operations with fixed-reg input/output like x86 DIV/MUL) // algorithm goes as follows: // 1) Iterate all instructions from beginning to end and keep a list of covering ranges - // 2) If we encounter an instruction with a fixed-register we: + // 2) If we encounter an instruction with a fixed register we: // 2.0) Check if there are any other ranges already using the same fixed-register and if yes, we split them and unassign the register for any follow-up instructions just prior to the current instruction // 2.1) For inputs: Split the range that needs to be assigned a phys reg on the current instruction. Basically creating a 1-instruction long subrange that we can assign the physical register. 
RA will then schedule register allocation around that and avoid moves // 2.2) For outputs: Split the range that needs to be assigned a phys reg on the current instruction // Q: What if a specific fixed-register is used both for input and output and thus is destructive? A: Create temporary range // Q: What if we have 3 different inputs that are all the same virtual register? A: Create temporary range // Q: Assuming the above is implemented, do we even support overlapping two ranges of separate virtual regs on the same phys register? In theory the RA shouldn't care - // assume imlSegment->raInfo.linkedList_allSubranges is ordered ascending by start index already - // todo + // experimental code + //for (size_t i = 0; i < imlSegment->imlList.size(); i++) + //{ + // IMLInstruction& inst = imlSegment->imlList[i]; + // if (inst.type == PPCREC_IML_TYPE_R_R_R) + // { + // if (inst.operation == PPCREC_IML_OP_LEFT_SHIFT) + // { + // // get the virtual reg which needs to be assigned a fixed register + // //IMLUsedRegisters usedReg; + // //inst.CheckRegisterUsage(&usedReg); + // IMLReg rB = inst.op_r_r_r.regB; + // // rB needs to use RCX/ECX + // raLivenessSubrange_t* subrange = _GetSubrangeByInstructionIndexAndVirtualReg(imlSegment, rB, i); + // cemu_assert_debug(subrange->range->physicalRegister < 0); // already has a phys reg assigned + // // make sure RCX/ECX is free + // // split before (if needed) and after instruction so that we get a new 1-instruction long range for which we can assign the physical register + // raLivenessSubrange_t* instructionRange = subrange->start.index < i ? 
PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrange, i, false) : subrange; + // raLivenessSubrange_t* tailRange = PPCRecRA_splitLocalSubrange(ppcImlGenContext, instructionRange, i+1, false); + + // } + // } + //} } -bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { // sort subranges ascending by start index _sortSegmentAllSubrangesLinkedList(imlSegment); - - PPCRecRA_HandleFixedRegisters(ppcImlGenContext, imlSegment); - raLiveRangeInfo_t liveInfo; - liveInfo.liveRangesCount = 0; + IMLRALivenessTimeline livenessTimeline; raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { sint32 currentIndex = subrangeItr->start.index; - // validate subrange PPCRecRA_debugValidateSubrange(subrangeItr); - // expire ranges - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->end.index <= currentIndex && liverange->end.index != RA_INTER_RANGE_END) - { -#ifdef CEMU_DEBUG_ASSERT - if (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken) - assert_dbg(); // infinite subranges should not expire -#endif - // remove entry - liveInfo.liveRangesCount--; - liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount]; - f--; - } - } - // check if subrange already has register assigned + livenessTimeline.ExpireRanges(std::min(currentIndex, RA_INTER_RANGE_END-1)); // expire up to currentIndex (inclusive), but exclude infinite ranges + // if subrange already has register assigned then add it to the active list and continue if (subrangeItr->range->physicalRegister >= 0) { // verify if register is actually available #ifdef CEMU_DEBUG_ASSERT - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for (auto& liverangeItr 
: livenessTimeline.activeRanges) { - raLivenessSubrange_t* liverangeItr = liveInfo.liveRangeList[f]; - if (liverangeItr->range->physicalRegister == subrangeItr->range->physicalRegister) - { - // this should never happen because we try to preventively avoid register conflicts - assert_dbg(); - } + // check for register mismatch + cemu_assert_debug(liverangeItr->range->physicalRegister != subrangeItr->range->physicalRegister); } #endif - // add to live ranges - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; - // next + livenessTimeline.AddActiveRange(subrangeItr); subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; continue; } - // find free register for this segment + // find free register for current subrangeItr and segment IMLPhysRegisterSet physRegSet = ctx.raParam->physicalRegisterPool; - - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for (auto& liverangeItr : livenessTimeline.activeRanges) { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - cemu_assert_debug(liverange->range->physicalRegister >= 0); - physRegSet.SetReserved(liverange->range->physicalRegister); + cemu_assert_debug(liverangeItr->range->physicalRegister >= 0); + physRegSet.SetReserved(liverangeItr->range->physicalRegister); } // check intersections with other ranges and determine allowed registers IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments) @@ -449,9 +514,8 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen spillStrategies.localRangeHoleCutting.cost = INT_MAX; if (currentIndex >= 0) { - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for (auto candidate : livenessTimeline.activeRanges) { - raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f]; if (candidate->end.index == RA_INTER_RANGE_END) continue; sint32 distance = 
PPCRecRA_countInstructionsUntilNextUse(candidate, currentIndex); @@ -513,12 +577,11 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen spillStrategies.explodeRange.cost = INT_MAX; spillStrategies.explodeRange.range = nullptr; spillStrategies.explodeRange.distance = -1; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for (auto candidate : livenessTimeline.activeRanges) { - raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f]; if (candidate->end.index != RA_INTER_RANGE_END) continue; - sint32 distance = PPCRecRA_countInstructionsUntilNextUse(liveInfo.liveRangeList[f], currentIndex); + sint32 distance = PPCRecRA_countInstructionsUntilNextUse(candidate, currentIndex); if( distance < 2) continue; sint32 cost; @@ -580,9 +643,8 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen spillStrategies.explodeRange.cost = INT_MAX; spillStrategies.explodeRange.range = nullptr; spillStrategies.explodeRange.distance = -1; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + for(auto candidate : livenessTimeline.activeRanges) { - raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f]; if (candidate->end.index != RA_INTER_RANGE_END) continue; // only select candidates that clash with current subrange @@ -616,16 +678,14 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen } // assign register to range subrangeItr->range->physicalRegister = physRegSet.GetFirstAvailableReg(); - // add to live ranges - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; + livenessTimeline.AddActiveRange(subrangeItr); // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } return true; } -void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext) { // start with frequently executed segments 
first sint32 maxLoopDepth = 0; @@ -633,6 +693,10 @@ void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext { maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth); } + // assign fixed registers first + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + IMLRA_HandleFixedRegisters(ppcImlGenContext, segIt); + while (true) { bool done = false; @@ -642,7 +706,7 @@ void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext { if (segIt->loopDepth != d) continue; - done = PPCRecRA_assignSegmentRegisters(ctx, ppcImlGenContext, segIt); + done = IMLRA_AssignSegmentRegisters(ctx, ppcImlGenContext, segIt); if (done == false) break; } @@ -654,12 +718,12 @@ void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext } } -typedef struct +struct subrangeEndingInfo_t { raLivenessSubrange_t* subrangeList[SUBRANGE_LIST_SIZE]; sint32 subrangeCount; bool hasUndefinedEndings; -}subrangeEndingInfo_t; +}; void _findSubrangeWriteEndings(raLivenessSubrange_t* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info) { @@ -759,14 +823,13 @@ void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange) } } -void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +void IMLRA_GenerateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX]; for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) virtualReg2PhysReg[i] = -1; std::unordered_map virt2PhysRegMap; // key = virtual register, value = physical register - raLiveRangeInfo_t liveInfo; - liveInfo.liveRangesCount = 0; + IMLRALivenessTimeline livenessTimeline; sint32 index = 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 
1 : 0; // load register ranges that are supplied from previous segments @@ -775,8 +838,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, { if (subrangeItr->start.index == RA_INTER_RANGE_START) { - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; + livenessTimeline.AddActiveRange(subrangeItr); #ifdef CEMU_DEBUG_ASSERT // load GPR if (subrangeItr->_noLoad == false) @@ -797,41 +859,34 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, while(index < imlSegment->imlList.size() + 1) { // expire ranges - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + livenessTimeline.ExpireRanges(index); + for (auto& expiredRange : livenessTimeline.GetExpiredRanges()) { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->end.index <= index) + // update translation table + if (virtualReg2PhysReg[expiredRange->range->virtualRegister] == -1) + assert_dbg(); + virtualReg2PhysReg[expiredRange->range->virtualRegister] = -1; + virt2PhysRegMap.erase(expiredRange->range->virtualRegister); + // store GPR if required + // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed + if (expiredRange->hasStore) { - // update translation table - if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1) - assert_dbg(); - virtualReg2PhysReg[liverange->range->virtualRegister] = -1; - virt2PhysRegMap.erase(liverange->range->virtualRegister); - // store GPR if required - // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed - if (liverange->hasStore) - { - PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), liverange->range->physicalRegister, liverange->range->name); - index++; - } - // remove entry - liveInfo.liveRangesCount--; - 
liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount]; - f--; + PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min<sint32>(index, imlSegment->imlList.size() - suffixInstructionCount), expiredRange->range->physicalRegister, expiredRange->range->name); + index++; } } + // load new ranges subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { if (subrangeItr->start.index == index) { - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; + livenessTimeline.AddActiveRange(subrangeItr); // load GPR // similar to stores, any loads for the next segment need to happen before the suffix instruction - // however, starting 17-12-2022 ranges that exit the segment at the end but do not cover the suffix instruction are illegal (e.g. RA_INTER_RANGE_END to RA_INTER_RANGE_END subrange) - // the limitation that name loads (for the follow-up segments) need to happen before the suffix instruction require that the range also reflects this, otherwise the RA would erroneously assume registers to be available during the suffix instruction + // however, ranges that exit the segment at the end but do not cover the suffix instruction are illegal (e.g. 
RA_INTER_RANGE_END to RA_INTER_RANGE_END subrange) + // this is to prevent the RA from inserting store/load instructions after the suffix instruction if (imlSegment->HasSuffixInstruction()) { cemu_assert_debug(subrangeItr->start.index <= imlSegment->GetSuffixInstructionIndex()); @@ -855,35 +910,25 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // next iml instruction index++; } - // expire infinite subranges (subranges that cross the segment border) + // expire infinite subranges (subranges which cross the segment border) sint32 storeLoadListLength = 0; raLoadStoreInfo_t loadStoreList[IML_RA_VIRT_REG_COUNT_MAX]; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) + livenessTimeline.ExpireRanges(RA_INTER_RANGE_END); + for (auto liverange : livenessTimeline.GetExpiredRanges()) { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->end.index == RA_INTER_RANGE_END) + // update translation table + cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1); + virtualReg2PhysReg[liverange->range->virtualRegister] = -1; + virt2PhysRegMap.erase(liverange->range->virtualRegister); + // store GPR + if (liverange->hasStore) { - // update translation table - cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1); - virtualReg2PhysReg[liverange->range->virtualRegister] = -1; - virt2PhysRegMap.erase(liverange->range->virtualRegister); - // store GPR - if (liverange->hasStore) - { - loadStoreList[storeLoadListLength].registerIndex = liverange->range->physicalRegister; - loadStoreList[storeLoadListLength].registerName = liverange->range->name; - storeLoadListLength++; - } - // remove entry - liveInfo.liveRangesCount--; - liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount]; - f--; - } - else - { - cemu_assert_suspicious(); + loadStoreList[storeLoadListLength].registerIndex = liverange->range->physicalRegister; + 
loadStoreList[storeLoadListLength].registerName = liverange->range->name; + storeLoadListLength++; } } + cemu_assert_debug(livenessTimeline.activeRanges.empty()); if (storeLoadListLength > 0) { PPCRecRA_insertGPRStoreInstructions(imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList, storeLoadListLength); @@ -895,8 +940,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, { if (subrangeItr->start.index == RA_INTER_RANGE_END) { - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; + livenessTimeline.AddActiveRange(subrangeItr); // load GPR if (subrangeItr->_noLoad == false) { @@ -918,20 +962,20 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, } } -void PPCRecRA_generateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_GenerateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) { for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; - PPCRecRA_generateSegmentInstructions(ppcImlGenContext, imlSegment); + IMLRA_GenerateSegmentInstructions(ppcImlGenContext, imlSegment); } } -void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext); +void IMLRA_CalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext); +void IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext); +void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) { // insert empty segments after every non-taken branch if the linked segment has more than one input // this gives the register 
allocator more room to create efficient spill code @@ -986,16 +1030,16 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext IMLRegisterAllocatorContext ctx; ctx.raParam = &raParam; - PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext); + IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext); ppcImlGenContext->raInfo.list_ranges = std::vector(); - PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext); - PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext); - PPCRecRA_assignRegisters(ctx, ppcImlGenContext); + IMLRA_CalculateLivenessRanges(ppcImlGenContext); + IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext); + IMLRA_AssignRegisters(ctx, ppcImlGenContext); - PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext); - PPCRecRA_generateMoveInstructions(ppcImlGenContext); + IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); + IMLRA_GenerateMoveInstructions(ppcImlGenContext); PPCRecRA_deleteAllRanges(ppcImlGenContext); } @@ -1028,7 +1072,7 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, } } -void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_CalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext) { // for each register calculate min/max index of usage range within each segment for (IMLSegment* segIt : ppcImlGenContext->segmentList2) @@ -1338,7 +1382,7 @@ void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) } } -void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext) { // merge close ranges PPCRecRA_mergeCloseRangesV2(ppcImlGenContext); @@ -1377,7 +1421,7 @@ void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) subrange->_noLoad = true; } -void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* 
ppcImlGenContext) { // this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore // first do a per-subrange pass diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 14159c77..071a1d5e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -225,9 +225,9 @@ void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {} #endif // split subrange at the given index -// After the split there will be two ranges/subranges: -// head -> subrange is shortned to end at splitIndex -// tail -> a new subrange that reaches from splitIndex to the end of the original subrange +// After the split there will be two ranges and subranges: +// head -> subrange is shortened to end at splitIndex (exclusive) +// tail -> a new subrange that ranges from splitIndex (inclusive) to the end of the original subrange // if head has a physical register assigned it will not carry over to tail // The return value is the tail subrange // If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations @@ -236,7 +236,9 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenC { // validation #ifdef CEMU_DEBUG_ASSERT - if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START) + //if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START) + // assert_dbg(); + if (subrange->start.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START) assert_dbg(); if (subrange->start.index >= splitIndex) assert_dbg(); @@ -282,6 +284,11 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenC tailSubrange->start.index = 
tailSubrange->list_locations.front().index; } } + else + { + // set head range to end at split index + subrange->end.index = splitIndex; + } return tailSubrange; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 8d8a2cb5..3cd1a1c6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1023,7 +1023,7 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { // unlike SRAWI, for SRAW the shift range is 0-63 (masked to 6 bits) - // but only shifts up to register bitwidth-1 are well defined in IML so this requires special handling for shifts >= 32 + // but only shifts up to register bitwidth minus one are well defined in IML so this requires special handling for shifts >= 32 sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); @@ -1093,9 +1093,9 @@ bool PPCRecompilerImlGen_SLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg registerRS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg registerRB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg registerRA = _GetRegGPR(ppcImlGenContext, rA); ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB); if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); @@ -1106,13 +1106,12 @@ bool PPCRecompilerImlGen_SRW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { int rS, rA, 
rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, regA, regS, regB); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1120,12 +1119,11 @@ bool PPCRecompilerImlGen_EXTSH(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - PPC_ASSERT(rB==0); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S16_TO_S32, regA, regS); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1133,11 +1131,11 @@ bool PPCRecompilerImlGen_EXTSB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - 
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S8_TO_S32, regA, regS); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1145,12 +1143,11 @@ bool PPCRecompilerImlGen_CNTLZW(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - PPC_ASSERT(rB==0); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_CNTLZW, registerRA, registerRS); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_CNTLZW, regA, regS); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRA); + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1158,13 +1155,11 @@ bool PPCRecompilerImlGen_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - PPC_ASSERT(rB == 0); - - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - uint32 registerRD = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, registerRD, registerRA); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, regD, regA); if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0(ppcImlGenContext, registerRD); + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; }