mirror of https://github.com/cemu-project/Cemu.git
PPCRec: Refactor read/write access tracking for liveness ranges
This commit is contained in:
parent
96d7c754f9
commit
636b63fda2
|
@ -408,6 +408,27 @@ struct IMLUsedRegisters
|
|||
F(readGPR3);
|
||||
}
|
||||
|
||||
// temporary (for FPRs)
|
||||
template<typename Fn>
|
||||
void ForEachWrittenFPR(Fn F) const
|
||||
{
|
||||
if (writtenFPR1.IsValid())
|
||||
F(writtenFPR1);
|
||||
}
|
||||
|
||||
template<typename Fn>
|
||||
void ForEachReadFPR(Fn F) const
|
||||
{
|
||||
if (readFPR1.IsValid())
|
||||
F(readFPR1);
|
||||
if (readFPR2.IsValid())
|
||||
F(readFPR2);
|
||||
if (readFPR3.IsValid())
|
||||
F(readFPR3);
|
||||
if (readFPR4.IsValid())
|
||||
F(readFPR4);
|
||||
}
|
||||
|
||||
template<typename Fn>
|
||||
void ForEachAccessedGPR(Fn F) const
|
||||
{
|
||||
|
|
|
@ -168,7 +168,8 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe
|
|||
{
|
||||
IMLPhysRegisterSet ps;
|
||||
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX);
|
||||
fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps);
|
||||
fixedRegs.listInput.emplace_back(IMLREG_INVALID, ps); // none of the inputs may use EAX
|
||||
fixedRegs.listOutput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); // but we output to EAX
|
||||
}
|
||||
else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
|
||||
{
|
||||
|
@ -262,30 +263,14 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml
|
|||
|
||||
sint32 PPCRecRA_countDistanceUntilNextUse2(raLivenessRange* subrange, raInstructionEdge startPosition)
|
||||
{
|
||||
sint32 startInstructionIndex;
|
||||
if (startPosition.ConnectsToPreviousSegment())
|
||||
startInstructionIndex = 0;
|
||||
else
|
||||
startInstructionIndex = startPosition.GetInstructionIndex();
|
||||
for (sint32 i = 0; i < subrange->list_locations.size(); i++)
|
||||
for (sint32 i = 0; i < subrange->list_accessLocations.size(); i++)
|
||||
{
|
||||
if (subrange->list_locations[i].index >= startInstructionIndex)
|
||||
if (subrange->list_accessLocations[i].pos >= startPosition)
|
||||
{
|
||||
sint32 preciseIndex = subrange->list_locations[i].index * 2;
|
||||
cemu_assert_debug(subrange->list_locations[i].isRead || subrange->list_locations[i].isWrite); // locations must have any access
|
||||
// check read edge
|
||||
if (subrange->list_locations[i].isRead)
|
||||
{
|
||||
if (preciseIndex >= startPosition.GetRaw())
|
||||
return preciseIndex - startPosition.GetRaw();
|
||||
}
|
||||
// check write edge
|
||||
if (subrange->list_locations[i].isWrite)
|
||||
{
|
||||
preciseIndex++;
|
||||
if (preciseIndex >= startPosition.GetRaw())
|
||||
return preciseIndex - startPosition.GetRaw();
|
||||
}
|
||||
auto& it = subrange->list_accessLocations[i];
|
||||
cemu_assert_debug(it.IsRead() != it.IsWrite()); // an access location can be either read or write
|
||||
cemu_assert_debug(!startPosition.ConnectsToPreviousSegment() && !startPosition.ConnectsToNextSegment());
|
||||
return it.pos.GetRaw() - startPosition.GetRaw();
|
||||
}
|
||||
}
|
||||
cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000);
|
||||
|
@ -549,9 +534,7 @@ struct raFixedRegRequirementWithVGPR
|
|||
std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment)
|
||||
{
|
||||
std::vector<raFixedRegRequirementWithVGPR> frrList;
|
||||
|
||||
size_t index = 0;
|
||||
IMLUsedRegisters gprTracking;
|
||||
while (index < imlSegment->imlList.size())
|
||||
{
|
||||
IMLFixedRegisters fixedRegs;
|
||||
|
@ -560,7 +543,7 @@ std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedReg
|
|||
pos.Set(index, true);
|
||||
for (auto& fixedRegAccess : fixedRegs.listInput)
|
||||
{
|
||||
frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID());
|
||||
frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
|
||||
}
|
||||
pos = pos + 1;
|
||||
for (auto& fixedRegAccess : fixedRegs.listOutput)
|
||||
|
@ -1468,6 +1451,19 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx
|
|||
return subrange;
|
||||
}
|
||||
|
||||
// Appends pos to the subrange's access location list unless the most recent entry already has that position.
// Positions are expected to arrive in ascending order (checked by a debug assert).
void IMLRA_UpdateOrAddSubrangeLocation(raLivenessRange* subrange, raInstructionEdge pos)
{
	auto& accessLocations = subrange->list_accessLocations;
	if (!accessLocations.empty())
	{
		auto& lastAccess = accessLocations.back();
		if (lastAccess.pos == pos)
			return; // this edge is already tracked
		cemu_assert_debug(lastAccess.pos < pos);
	}
	accessLocations.emplace_back(pos);
}
|
||||
|
||||
// take abstract range data and create LivenessRanges
|
||||
void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
|
||||
{
|
||||
|
@ -1500,12 +1496,27 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML
|
|||
while (index < imlSegment->imlList.size())
|
||||
{
|
||||
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
|
||||
gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
|
||||
raInstructionEdge pos((sint32)index, true);
|
||||
gprTracking.ForEachReadGPR([&](IMLReg gprReg) {
|
||||
IMLRegID gprId = gprReg.GetRegID();
|
||||
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
|
||||
PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten);
|
||||
cemu_assert_debug(!subrange->interval2.start.IsInstructionIndex() || subrange->interval2.start.GetInstructionIndex() <= index);
|
||||
cemu_assert_debug(!subrange->interval2.end.IsInstructionIndex() || subrange->interval2.end.GetInstructionIndex() >= index);
|
||||
IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
|
||||
});
|
||||
gprTracking.ForEachReadFPR([&](IMLReg gprReg) {
|
||||
IMLRegID gprId = gprReg.GetRegID();
|
||||
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
|
||||
IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
|
||||
});
|
||||
pos = {(sint32)index, false};
|
||||
gprTracking.ForEachWrittenGPR([&](IMLReg gprReg) {
|
||||
IMLRegID gprId = gprReg.GetRegID();
|
||||
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
|
||||
IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
|
||||
});
|
||||
gprTracking.ForEachWrittenFPR([&](IMLReg gprReg) {
|
||||
IMLRegID gprId = gprReg.GetRegID();
|
||||
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
|
||||
IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
|
||||
});
|
||||
// check fixed register requirements
|
||||
IMLFixedRegisters fixedRegs;
|
||||
|
@ -1754,13 +1765,13 @@ void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange)
|
|||
bool isRead = false;
|
||||
bool isWritten = false;
|
||||
bool isOverwritten = false;
|
||||
for (auto& location : subrange->list_locations)
|
||||
for (auto& location : subrange->list_accessLocations)
|
||||
{
|
||||
if (location.isRead)
|
||||
if (location.IsRead())
|
||||
{
|
||||
isRead = true;
|
||||
}
|
||||
if (location.isWrite)
|
||||
if (location.IsWrite())
|
||||
{
|
||||
if (isRead == false)
|
||||
isOverwritten = true;
|
||||
|
|
|
@ -207,7 +207,7 @@ raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext,
|
|||
{
|
||||
raLivenessRange* range = memPool_livenessSubrange.acquireObj();
|
||||
range->previousRanges.clear();
|
||||
range->list_locations.clear();
|
||||
range->list_accessLocations.clear();
|
||||
range->list_fixedRegRequirements.clear();
|
||||
range->imlSegment = imlSegment;
|
||||
|
||||
|
@ -259,39 +259,16 @@ void _unlinkSubrange(raLivenessRange* subrange)
|
|||
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
|
||||
{
|
||||
_unlinkSubrange(subrange);
|
||||
//subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange));
|
||||
subrange->list_locations.clear();
|
||||
|
||||
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.start);
|
||||
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.end);
|
||||
subrange->list_accessLocations.clear();
|
||||
subrange->list_fixedRegRequirements.clear();
|
||||
memPool_livenessSubrange.releaseObj(subrange);
|
||||
}
|
||||
|
||||
// leaves range and linked ranges in invalid state. Only use at final clean up when no range is going to be accessed anymore
|
||||
void _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
|
||||
{
|
||||
_unlinkSubrange(subrange);
|
||||
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.start);
|
||||
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.end);
|
||||
memPool_livenessSubrange.releaseObj(subrange);
|
||||
|
||||
// #ifdef CEMU_DEBUG_ASSERT
|
||||
// // DEBUG BEGIN
|
||||
// subrange->lastIterationIndex = 0xFFFFFFFE;
|
||||
// subrange->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1;
|
||||
// subrange->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1;
|
||||
//
|
||||
// // DEBUG END
|
||||
// #endif
|
||||
}
|
||||
|
||||
void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
|
||||
{
|
||||
auto clusterRanges = subrange->GetAllSubrangesInCluster();
|
||||
for (auto& subrange : clusterRanges)
|
||||
{
|
||||
_PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, subrange);
|
||||
}
|
||||
PPCRecRA_deleteSubrange(ppcImlGenContext, subrange);
|
||||
}
|
||||
|
||||
void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
|
||||
|
@ -300,9 +277,7 @@ void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
|
|||
{
|
||||
raLivenessRange* cur;
|
||||
while(cur = seg->raInfo.linkedList_allSubranges)
|
||||
{
|
||||
_PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, cur);
|
||||
}
|
||||
PPCRecRA_deleteSubrange(ppcImlGenContext, cur);
|
||||
seg->raInfo.linkedList_allSubranges = nullptr;
|
||||
seg->raInfo.linkedList_perVirtualRegister.clear();
|
||||
}
|
||||
|
@ -322,7 +297,6 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
|
|||
if (subrange == absorbedSubrange)
|
||||
assert_dbg();
|
||||
#endif
|
||||
|
||||
// update references
|
||||
subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
|
||||
subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;
|
||||
|
@ -334,22 +308,9 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
|
|||
*std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange;
|
||||
|
||||
// merge usage locations
|
||||
// at the merge point both ranges might track the same instruction, we handle this by first merging this duplicate location
|
||||
if(subrange && absorbedSubrange && !subrange->list_locations.empty() && !absorbedSubrange->list_locations.empty())
|
||||
{
|
||||
if(subrange->list_locations.back().index == absorbedSubrange->list_locations.front().index)
|
||||
{
|
||||
subrange->list_locations.back().isRead |= absorbedSubrange->list_locations.front().isRead;
|
||||
subrange->list_locations.back().isWrite |= absorbedSubrange->list_locations.front().isWrite;
|
||||
absorbedSubrange->list_locations.erase(absorbedSubrange->list_locations.begin()); // inefficient
|
||||
}
|
||||
}
|
||||
for (auto& location : absorbedSubrange->list_locations)
|
||||
{
|
||||
cemu_assert_debug(subrange->list_locations.empty() || (subrange->list_locations.back().index < location.index)); // todo - sometimes a subrange can contain the same instruction at the merge point if they are covering half of the instruction edge
|
||||
subrange->list_locations.push_back(location);
|
||||
}
|
||||
absorbedSubrange->list_locations.clear();
|
||||
for (auto& accessLoc : absorbedSubrange->list_accessLocations)
|
||||
subrange->list_accessLocations.push_back(accessLoc);
|
||||
absorbedSubrange->list_accessLocations.clear();
|
||||
// merge fixed reg locations
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty())
|
||||
|
@ -358,9 +319,8 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
|
|||
}
|
||||
#endif
|
||||
for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements)
|
||||
{
|
||||
subrange->list_fixedRegRequirements.push_back(fixedReg);
|
||||
}
|
||||
absorbedSubrange->list_fixedRegRequirements.clear();
|
||||
|
||||
subrange->interval2.end = absorbedSubrange->interval2.end;
|
||||
|
||||
|
@ -376,18 +336,29 @@ void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange
|
|||
auto clusterRanges = originRange->GetAllSubrangesInCluster();
|
||||
for (auto& subrange : clusterRanges)
|
||||
{
|
||||
if (subrange->list_locations.empty())
|
||||
if (subrange->list_accessLocations.empty())
|
||||
continue;
|
||||
raInterval interval;
|
||||
interval.SetInterval(subrange->list_locations.front().index, true, subrange->list_locations.back().index, true);
|
||||
interval.SetInterval(subrange->list_accessLocations.front().pos, subrange->list_accessLocations.back().pos);
|
||||
raLivenessRange* newSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end);
|
||||
// copy locations and fixed reg indices
|
||||
newSubrange->list_locations = subrange->list_locations;
|
||||
newSubrange->list_accessLocations = subrange->list_accessLocations;
|
||||
newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements;
|
||||
if(originRange->HasPhysicalRegister())
|
||||
{
|
||||
cemu_assert_debug(subrange->list_fixedRegRequirements.empty()); // avoid unassigning a register from a range with a fixed register requirement
|
||||
}
|
||||
// validate
|
||||
if(!newSubrange->list_accessLocations.empty())
|
||||
{
|
||||
cemu_assert_debug(newSubrange->list_accessLocations.front().pos >= newSubrange->interval2.start);
|
||||
cemu_assert_debug(newSubrange->list_accessLocations.back().pos <= newSubrange->interval2.end);
|
||||
}
|
||||
if(!newSubrange->list_fixedRegRequirements.empty())
|
||||
{
|
||||
cemu_assert_debug(newSubrange->list_fixedRegRequirements.front().pos >= newSubrange->interval2.start); // fixed register requirements outside of the actual access range probably means there is a mistake in GetInstructionFixedRegisters()
|
||||
cemu_assert_debug(newSubrange->list_fixedRegRequirements.back().pos <= newSubrange->interval2.end);
|
||||
}
|
||||
}
|
||||
// remove subranges
|
||||
PPCRecRA_deleteSubrangeCluster(ppcImlGenContext, originRange);
|
||||
|
@ -411,10 +382,10 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
|
|||
cemu_assert_debug(range->interval2.start.ConnectsToPreviousSegment());
|
||||
}
|
||||
// validate locations
|
||||
if (!range->list_locations.empty())
|
||||
if (!range->list_accessLocations.empty())
|
||||
{
|
||||
cemu_assert_debug(range->list_locations.front().index >= range->interval2.start.GetInstructionIndexEx());
|
||||
cemu_assert_debug(range->list_locations.back().index <= range->interval2.end.GetInstructionIndexEx());
|
||||
cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval2.start);
|
||||
cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval2.end);
|
||||
}
|
||||
// validate fixed reg requirements
|
||||
if (!range->list_fixedRegRequirements.empty())
|
||||
|
@ -430,41 +401,11 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
|
|||
void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {}
|
||||
#endif
|
||||
|
||||
// since locations are per-instruction, but intervals are per-edge, it's possible that locations track reads/writes outside of the range
|
||||
// this function will remove any outside read/write locations
|
||||
void IMLRA_FixLocations(raLivenessRange* range)
|
||||
{
|
||||
if(range->list_locations.empty())
|
||||
return;
|
||||
if(range->interval2.start.IsInstructionIndex() && range->interval2.start.GetInstructionIndex() == range->list_locations.front().index)
|
||||
{
|
||||
auto& location = range->list_locations.front();
|
||||
if(range->interval2.start.IsOnOutputEdge())
|
||||
{
|
||||
location.isRead = false;
|
||||
if(!location.isRead && !location.isWrite)
|
||||
range->list_locations.erase(range->list_locations.begin());
|
||||
}
|
||||
}
|
||||
if(range->list_locations.empty())
|
||||
return;
|
||||
if(range->interval2.end.IsInstructionIndex() && range->interval2.end.GetInstructionIndex() == range->list_locations.back().index)
|
||||
{
|
||||
auto& location = range->list_locations.back();
|
||||
if(range->interval2.end.IsOnInputEdge())
|
||||
{
|
||||
location.isWrite = false;
|
||||
if(!location.isRead && !location.isWrite)
|
||||
range->list_locations.pop_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// trim start and end of range to match first and last read/write locations
|
||||
// does not trim start/endpoints which extend into the next/previous segment
|
||||
void IMLRA_TrimRangeToUse(raLivenessRange* range)
|
||||
{
|
||||
if(range->list_locations.empty())
|
||||
if(range->list_accessLocations.empty())
|
||||
{
|
||||
// special case where we trim ranges extending from other segments to a single instruction edge
|
||||
cemu_assert_debug(!range->interval2.start.IsInstructionIndex() || !range->interval2.end.IsInstructionIndex());
|
||||
|
@ -474,25 +415,18 @@ void IMLRA_TrimRangeToUse(raLivenessRange* range)
|
|||
range->interval2.end = range->interval2.start;
|
||||
return;
|
||||
}
|
||||
// trim start and end
|
||||
raInterval prevInterval = range->interval2;
|
||||
// trim start
|
||||
if(range->interval2.start.IsInstructionIndex())
|
||||
{
|
||||
bool isInputEdge = range->list_locations.front().isRead;
|
||||
range->interval2.start.Set(range->list_locations.front().index, isInputEdge);
|
||||
}
|
||||
// trim end
|
||||
range->interval2.start = range->list_accessLocations.front().pos;
|
||||
if(range->interval2.end.IsInstructionIndex())
|
||||
{
|
||||
bool isOutputEdge = range->list_locations.back().isWrite;
|
||||
range->interval2.end.Set(range->list_locations.back().index, !isOutputEdge);
|
||||
}
|
||||
range->interval2.end = range->list_accessLocations.back().pos;
|
||||
// extra checks
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
cemu_assert_debug(range->interval2.start <= range->interval2.end);
|
||||
for(auto& loc : range->list_locations)
|
||||
for(auto& loc : range->list_accessLocations)
|
||||
{
|
||||
cemu_assert_debug(range->interval2.ContainsInstructionIndex(loc.index));
|
||||
cemu_assert_debug(range->interval2.ContainsEdge(loc.pos));
|
||||
}
|
||||
cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval2));
|
||||
#endif
|
||||
|
@ -532,33 +466,25 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
|
|||
*std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange;
|
||||
// we assume that list_locations is ordered by instruction index and contains no duplicate indices, so lets check that here just in case
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if(!subrange->list_locations.empty())
|
||||
if(subrange->list_accessLocations.size() > 1)
|
||||
{
|
||||
sint32 curIdx = -1;
|
||||
for(auto& location : subrange->list_locations)
|
||||
for(size_t i=0; i<subrange->list_accessLocations.size()-1; i++)
|
||||
{
|
||||
cemu_assert_debug(curIdx < location.index);
|
||||
curIdx = location.index;
|
||||
cemu_assert_debug(subrange->list_accessLocations[i].pos < subrange->list_accessLocations[i+1].pos);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// split locations
|
||||
// since there are 2 edges per instruction and locations track both via a single index, locations on the split point might need to be copied into both ranges
|
||||
for (auto& location : subrange->list_locations)
|
||||
{
|
||||
if(tailInterval.ContainsInstructionIndex(location.index))
|
||||
tailSubrange->list_locations.push_back(location);
|
||||
}
|
||||
// remove tail locations from head
|
||||
for (sint32 i = 0; i < subrange->list_locations.size(); i++)
|
||||
{
|
||||
raLivenessLocation_t* location = subrange->list_locations.data() + i;
|
||||
if (!headInterval.ContainsInstructionIndex(location->index))
|
||||
{
|
||||
subrange->list_locations.resize(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto it = std::lower_bound(
|
||||
subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), splitPosition,
|
||||
[](const raAccessLocation& accessLoc, raInstructionEdge value) { return accessLoc.pos < value; }
|
||||
);
|
||||
size_t originalCount = subrange->list_accessLocations.size();
|
||||
tailSubrange->list_accessLocations.insert(tailSubrange->list_accessLocations.end(), it, subrange->list_accessLocations.end());
|
||||
subrange->list_accessLocations.erase(it, subrange->list_accessLocations.end());
|
||||
cemu_assert_debug(subrange->list_accessLocations.empty() || subrange->list_accessLocations.back().pos < splitPosition);
|
||||
cemu_assert_debug(tailSubrange->list_accessLocations.empty() || tailSubrange->list_accessLocations.front().pos >= splitPosition);
|
||||
cemu_assert_debug(subrange->list_accessLocations.size() + tailSubrange->list_accessLocations.size() == originalCount);
|
||||
// split fixed reg requirements
|
||||
for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
|
||||
{
|
||||
|
@ -581,15 +507,10 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
|
|||
// adjust intervals
|
||||
subrange->interval2 = headInterval;
|
||||
tailSubrange->interval2 = tailInterval;
|
||||
// fix locations to only include read/write edges within the range
|
||||
if(subrange)
|
||||
IMLRA_FixLocations(subrange);
|
||||
if(tailSubrange)
|
||||
IMLRA_FixLocations(tailSubrange);
|
||||
// trim to hole
|
||||
if(trimToHole)
|
||||
{
|
||||
if(subrange->list_locations.empty() && (subrange->interval2.start.IsInstructionIndex() && subrange->interval2.end.IsInstructionIndex()))
|
||||
if(subrange->list_accessLocations.empty() && (subrange->interval2.start.IsInstructionIndex() && subrange->interval2.end.IsInstructionIndex()))
|
||||
{
|
||||
PPCRecRA_deleteSubrange(ppcImlGenContext, subrange);
|
||||
subrange = nullptr;
|
||||
|
@ -598,7 +519,7 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
|
|||
{
|
||||
IMLRA_TrimRangeToUse(subrange);
|
||||
}
|
||||
if(tailSubrange->list_locations.empty() && (tailSubrange->interval2.start.IsInstructionIndex() && tailSubrange->interval2.end.IsInstructionIndex()))
|
||||
if(tailSubrange->list_accessLocations.empty() && (tailSubrange->interval2.start.IsInstructionIndex() && tailSubrange->interval2.end.IsInstructionIndex()))
|
||||
{
|
||||
PPCRecRA_deleteSubrange(ppcImlGenContext, tailSubrange);
|
||||
tailSubrange = nullptr;
|
||||
|
@ -622,26 +543,6 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
|
|||
return tailSubrange;
|
||||
}
|
||||
|
||||
// Records a read and/or write access at instruction `index` for the subrange.
// If the most recent location already refers to the same instruction its flags are merged,
// otherwise a new location is appended. Indices are expected to be passed in ascending order (debug assert).
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite)
{
	auto& locations = subrange->list_locations;
	if (!locations.empty())
	{
		auto& newest = locations.back();
		cemu_assert_debug(newest.index <= index);
		if (newest.index == index)
		{
			// same instruction: merge access flags into the existing entry
			newest.isRead |= isRead;
			newest.isWrite |= isWrite;
			return;
		}
	}
	// first location, or a later instruction than the last one tracked
	locations.emplace_back(index, isRead, isWrite);
}
|
||||
|
||||
sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment)
|
||||
{
|
||||
sint32 v = imlSegment->loopDepth + 1;
|
||||
|
@ -649,40 +550,6 @@ sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment)
|
|||
return v*v; // 25, 100, 225, 400
|
||||
}
|
||||
|
||||
// calculate cost of entire range cluster
|
||||
sint32 PPCRecRARange_estimateTotalCost(std::span<raLivenessRange*> ranges)
|
||||
{
|
||||
sint32 cost = 0;
|
||||
|
||||
// todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code).
|
||||
|
||||
// currently we calculate the cost based on the most expensive entry/exit point
|
||||
|
||||
sint32 mostExpensiveRead = 0;
|
||||
sint32 mostExpensiveWrite = 0;
|
||||
sint32 readCount = 0;
|
||||
sint32 writeCount = 0;
|
||||
|
||||
for (auto& subrange : ranges)
|
||||
{
|
||||
if (!subrange->interval2.ExtendsPreviousSegment())
|
||||
{
|
||||
//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
|
||||
mostExpensiveRead = std::max(mostExpensiveRead, IMLRA_GetSegmentReadWriteCost(subrange->imlSegment));
|
||||
readCount++;
|
||||
}
|
||||
if (!subrange->interval2.ExtendsIntoNextSegment())
|
||||
{
|
||||
//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
|
||||
mostExpensiveWrite = std::max(mostExpensiveWrite, IMLRA_GetSegmentReadWriteCost(subrange->imlSegment));
|
||||
writeCount++;
|
||||
}
|
||||
}
|
||||
cost = mostExpensiveRead + mostExpensiveWrite;
|
||||
cost = cost + (readCount + writeCount) / 10;
|
||||
return cost;
|
||||
}
|
||||
|
||||
// calculate additional cost of range that it would have after calling _ExplodeRange() on it
|
||||
sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange)
|
||||
{
|
||||
|
@ -690,18 +557,19 @@ sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange)
|
|||
sint32 cost = 0;//-PPCRecRARange_estimateTotalCost(ranges);
|
||||
for (auto& subrange : ranges)
|
||||
{
|
||||
if (subrange->list_locations.empty())
|
||||
if (subrange->list_accessLocations.empty())
|
||||
continue; // this range would be deleted and thus has no cost
|
||||
sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
|
||||
bool hasAdditionalLoad = subrange->interval2.ExtendsPreviousSegment();
|
||||
bool hasAdditionalStore = subrange->interval2.ExtendsIntoNextSegment();
|
||||
if(hasAdditionalLoad && !subrange->list_locations.front().isRead && subrange->list_locations.front().isWrite) // if written before read, then a load isn't necessary
|
||||
if(hasAdditionalLoad && subrange->list_accessLocations.front().IsWrite()) // if written before read then a load isn't necessary
|
||||
{
|
||||
cemu_assert_debug(!subrange->list_accessLocations.front().IsRead());
|
||||
cost += segmentLoadStoreCost;
|
||||
}
|
||||
if(hasAdditionalStore)
|
||||
{
|
||||
bool hasWrite = std::find_if(subrange->list_locations.begin(), subrange->list_locations.end(), [](const raLivenessLocation_t& loc) { return loc.isWrite; }) != subrange->list_locations.end();
|
||||
bool hasWrite = std::find_if(subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != subrange->list_accessLocations.end();
|
||||
if(!hasWrite) // ranges which don't modify their value do not need to be stored
|
||||
cost += segmentLoadStoreCost;
|
||||
}
|
||||
|
@ -721,60 +589,45 @@ sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInst
|
|||
|
||||
sint32 cost = 0;
|
||||
// find split position in location list
|
||||
if (subrange->list_locations.empty())
|
||||
{
|
||||
assert_dbg(); // should not happen?
|
||||
if (subrange->list_accessLocations.empty())
|
||||
return 0;
|
||||
}
|
||||
sint32 splitInstructionIndex = splitPosition.GetInstructionIndex();
|
||||
if (splitInstructionIndex <= subrange->list_locations.front().index)
|
||||
if (splitPosition <= subrange->list_accessLocations.front().pos)
|
||||
return 0;
|
||||
if (splitInstructionIndex > subrange->list_locations.back().index)
|
||||
if (splitPosition > subrange->list_accessLocations.back().pos)
|
||||
return 0;
|
||||
|
||||
// this can be optimized, but we should change list_locations to track instruction edges instead of instruction indices
|
||||
std::vector<raLivenessLocation_t> headLocations;
|
||||
std::vector<raLivenessLocation_t> tailLocations;
|
||||
for (auto& location : subrange->list_locations)
|
||||
size_t firstTailLocationIndex = 0;
|
||||
for (size_t i = 0; i < subrange->list_accessLocations.size(); i++)
|
||||
{
|
||||
if(location.GetReadPos() < splitPosition || location.GetWritePos() < splitPosition)
|
||||
headLocations.push_back(location);
|
||||
if(location.GetReadPos() >= splitPosition || location.GetWritePos() >= splitPosition)
|
||||
tailLocations.push_back(location);
|
||||
}
|
||||
// fixup locations
|
||||
if(!headLocations.empty() && headLocations.back().GetWritePos() >= splitPosition)
|
||||
{
|
||||
headLocations.back().isWrite = false;
|
||||
if(!headLocations.back().isRead && !headLocations.back().isWrite)
|
||||
headLocations.pop_back();
|
||||
}
|
||||
if(!tailLocations.empty() && tailLocations.front().GetReadPos() < splitPosition)
|
||||
{
|
||||
tailLocations.front().isRead = false;
|
||||
if(!tailLocations.front().isRead && !tailLocations.front().isWrite)
|
||||
tailLocations.erase(tailLocations.begin());
|
||||
if (subrange->list_accessLocations[i].pos >= splitPosition)
|
||||
{
|
||||
firstTailLocationIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
std::span<raAccessLocation> headLocations{subrange->list_accessLocations.data(), firstTailLocationIndex};
|
||||
std::span<raAccessLocation> tailLocations{subrange->list_accessLocations.data() + firstTailLocationIndex, subrange->list_accessLocations.size() - firstTailLocationIndex};
|
||||
cemu_assert_debug(headLocations.empty() || headLocations.back().pos < splitPosition);
|
||||
cemu_assert_debug(tailLocations.empty() || tailLocations.front().pos >= splitPosition);
|
||||
|
||||
// based on
|
||||
sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
|
||||
|
||||
auto CalculateCostFromLocationRange = [segmentLoadStoreCost](const std::vector<raLivenessLocation_t>& locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32
|
||||
auto CalculateCostFromLocationRange = [segmentLoadStoreCost](std::span<raAccessLocation> locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32
|
||||
{
|
||||
if(locations.empty())
|
||||
return 0;
|
||||
sint32 cost = 0;
|
||||
if(locations.front().isRead && trackLoadCost)
|
||||
if(locations.front().IsRead() && trackLoadCost)
|
||||
cost += segmentLoadStoreCost; // not overwritten, so there is a load cost
|
||||
bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raLivenessLocation_t& loc) { return loc.isWrite; }) != locations.end();
|
||||
bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != locations.end();
|
||||
if(hasWrite && trackStoreCost)
|
||||
cost += segmentLoadStoreCost; // modified, so there is a store cost
|
||||
return cost;
|
||||
};
|
||||
|
||||
sint32 baseCost = CalculateCostFromLocationRange(subrange->list_locations);
|
||||
sint32 baseCost = CalculateCostFromLocationRange(subrange->list_accessLocations);
|
||||
|
||||
bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().isRead && tailLocations.front().isWrite;
|
||||
bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().IsRead() && tailLocations.front().IsWrite();
|
||||
|
||||
sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true);
|
||||
cemu_assert_debug(newCost >= baseCost);
|
||||
|
|
|
@ -155,26 +155,21 @@ private:
|
|||
|
||||
};
|
||||
|
||||
struct raLivenessLocation_t
|
||||
struct raAccessLocation
|
||||
{
|
||||
sint32 index;
|
||||
bool isRead;
|
||||
bool isWrite;
|
||||
raAccessLocation(raInstructionEdge pos) : pos(pos) {}
|
||||
|
||||
raLivenessLocation_t() = default;
|
||||
|
||||
raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
|
||||
: index(index), isRead(isRead), isWrite(isWrite) {};
|
||||
|
||||
raInstructionEdge GetReadPos()
|
||||
bool IsRead() const
|
||||
{
|
||||
return raInstructionEdge(index, true);
|
||||
return pos.IsOnInputEdge();
|
||||
}
|
||||
|
||||
raInstructionEdge GetWritePos()
|
||||
bool IsWrite() const
|
||||
{
|
||||
return raInstructionEdge(index, false);
|
||||
return pos.IsOnOutputEdge();
|
||||
}
|
||||
|
||||
raInstructionEdge pos;
|
||||
};
|
||||
|
||||
struct raInterval
|
||||
|
@ -321,7 +316,7 @@ struct raLivenessRange
|
|||
// processing
|
||||
uint32 lastIterationIndex;
|
||||
// instruction read/write locations
|
||||
std::vector<raLivenessLocation_t> list_locations;
|
||||
std::vector<raAccessLocation> list_accessLocations;
|
||||
// ordered list of all raInstructionEdge indices which require a fixed register
|
||||
std::vector<raFixedRegRequirement> list_fixedRegRequirements;
|
||||
// linked list (subranges with same GPR virtual register)
|
||||
|
@ -360,7 +355,6 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
|
|||
|
||||
raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole = false);
|
||||
|
||||
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite);
|
||||
void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange);
|
||||
|
||||
// cost estimation
|
||||
|
|
Loading…
Reference in New Issue