coreinit: Rework thread creation

The new implementation is much closer to console behavior. For example, we previously didn't align the stack, which would cause crashes in the Miiverse applet.
This commit is contained in:
Exzap 2024-05-04 07:05:59 +02:00
parent 041f29a914
commit a16c37f0c5
10 changed files with 297 additions and 117 deletions

View File

@ -187,7 +187,7 @@ int Latte_ThreadEntry()
rule.overwrite_settings.width >= 0 || rule.overwrite_settings.height >= 0 || rule.overwrite_settings.depth >= 0) rule.overwrite_settings.width >= 0 || rule.overwrite_settings.height >= 0 || rule.overwrite_settings.depth >= 0)
{ {
LatteGPUState.allowFramebufferSizeOptimization = false; LatteGPUState.allowFramebufferSizeOptimization = false;
cemuLog_log(LogType::Force, "Graphic pack {} prevents rendertarget size optimization.", pack->GetName()); cemuLog_log(LogType::Force, "Graphic pack \"{}\" prevents rendertarget size optimization. This warning can be ignored and is intended for graphic pack developers", pack->GetName());
break; break;
} }
} }

View File

@ -35,12 +35,12 @@
#include "Cafe/OS/libs/coreinit/coreinit_MEM_BlockHeap.h" #include "Cafe/OS/libs/coreinit/coreinit_MEM_BlockHeap.h"
#include "Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.h" #include "Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.h"
CoreinitSharedData* gCoreinitData = NULL; CoreinitSharedData* gCoreinitData = nullptr;
sint32 ScoreStackTrace(OSThread_t* thread, MPTR sp) sint32 ScoreStackTrace(OSThread_t* thread, MPTR sp)
{ {
uint32 stackMinAddr = _swapEndianU32(thread->stackEnd); uint32 stackMinAddr = thread->stackEnd.GetMPTR();
uint32 stackMaxAddr = _swapEndianU32(thread->stackBase); uint32 stackMaxAddr = thread->stackBase.GetMPTR();
sint32 score = 0; sint32 score = 0;
uint32 currentStackPtr = sp; uint32 currentStackPtr = sp;
@ -95,8 +95,8 @@ void DebugLogStackTrace(OSThread_t* thread, MPTR sp)
// print stack trace // print stack trace
uint32 currentStackPtr = highestScoreSP; uint32 currentStackPtr = highestScoreSP;
uint32 stackMinAddr = _swapEndianU32(thread->stackEnd); uint32 stackMinAddr = thread->stackEnd.GetMPTR();
uint32 stackMaxAddr = _swapEndianU32(thread->stackBase); uint32 stackMaxAddr = thread->stackBase.GetMPTR();
for (sint32 i = 0; i < 20; i++) for (sint32 i = 0; i < 20; i++)
{ {
uint32 nextStackPtr = memory_readU32(currentStackPtr); uint32 nextStackPtr = memory_readU32(currentStackPtr);

View File

@ -22,7 +22,7 @@ namespace coreinit
MPTR _iob_lock[GHS_FOPEN_MAX]; MPTR _iob_lock[GHS_FOPEN_MAX];
uint16be __gh_FOPEN_MAX; uint16be __gh_FOPEN_MAX;
MEMPTR<void> ghs_environ; MEMPTR<void> ghs_environ;
uint32 ghs_Errno; // exposed by __gh_errno_ptr() or via 'errno' data export uint32 ghs_Errno; // exposed as 'errno' data export
}; };
SysAllocator<GHSAccessibleData> g_ghs_data; SysAllocator<GHSAccessibleData> g_ghs_data;
@ -159,7 +159,7 @@ namespace coreinit
void* __gh_errno_ptr() void* __gh_errno_ptr()
{ {
OSThread_t* currentThread = coreinit::OSGetCurrentThread(); OSThread_t* currentThread = coreinit::OSGetCurrentThread();
return &currentThread->context.error; return &currentThread->context.ghs_errno;
} }
void* __get_eh_store_globals() void* __get_eh_store_globals()

View File

@ -204,7 +204,7 @@ namespace coreinit
// and a message queue large enough to hold the maximum number of commands (IPC_NUM_RESOURCE_BUFFERS) // and a message queue large enough to hold the maximum number of commands (IPC_NUM_RESOURCE_BUFFERS)
OSInitMessageQueue(gIPCThreadMsgQueue.GetPtr() + coreIndex, _gIPCThreadSemaphoreStorage.GetPtr() + coreIndex * IPC_NUM_RESOURCE_BUFFERS, IPC_NUM_RESOURCE_BUFFERS); OSInitMessageQueue(gIPCThreadMsgQueue.GetPtr() + coreIndex, _gIPCThreadSemaphoreStorage.GetPtr() + coreIndex * IPC_NUM_RESOURCE_BUFFERS, IPC_NUM_RESOURCE_BUFFERS);
OSThread_t* ipcThread = gIPCThread.GetPtr() + coreIndex; OSThread_t* ipcThread = gIPCThread.GetPtr() + coreIndex;
OSCreateThreadType(ipcThread, PPCInterpreter_makeCallableExportDepr(__IPCDriverThreadFunc), 0, nullptr, _gIPCThreadStack.GetPtr() + 0x4000 * coreIndex + 0x4000, 0x4000, 15, (1 << coreIndex), OSThread_t::THREAD_TYPE::TYPE_DRIVER); __OSCreateThreadType(ipcThread, PPCInterpreter_makeCallableExportDepr(__IPCDriverThreadFunc), 0, nullptr, _gIPCThreadStack.GetPtr() + 0x4000 * coreIndex + 0x4000, 0x4000, 15, (1 << coreIndex), OSThread_t::THREAD_TYPE::TYPE_DRIVER);
sprintf((char*)_gIPCThreadNameStorage.GetPtr()+coreIndex*0x18, "{SYS IPC Core %d}", coreIndex); sprintf((char*)_gIPCThreadNameStorage.GetPtr()+coreIndex*0x18, "{SYS IPC Core %d}", coreIndex);
OSSetThreadName(ipcThread, (char*)_gIPCThreadNameStorage.GetPtr() + coreIndex * 0x18); OSSetThreadName(ipcThread, (char*)_gIPCThreadNameStorage.GetPtr() + coreIndex * 0x18);
OSResumeThread(ipcThread); OSResumeThread(ipcThread);

View File

@ -215,14 +215,171 @@ namespace coreinit
hCPU->spr.LR = lr; hCPU->spr.LR = lr;
hCPU->gpr[3] = r3; hCPU->gpr[3] = r3;
hCPU->gpr[4] = r4; hCPU->gpr[4] = r4;
hCPU->instructionPointer = _swapEndianU32(currentThread->entrypoint); hCPU->instructionPointer = currentThread->entrypoint.GetMPTR();
} }
void coreinitExport_OSExitThreadDepr(PPCInterpreter_t* hCPU); void coreinitExport_OSExitThreadDepr(PPCInterpreter_t* hCPU);
void OSCreateThreadInternal(OSThread_t* thread, uint32 entryPoint, MPTR stackLowerBaseAddr, uint32 stackSize, uint8 affinityMask, OSThread_t::THREAD_TYPE threadType) void __OSInitContext(OSContext_t* ctx, MEMPTR<void> initialIP, MEMPTR<void> initialStackPointer)
{
ctx->SetContextMagic();
ctx->gpr[0] = 0; // r0 is left uninitialized on console?
for(auto& it : ctx->gpr)
it = 0;
ctx->gpr[1] = _swapEndianU32(initialStackPointer.GetMPTR());
ctx->gpr[2] = _swapEndianU32(RPLLoader_GetSDA2Base());
ctx->gpr[13] = _swapEndianU32(RPLLoader_GetSDA1Base());
ctx->srr0 = initialIP.GetMPTR();
ctx->cr = 0;
ctx->ukn0A8 = 0;
ctx->ukn0AC = 0;
ctx->gqr[0] = 0;
ctx->gqr[1] = 0;
ctx->gqr[2] = 0;
ctx->gqr[3] = 0;
ctx->gqr[4] = 0;
ctx->gqr[5] = 0;
ctx->gqr[6] = 0;
ctx->gqr[7] = 0;
ctx->dsi_dar = 0;
ctx->srr1 = 0x9032;
ctx->xer = 0;
ctx->dsi_dsisr = 0;
ctx->upir = 0;
ctx->boostCount = 0;
ctx->state = 0;
for(auto& it : ctx->coretime)
it = 0;
ctx->starttime = 0;
ctx->ghs_errno = 0;
ctx->upmc1 = 0;
ctx->upmc2 = 0;
ctx->upmc3 = 0;
ctx->upmc4 = 0;
ctx->ummcr0 = 0;
ctx->ummcr1 = 0;
}
// Fills in the fields of an OSThread_t for a newly created thread: scheduling state, wait/join queues,
// CRT and TLS bookkeeping, the initial register context and the stack bounds.
//
// thread         - thread structure to initialize
// entrypoint     - stored in thread->entrypoint; a null entrypoint leaves the thread in STATE_NONE with
//                  suspendCounter 0, otherwise the thread is STATE_READY with suspendCounter 1
// argInt, argPtr - stored (endian-swapped) in context gpr[3] and gpr[4]
// stackTop       - upper end of the stack; stored unmodified as stackBase
// stackSize      - stack size in bytes; stackEnd is stackTop minus stackSize
// priority       - written to both basePriority and effectivePriority
// upirCoreIndex  - written to context.upir
// threadType     - written to thread->type
void __OSThreadInit(OSThread_t* thread, MEMPTR<void> entrypoint, uint32 argInt, MEMPTR<void> argPtr, MEMPTR<void> stackTop, uint32 stackSize, sint32 priority, uint32 upirCoreIndex, OSThread_t::THREAD_TYPE threadType)
{
thread->effectivePriority = priority;
thread->type = threadType;
thread->basePriority = priority;
thread->SetThreadMagic();
thread->id = 0x8000;
thread->waitAlarm = nullptr;
thread->entrypoint = entrypoint;
thread->quantumTicks = 0;
// initial state and suspend counter depend on whether an entrypoint was supplied
if(entrypoint)
{
thread->state = OSThread_t::THREAD_STATE::STATE_READY;
thread->suspendCounter = 1;
}
else
{
thread->state = OSThread_t::THREAD_STATE::STATE_NONE;
thread->suspendCounter = 0;
}
thread->exitValue = (uint32)-1;
thread->requestFlags = OSThread_t::REQUEST_FLAG_BIT::REQUEST_FLAG_NONE;
thread->pendingSuspend = 0;
thread->suspendResult = 0xFFFFFFFF;
thread->coretimeSumQuantumStart = 0;
thread->deallocatorFunc = nullptr;
thread->cleanupCallback = nullptr;
thread->waitingForFastMutex = nullptr;
thread->stateFlags = 0;
thread->waitingForMutex = nullptr;
// zero the whole CRT area; the static_assert pins the size this code expects
memset(&thread->crt, 0, sizeof(thread->crt));
static_assert(sizeof(thread->crt) == 0x1D8);
thread->tlsBlocksMPTR = 0;
thread->numAllocatedTLSBlocks = 0;
thread->tlsStatus = 0;
OSInitThreadQueueEx(&thread->joinQueue, thread);
OSInitThreadQueueEx(&thread->suspendQueue, thread);
// the mutex queue is set up field by field instead of through OSInitThreadQueueEx
thread->mutexQueue.ukn08 = thread;
thread->mutexQueue.ukn0C = 0;
thread->mutexQueue.tail = nullptr;
thread->mutexQueue.head = nullptr;
thread->ownedFastMutex.next = nullptr;
thread->ownedFastMutex.prev = nullptr;
thread->contendedFastMutex.next = nullptr;
thread->contendedFastMutex.prev = nullptr;
// round the stack top down to a multiple of 8 and clear the two 32-bit words directly below it
MEMPTR<void> alignedStackTop{MEMPTR<void>(stackTop).GetMPTR() & 0xFFFFFFF8};
MEMPTR<uint32be> alignedStackTop32{alignedStackTop};
alignedStackTop32[-1] = 0;
alignedStackTop32[-2] = 0;
// the context is initialized with the callable export of threadEntry as its initial IP (not the guest entrypoint)
// and with the address of the lower of the two cleared words as its initial stack pointer
__OSInitContext(&thread->context, MEMPTR<void>(PPCInterpreter_makeCallableExportDepr(threadEntry)), (void*)(alignedStackTop32.GetPtr() - 2));
thread->stackBase = stackTop; // without alignment
thread->stackEnd = ((uint8*)stackTop.GetPtr() - stackSize);
// overwrite selected context fields after __OSInitContext
thread->context.upir = upirCoreIndex;
thread->context.lr = _swapEndianU32(PPCInterpreter_makeCallableExportDepr(coreinitExport_OSExitThreadDepr));
thread->context.gpr[3] = _swapEndianU32(argInt);
thread->context.gpr[4] = _swapEndianU32(argPtr.GetMPTR());
// marker word written at the lowest stack address (same address as stackEnd)
// NOTE(review): presumably a stack overflow sentinel - confirm against the code that reads it
*(uint32be*)((uint8*)stackTop.GetPtr() - stackSize) = 0xDEADBABE;
thread->alarmRelatedUkn = 0;
for(auto& it : thread->specificArray)
it = nullptr;
thread->context.fpscr.fpscr = 4;
for(sint32 i=0; i<32; i++)
{
thread->context.fp_ps0[i] = 0.0;
thread->context.fp_ps1[i] = 0.0;
}
// gqr[2] to gqr[5] receive non-zero defaults; the other entries keep the values set by __OSInitContext
thread->context.gqr[2] = 0x40004;
thread->context.gqr[3] = 0x50005;
thread->context.gqr[4] = 0x60006;
thread->context.gqr[5] = 0x70007;
for(sint32 i=0; i<Espresso::CORE_COUNT; i++)
thread->context.coretime[i] = 0;
// currentRunQueue and waitQueueLink are not initialized by COS and are instead overwritten without validation
// since we already have integrity checks in other functions, let's initialize them here
for(sint32 i=0; i<Espresso::CORE_COUNT; i++)
thread->currentRunQueue[i] = nullptr;
thread->waitQueueLink.prev = nullptr;
thread->waitQueueLink.next = nullptr;
thread->wakeTimeRelatedUkn2 = 0;
thread->wakeUpCount = 0;
thread->wakeUpTime = 0;
thread->wakeTimeRelatedUkn1 = 0x7FFFFFFFFFFFFFFF;
// quantumTicks and coretimeSumQuantumStart are assigned a second time here (already zeroed above)
thread->quantumTicks = 0;
thread->coretimeSumQuantumStart = 0;
thread->totalCycles = 0;
for(auto& it : thread->padding68C)
it = 0;
}
// Restricts a thread to exactly one core. The previous core selection is removed from both
// thread->attr and thread->context.affinity, then the bit belonging to coreIndex is set in each.
// coreIndex is expected to be 0, 1 or 2 (checked by a debug assert).
void SetThreadAffinityToCore(OSThread_t* thread, uint32 coreIndex)
{
	cemu_assert_debug(coreIndex < 3);
	// clear the low three affinity bits of the context, and the core bits plus ATTR_UKN_010 of the attributes
	thread->context.affinity &= 0xFFFFFFF8;
	thread->attr &= ~(OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE0 | OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE1 | OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE2 | OSThread_t::ATTR_BIT::ATTR_UKN_010);
	switch (coreIndex)
	{
	case 0:
		thread->attr |= OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE0;
		thread->context.affinity |= (1<<0);
		break;
	case 1:
		thread->attr |= OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE1;
		thread->context.affinity |= (1<<1);
		break;
	default:
		// core 2; any index other than 0 or 1 ends up here, same as the former trailing else branch
		thread->attr |= OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE2;
		thread->context.affinity |= (1<<2);
		break;
	}
}
void __OSCreateThreadOnActiveThreadWorkaround(OSThread_t* thread)
{ {
cemu_assert_debug(thread != nullptr); // make thread struct mandatory. Caller can always use SysAllocator
__OSLockScheduler(); __OSLockScheduler();
bool isThreadStillActive = __OSIsThreadActive(thread); bool isThreadStillActive = __OSIsThreadActive(thread);
if (isThreadStillActive) if (isThreadStillActive)
@ -248,84 +405,97 @@ namespace coreinit
} }
cemu_assert_debug(__OSIsThreadActive(thread) == false); cemu_assert_debug(__OSIsThreadActive(thread) == false);
__OSUnlockScheduler(); __OSUnlockScheduler();
memset(thread, 0x00, sizeof(OSThread_t)); }
// init signatures
thread->SetMagic();
thread->type = threadType;
thread->state = (entryPoint != MPTR_NULL) ? OSThread_t::THREAD_STATE::STATE_READY : OSThread_t::THREAD_STATE::STATE_NONE;
thread->entrypoint = _swapEndianU32(entryPoint);
__OSSetThreadBasePriority(thread, 0);
__OSUpdateThreadEffectivePriority(thread);
// untested, but seems to work (Batman Arkham City uses these values to calculate the stack size for duplicated threads)
thread->stackBase = _swapEndianU32(stackLowerBaseAddr + stackSize); // these fields are quite important and lots of games rely on them being accurate (Examples: Darksiders 2, SMW3D, Batman Arkham City)
thread->stackEnd = _swapEndianU32(stackLowerBaseAddr);
// init stackpointer
thread->context.gpr[GPR_SP] = _swapEndianU32(stackLowerBaseAddr + stackSize - 0x20); // how many free bytes should there be at the beginning of the stack?
// init misc stuff
thread->attr = affinityMask;
thread->context.setAffinity(affinityMask);
thread->context.srr0 = PPCInterpreter_makeCallableExportDepr(threadEntry);
thread->context.lr = _swapEndianU32(PPCInterpreter_makeCallableExportDepr(coreinitExport_OSExitThreadDepr));
thread->id = 0x8000; // Warriors Orochi 3 softlocks if this is zero due to confusing threads (_OSActivateThread should set this?)
// init ugqr
thread->context.gqr[0] = 0x00000000;
thread->context.gqr[1] = 0x00000000;
thread->context.gqr[2] = 0x00040004;
thread->context.gqr[3] = 0x00050005;
thread->context.gqr[4] = 0x00060006;
thread->context.gqr[5] = 0x00070007;
thread->context.gqr[6] = 0x00000000;
thread->context.gqr[7] = 0x00000000;
// init r2 (SDA2) and r3 (SDA)
thread->context.gpr[2] = _swapEndianU32(RPLLoader_GetSDA2Base());
thread->context.gpr[13] = _swapEndianU32(RPLLoader_GetSDA1Base());
// GHS related thread init?
__OSLockScheduler(); bool __OSCreateThreadInternal2(OSThread_t* thread, MEMPTR<void> entrypoint, uint32 argInt, MEMPTR<void> argPtr, MEMPTR<void> stackBase, uint32 stackSize, sint32 priority, uint32 attrBits, OSThread_t::THREAD_TYPE threadType)
// if entrypoint is non-zero then put the thread on the active list and suspend it {
if (entryPoint != MPTR_NULL) __OSCreateThreadOnActiveThreadWorkaround(thread);
OSThread_t* currentThread = OSGetCurrentThread();
if (priority < 0 || priority >= 32)
{ {
thread->suspendCounter = 1; cemuLog_log(LogType::APIErrors, "OSCreateThreadInternal: Thread priority must be in range 0-31");
__OSActivateThread(thread); return false;
thread->state = OSThread_t::THREAD_STATE::STATE_READY; }
if (threadType == OSThread_t::THREAD_TYPE::TYPE_IO)
{
priority = priority + 0x20;
}
else if (threadType == OSThread_t::THREAD_TYPE::TYPE_APP)
{
priority = priority + 0x40;
}
if(attrBits >= 0x20 || stackBase == nullptr || stackSize == 0)
{
cemuLog_logDebug(LogType::APIErrors, "OSCreateThreadInternal: Invalid attributes, stack base or size");
return false;
}
uint32 im = OSDisableInterrupts();
__OSLockScheduler(thread);
uint32 coreIndex = PPCInterpreter_getCurrentInstance() ? OSGetCoreId() : 1;
__OSThreadInit(thread, entrypoint, argInt, argPtr, stackBase, stackSize, priority, coreIndex, threadType);
thread->threadName = nullptr;
thread->context.affinity = attrBits & 7;
thread->attr = attrBits;
if ((attrBits & 7) == 0) // if no explicit affinity is given, use the current core
SetThreadAffinityToCore(thread, OSGetCoreId());
if(currentThread)
{
for(sint32 i=0; i<Espresso::CORE_COUNT; i++)
{
thread->dsiCallback[i] = currentThread->dsiCallback[i];
thread->isiCallback[i] = currentThread->isiCallback[i];
thread->programCallback[i] = currentThread->programCallback[i];
thread->perfMonCallback[i] = currentThread->perfMonCallback[i];
thread->alignmentExceptionCallback[i] = currentThread->alignmentExceptionCallback[i];
}
thread->context.srr1 = thread->context.srr1 | (currentThread->context.srr1 & 0x900);
thread->context.fpscr.fpscr = thread->context.fpscr.fpscr | (currentThread->context.fpscr.fpscr & 0xF8);
} }
else else
thread->suspendCounter = 0; {
__OSUnlockScheduler(); for(sint32 i=0; i<Espresso::CORE_COUNT; i++)
{
thread->dsiCallback[i] = 0;
thread->isiCallback[i] = 0;
thread->programCallback[i] = 0;
thread->perfMonCallback[i] = 0;
thread->alignmentExceptionCallback[i] = nullptr;
}
}
if (entrypoint)
{
thread->id = 0x8000;
__OSActivateThread(thread); // also handles adding the thread to g_activeThreadQueue
}
__OSUnlockScheduler(thread);
OSRestoreInterrupts(im);
// recompile entry point function
if (entrypoint)
PPCRecompiler_recompileIfUnvisited(entrypoint.GetMPTR());
return true;
} }
bool OSCreateThreadType(OSThread_t* thread, MPTR entryPoint, sint32 numParam, void* ptrParam, void* stackTop, sint32 stackSize, sint32 priority, uint32 attr, OSThread_t::THREAD_TYPE threadType) bool OSCreateThreadType(OSThread_t* thread, MPTR entryPoint, sint32 numParam, void* ptrParam, void* stackTop, sint32 stackSize, sint32 priority, uint32 attr, OSThread_t::THREAD_TYPE threadType)
{ {
OSCreateThreadInternal(thread, entryPoint, memory_getVirtualOffsetFromPointer(stackTop) - stackSize, stackSize, attr, threadType); if(threadType != OSThread_t::THREAD_TYPE::TYPE_APP && threadType != OSThread_t::THREAD_TYPE::TYPE_IO)
thread->context.gpr[3] = _swapEndianU32(numParam); // num arguments {
thread->context.gpr[4] = _swapEndianU32(memory_getVirtualOffsetFromPointer(ptrParam)); // arguments pointer cemuLog_logDebug(LogType::APIErrors, "OSCreateThreadType: Invalid thread type");
__OSSetThreadBasePriority(thread, priority); cemu_assert_suspicious();
__OSUpdateThreadEffectivePriority(thread); return false;
// set affinity }
uint8 affinityMask = 0; return __OSCreateThreadInternal2(thread, MEMPTR<void>(entryPoint), numParam, ptrParam, stackTop, stackSize, priority, attr, threadType);
affinityMask = attr & 0x7;
// if no core is selected -> set current one
if (affinityMask == 0)
affinityMask |= (1 << PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance()));
// set attr
// todo: Support for other attr bits
thread->attr = (affinityMask & 0xFF) | (attr & OSThread_t::ATTR_BIT::ATTR_DETACHED);
thread->context.setAffinity(affinityMask);
// recompile entry point function
if (entryPoint != MPTR_NULL)
PPCRecompiler_recompileIfUnvisited(entryPoint);
return true;
} }
bool OSCreateThread(OSThread_t* thread, MPTR entryPoint, sint32 numParam, void* ptrParam, void* stackTop, sint32 stackSize, sint32 priority, uint32 attr) bool OSCreateThread(OSThread_t* thread, MPTR entryPoint, sint32 numParam, void* ptrParam, void* stackTop, sint32 stackSize, sint32 priority, uint32 attr)
{ {
return OSCreateThreadType(thread, entryPoint, numParam, ptrParam, stackTop, stackSize, priority, attr, OSThread_t::THREAD_TYPE::TYPE_APP); return __OSCreateThreadInternal2(thread, MEMPTR<void>(entryPoint), numParam, ptrParam, stackTop, stackSize, priority, attr, OSThread_t::THREAD_TYPE::TYPE_APP);
} }
// alias to OSCreateThreadType, similar to OSCreateThread, but with an additional parameter for the thread type // similar to OSCreateThreadType, but can be used to create any type of thread
bool __OSCreateThreadType(OSThread_t* thread, MPTR entryPoint, sint32 numParam, void* ptrParam, void* stackTop, sint32 stackSize, sint32 priority, uint32 attr, OSThread_t::THREAD_TYPE threadType) bool __OSCreateThreadType(OSThread_t* thread, MPTR entryPoint, sint32 numParam, void* ptrParam, void* stackTop, sint32 stackSize, sint32 priority, uint32 attr, OSThread_t::THREAD_TYPE threadType)
{ {
return OSCreateThreadType(thread, entryPoint, numParam, ptrParam, stackTop, stackSize, priority, attr, threadType); return __OSCreateThreadInternal2(thread, MEMPTR<void>(entryPoint), numParam, ptrParam, stackTop, stackSize, priority, attr, threadType);
} }
bool OSRunThread(OSThread_t* thread, MPTR funcAddress, sint32 numParam, void* ptrParam) bool OSRunThread(OSThread_t* thread, MPTR funcAddress, sint32 numParam, void* ptrParam)
@ -352,7 +522,7 @@ namespace coreinit
// set thread state // set thread state
// todo - this should fully reinitialize the thread? // todo - this should fully reinitialize the thread?
thread->entrypoint = _swapEndianU32(funcAddress); thread->entrypoint = funcAddress;
thread->context.srr0 = PPCInterpreter_makeCallableExportDepr(threadEntry); thread->context.srr0 = PPCInterpreter_makeCallableExportDepr(threadEntry);
thread->context.lr = _swapEndianU32(PPCInterpreter_makeCallableExportDepr(coreinitExport_OSExitThreadDepr)); thread->context.lr = _swapEndianU32(PPCInterpreter_makeCallableExportDepr(coreinitExport_OSExitThreadDepr));
thread->context.gpr[3] = _swapEndianU32(numParam); thread->context.gpr[3] = _swapEndianU32(numParam);
@ -378,10 +548,10 @@ namespace coreinit
OSThread_t* currentThread = coreinit::OSGetCurrentThread(); OSThread_t* currentThread = coreinit::OSGetCurrentThread();
// thread cleanup callback // thread cleanup callback
if (!currentThread->cleanupCallback2.IsNull()) if (currentThread->cleanupCallback)
{ {
currentThread->stateFlags = _swapEndianU32(_swapEndianU32(currentThread->stateFlags) | 0x00000001); currentThread->stateFlags = _swapEndianU32(_swapEndianU32(currentThread->stateFlags) | 0x00000001);
PPCCoreCallback(currentThread->cleanupCallback2.GetMPTR(), currentThread, _swapEndianU32(currentThread->stackEnd)); PPCCoreCallback(currentThread->cleanupCallback.GetMPTR(), currentThread, currentThread->stackEnd);
} }
// cpp exception cleanup // cpp exception cleanup
if (gCoreinitData->__cpp_exception_cleanup_ptr != 0 && currentThread->crt.eh_globals != nullptr) if (gCoreinitData->__cpp_exception_cleanup_ptr != 0 && currentThread->crt.eh_globals != nullptr)
@ -602,7 +772,10 @@ namespace coreinit
sint32 previousSuspendCount = thread->suspendCounter; sint32 previousSuspendCount = thread->suspendCounter;
cemu_assert_debug(previousSuspendCount >= 0); cemu_assert_debug(previousSuspendCount >= 0);
if (previousSuspendCount == 0) if (previousSuspendCount == 0)
{
cemuLog_log(LogType::APIErrors, "OSResumeThread: Resuming thread 0x{:08x} which isn't suspended", MEMPTR<OSThread_t>(thread).GetMPTR());
return 0; return 0;
}
thread->suspendCounter = previousSuspendCount - resumeCount; thread->suspendCounter = previousSuspendCount - resumeCount;
if (thread->suspendCounter < 0) if (thread->suspendCounter < 0)
thread->suspendCounter = 0; thread->suspendCounter = 0;
@ -732,8 +905,8 @@ namespace coreinit
void* OSSetThreadCleanupCallback(OSThread_t* thread, void* cleanupCallback) void* OSSetThreadCleanupCallback(OSThread_t* thread, void* cleanupCallback)
{ {
__OSLockScheduler(); __OSLockScheduler();
void* previousFunc = thread->cleanupCallback2.GetPtr(); void* previousFunc = thread->cleanupCallback.GetPtr();
thread->cleanupCallback2 = cleanupCallback; thread->cleanupCallback = cleanupCallback;
__OSUnlockScheduler(); __OSUnlockScheduler();
return previousFunc; return previousFunc;
} }
@ -1341,7 +1514,7 @@ namespace coreinit
void __OSQueueThreadDeallocation(OSThread_t* thread) void __OSQueueThreadDeallocation(OSThread_t* thread)
{ {
uint32 coreIndex = OSGetCoreId(); uint32 coreIndex = OSGetCoreId();
TerminatorThread::DeallocatorQueueEntry queueEntry(thread, memory_getPointerFromVirtualOffset(_swapEndianU32(thread->stackEnd)), thread->deallocatorFunc); TerminatorThread::DeallocatorQueueEntry queueEntry(thread, thread->stackEnd, thread->deallocatorFunc);
s_terminatorThreads[coreIndex].queueDeallocators.push(queueEntry); s_terminatorThreads[coreIndex].queueDeallocators.push(queueEntry);
OSSignalSemaphoreInternal(s_terminatorThreads[coreIndex].semaphoreQueuedDeallocators.GetPtr(), false); // do not reschedule here! Current thread must not be interrupted otherwise deallocator will run too early OSSignalSemaphoreInternal(s_terminatorThreads[coreIndex].semaphoreQueuedDeallocators.GetPtr(), false); // do not reschedule here! Current thread must not be interrupted otherwise deallocator will run too early
} }

View File

@ -2,9 +2,6 @@
#include "Cafe/HW/Espresso/Const.h" #include "Cafe/HW/Espresso/Const.h"
#include "Cafe/OS/libs/coreinit/coreinit_Scheduler.h" #include "Cafe/OS/libs/coreinit/coreinit_Scheduler.h"
#define OS_CONTEXT_MAGIC_0 'OSCo'
#define OS_CONTEXT_MAGIC_1 'ntxt'
struct OSThread_t; struct OSThread_t;
struct OSContextRegFPSCR_t struct OSContextRegFPSCR_t
@ -16,6 +13,9 @@ struct OSContextRegFPSCR_t
struct OSContext_t struct OSContext_t
{ {
static constexpr uint32 OS_CONTEXT_MAGIC_0 = 0x4f53436f; // "OSCo"
static constexpr uint32 OS_CONTEXT_MAGIC_1 = 0x6e747874; // "ntxt"
/* +0x000 */ betype<uint32> magic0; /* +0x000 */ betype<uint32> magic0;
/* +0x004 */ betype<uint32> magic1; /* +0x004 */ betype<uint32> magic1;
/* +0x008 */ uint32 gpr[32]; /* +0x008 */ uint32 gpr[32];
@ -36,24 +36,29 @@ struct OSContext_t
/* +0x1BC */ uint32 gqr[8]; // GQR/UGQR /* +0x1BC */ uint32 gqr[8]; // GQR/UGQR
/* +0x1DC */ uint32be upir; // set to current core index /* +0x1DC */ uint32be upir; // set to current core index
/* +0x1E0 */ uint64be fp_ps1[32]; /* +0x1E0 */ uint64be fp_ps1[32];
/* +0x2E0 */ uint64 uknTime2E0; /* +0x2E0 */ uint64be coretime[3];
/* +0x2E8 */ uint64 uknTime2E8; /* +0x2F8 */ uint64be starttime;
/* +0x2F0 */ uint64 uknTime2F0; /* +0x300 */ uint32be ghs_errno; // returned by __gh_errno_ptr() (used by socketlasterr)
/* +0x2F8 */ uint64 uknTime2F8;
/* +0x300 */ uint32 error; // returned by __gh_errno_ptr() (used by socketlasterr)
/* +0x304 */ uint32be affinity; /* +0x304 */ uint32be affinity;
/* +0x308 */ uint32 ukn0308; /* +0x308 */ uint32be upmc1;
/* +0x30C */ uint32 ukn030C; /* +0x30C */ uint32be upmc2;
/* +0x310 */ uint32 ukn0310; /* +0x310 */ uint32be upmc3;
/* +0x314 */ uint32 ukn0314; /* +0x314 */ uint32be upmc4;
/* +0x318 */ uint32 ukn0318; /* +0x318 */ uint32be ummcr0;
/* +0x31C */ uint32 ukn031C; /* +0x31C */ uint32be ummcr1;
bool checkMagic() bool checkMagic()
{ {
return magic0 == (uint32)OS_CONTEXT_MAGIC_0 && magic1 == (uint32)OS_CONTEXT_MAGIC_1; return magic0 == (uint32)OS_CONTEXT_MAGIC_0 && magic1 == (uint32)OS_CONTEXT_MAGIC_1;
} }
void SetContextMagic()
{
magic0 = OS_CONTEXT_MAGIC_0;
magic1 = OS_CONTEXT_MAGIC_1;
}
bool hasCoreAffinitySet(uint32 coreIndex) const bool hasCoreAffinitySet(uint32 coreIndex) const
{ {
return (((uint32)affinity >> coreIndex) & 1) != 0; return (((uint32)affinity >> coreIndex) & 1) != 0;
@ -361,6 +366,8 @@ namespace coreinit
struct OSThread_t struct OSThread_t
{ {
static constexpr uint32 MAGIC_THREAD = 0x74487244; // "tHrD"
enum class THREAD_TYPE : uint32 enum class THREAD_TYPE : uint32
{ {
TYPE_DRIVER = 0, TYPE_DRIVER = 0,
@ -383,7 +390,7 @@ struct OSThread_t
ATTR_AFFINITY_CORE1 = 0x2, ATTR_AFFINITY_CORE1 = 0x2,
ATTR_AFFINITY_CORE2 = 0x4, ATTR_AFFINITY_CORE2 = 0x4,
ATTR_DETACHED = 0x8, ATTR_DETACHED = 0x8,
// more flags? ATTR_UKN_010 = 0x10,
}; };
enum REQUEST_FLAG_BIT : uint32 enum REQUEST_FLAG_BIT : uint32
@ -404,23 +411,21 @@ struct OSThread_t
return 0; return 0;
} }
void SetMagic() void SetThreadMagic()
{ {
context.magic0 = OS_CONTEXT_MAGIC_0; magic = MAGIC_THREAD;
context.magic1 = OS_CONTEXT_MAGIC_1;
magic = 'tHrD';
} }
bool IsValidMagic() const bool IsValidMagic() const
{ {
return magic == 'tHrD' && context.magic0 == OS_CONTEXT_MAGIC_0 && context.magic1 == OS_CONTEXT_MAGIC_1; return magic == MAGIC_THREAD && context.magic0 == OSContext_t::OS_CONTEXT_MAGIC_0 && context.magic1 == OSContext_t::OS_CONTEXT_MAGIC_1;
} }
/* +0x000 */ OSContext_t context; /* +0x000 */ OSContext_t context;
/* +0x320 */ uint32be magic; // 'tHrD' /* +0x320 */ uint32be magic; // "tHrD" (0x74487244)
/* +0x324 */ betype<THREAD_STATE> state; /* +0x324 */ betype<THREAD_STATE> state;
/* +0x325 */ uint8 attr; /* +0x325 */ uint8 attr;
/* +0x326 */ uint16be id; // Warriors Orochi 3 uses this to identify threads. Seems like this is always set to 0x8000 ? /* +0x326 */ uint16be id; // Warriors Orochi 3 uses this to identify threads
/* +0x328 */ betype<sint32> suspendCounter; /* +0x328 */ betype<sint32> suspendCounter;
/* +0x32C */ sint32be effectivePriority; // effective priority (lower is higher) /* +0x32C */ sint32be effectivePriority; // effective priority (lower is higher)
/* +0x330 */ sint32be basePriority; // base priority (lower is higher) /* +0x330 */ sint32be basePriority; // base priority (lower is higher)
@ -440,21 +445,21 @@ struct OSThread_t
/* +0x38C */ coreinit::OSThreadLink activeThreadChain; // queue of active threads (g_activeThreadQueue) /* +0x38C */ coreinit::OSThreadLink activeThreadChain; // queue of active threads (g_activeThreadQueue)
/* +0x394 */ MPTR stackBase; // upper limit of stack /* +0x394 */ MEMPTR<void> stackBase; // upper limit of stack
/* +0x398 */ MPTR stackEnd; // lower limit of stack /* +0x398 */ MEMPTR<void> stackEnd; // lower limit of stack
/* +0x39C */ MPTR entrypoint; /* +0x39C */ MEMPTR<void> entrypoint;
/* +0x3A0 */ crt_t crt; /* +0x3A0 */ crt_t crt;
/* +0x578 */ sint32 alarmRelatedUkn; /* +0x578 */ sint32 alarmRelatedUkn;
/* +0x57C */ std::array<MEMPTR<void>, 16> specificArray; /* +0x57C */ std::array<MEMPTR<void>, 16> specificArray;
/* +0x5BC */ betype<THREAD_TYPE> type; /* +0x5BC */ betype<THREAD_TYPE> type;
/* +0x5C0 */ MEMPTR<const char> threadName; /* +0x5C0 */ MEMPTR<const char> threadName;
/* +0x5C4 */ MPTR waitAlarm; // used only by OSWaitEventWithTimeout/OSSignalEvent ? /* +0x5C4 */ MEMPTR<void> waitAlarm; // used only by OSWaitEventWithTimeout/OSSignalEvent ?
/* +0x5C8 */ uint32 userStackPointer; /* +0x5C8 */ uint32 userStackPointer;
/* +0x5CC */ MEMPTR<void> cleanupCallback2; /* +0x5CC */ MEMPTR<void> cleanupCallback;
/* +0x5D0 */ MEMPTR<void> deallocatorFunc; /* +0x5D0 */ MEMPTR<void> deallocatorFunc;
/* +0x5D4 */ uint32 stateFlags; // 0x5D4 | various flags? Controls if canceling/suspension is allowed (at cancel points) or not? If 1 -> Cancel/Suspension not allowed, if 0 -> Cancel/Suspension allowed /* +0x5D4 */ uint32 stateFlags; // 0x5D4 | various flags? Controls if canceling/suspension is allowed (at cancel points) or not? If 1 -> Cancel/Suspension not allowed, if 0 -> Cancel/Suspension allowed
@ -480,6 +485,8 @@ struct OSThread_t
/* +0x660 */ uint32 ukn660; /* +0x660 */ uint32 ukn660;
// todo - some of the members towards the end of the struct were only added in later COS versions. Figure out the mapping between version and members
// TLS // TLS
/* +0x664 */ uint16 numAllocatedTLSBlocks; /* +0x664 */ uint16 numAllocatedTLSBlocks;
/* +0x666 */ sint16 tlsStatus; /* +0x666 */ sint16 tlsStatus;
@ -488,11 +495,11 @@ struct OSThread_t
/* +0x66C */ MEMPTR<coreinit::OSFastMutex> waitingForFastMutex; /* +0x66C */ MEMPTR<coreinit::OSFastMutex> waitingForFastMutex;
/* +0x670 */ coreinit::OSFastMutexLink contendedFastMutex; /* +0x670 */ coreinit::OSFastMutexLink contendedFastMutex;
/* +0x678 */ coreinit::OSFastMutexLink ownedFastMutex; /* +0x678 */ coreinit::OSFastMutexLink ownedFastMutex;
/* +0x680 */ MEMPTR<void> alignmentExceptionCallback[Espresso::CORE_COUNT];
/* +0x680 */ uint32 padding680[28 / 4]; /* +0x68C */ uint32 padding68C[20 / 4];
}; };
static_assert(sizeof(OSThread_t) == 0x6A0);
static_assert(sizeof(OSThread_t) == 0x6A0-4); // todo - determine correct size
namespace coreinit namespace coreinit
{ {

View File

@ -117,10 +117,10 @@ void nsysnetExport_socket_lib_finish(PPCInterpreter_t* hCPU)
osLib_returnFromFunction(hCPU, 0); // 0 -> Success osLib_returnFromFunction(hCPU, 0); // 0 -> Success
} }
uint32* __gh_errno_ptr() static uint32be* __gh_errno_ptr()
{ {
OSThread_t* osThread = coreinit::OSGetCurrentThread(); OSThread_t* osThread = coreinit::OSGetCurrentThread();
return &osThread->context.error; return &osThread->context.ghs_errno;
} }
void _setSockError(sint32 errCode) void _setSockError(sint32 errCode)

View File

@ -963,7 +963,7 @@ namespace snd_core
OSInitMessageQueue(__AXIstThreadMsgQueue.GetPtr(), __AXIstThreadMsgArray.GetPtr(), 0x10); OSInitMessageQueue(__AXIstThreadMsgQueue.GetPtr(), __AXIstThreadMsgArray.GetPtr(), 0x10);
// create thread // create thread
uint8 istThreadAttr = 0; uint8 istThreadAttr = 0;
coreinit::OSCreateThreadType(__AXIstThread.GetPtr(), PPCInterpreter_makeCallableExportDepr(AXIst_ThreadEntry), 0, &__AXIstThreadMsgQueue, __AXIstThreadStack.GetPtr() + 0x4000, 0x4000, 14, istThreadAttr, OSThread_t::THREAD_TYPE::TYPE_DRIVER); coreinit::__OSCreateThreadType(__AXIstThread.GetPtr(), PPCInterpreter_makeCallableExportDepr(AXIst_ThreadEntry), 0, &__AXIstThreadMsgQueue, __AXIstThreadStack.GetPtr() + 0x4000, 0x4000, 14, istThreadAttr, OSThread_t::THREAD_TYPE::TYPE_DRIVER);
coreinit::OSResumeThread(__AXIstThread.GetPtr()); coreinit::OSResumeThread(__AXIstThread.GetPtr());
} }

View File

@ -155,7 +155,7 @@ void ExceptionHandler_LogGeneralInfo()
const char* threadName = "NULL"; const char* threadName = "NULL";
if (!threadItrBE->threadName.IsNull()) if (!threadItrBE->threadName.IsNull())
threadName = threadItrBE->threadName.GetPtr(); threadName = threadItrBE->threadName.GetPtr();
sprintf(dumpLine, "%08x Ent %08x IP %08x LR %08x %-9s Aff %d%d%d Pri %2d Name %s", threadItrMPTR, _swapEndianU32(threadItrBE->entrypoint), threadItrBE->context.srr0, _swapEndianU32(threadItrBE->context.lr), threadStateStr, (affinity >> 0) & 1, (affinity >> 1) & 1, (affinity >> 2) & 1, effectivePriority, threadName); sprintf(dumpLine, "%08x Ent %08x IP %08x LR %08x %-9s Aff %d%d%d Pri %2d Name %s", threadItrMPTR, threadItrBE->entrypoint.GetMPTR(), threadItrBE->context.srr0, _swapEndianU32(threadItrBE->context.lr), threadStateStr, (affinity >> 0) & 1, (affinity >> 1) & 1, (affinity >> 2) & 1, effectivePriority, threadName);
// write line to log // write line to log
CrashLog_WriteLine(dumpLine); CrashLog_WriteLine(dumpLine);
} }

View File

@ -195,10 +195,10 @@ void DebugPPCThreadsWindow::RefreshThreadList()
m_thread_list->InsertItem(item); m_thread_list->InsertItem(item);
m_thread_list->SetItemData(item, (long)threadItrMPTR); m_thread_list->SetItemData(item, (long)threadItrMPTR);
// entry point // entry point
sprintf(tempStr, "%08X", _swapEndianU32(cafeThread->entrypoint)); sprintf(tempStr, "%08X", cafeThread->entrypoint.GetMPTR());
m_thread_list->SetItem(i, 1, tempStr); m_thread_list->SetItem(i, 1, tempStr);
// stack base (low) // stack base (low)
sprintf(tempStr, "%08X - %08X", _swapEndianU32(cafeThread->stackEnd), _swapEndianU32(cafeThread->stackBase)); sprintf(tempStr, "%08X - %08X", cafeThread->stackEnd.GetMPTR(), cafeThread->stackBase.GetMPTR());
m_thread_list->SetItem(i, 2, tempStr); m_thread_list->SetItem(i, 2, tempStr);
// pc // pc
RPLStoredSymbol* symbol = rplSymbolStorage_getByAddress(cafeThread->context.srr0); RPLStoredSymbol* symbol = rplSymbolStorage_getByAddress(cafeThread->context.srr0);