mirror of https://github.com/cemu-project/Cemu.git
ih264d: Small optimizations and experiments with multi-threading
Using the multi-threaded decoder doesn't seem to be worth it, but at least we now have a way to enable it.
This commit is contained in:
parent
f04c7575d7
commit
fda5ec2697
|
@ -183,4 +183,10 @@ endif()
|
||||||
|
|
||||||
if(MSVC)
|
if(MSVC)
|
||||||
set_property(TARGET ih264d PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
set_property(TARGET ih264d PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||||
|
|
||||||
|
# tune settings for slightly better performance
|
||||||
|
target_compile_options(ih264d PRIVATE $<$<CONFIG:Release,RelWithDebInfo>:/Oi>) # enable intrinsic functions
|
||||||
|
target_compile_options(ih264d PRIVATE $<$<CONFIG:Release,RelWithDebInfo>:/Ot>) # favor speed
|
||||||
|
target_compile_options(ih264d PRIVATE "/GS-") # disable buffer security checks (/GS); not config-gated, so this applies to every configuration
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -85,28 +85,59 @@ UWORD32 ithread_get_mutex_lock_size(void)
|
||||||
return sizeof(CRITICAL_SECTION);
|
return sizeof(CRITICAL_SECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct _ithread_launch_param
|
||||||
|
{
|
||||||
|
void (*startFunc)(void* argument);
|
||||||
|
void* argument;
|
||||||
|
};
|
||||||
|
|
||||||
|
DWORD WINAPI _ithread_WinThreadStartRoutine(LPVOID lpThreadParameter)
|
||||||
|
{
|
||||||
|
struct _ithread_launch_param* param = (struct _ithread_launch_param*)lpThreadParameter;
|
||||||
|
typedef void *(*ThreadStartRoutineType)(void *);
|
||||||
|
ThreadStartRoutineType pfnThreadRoutine = (ThreadStartRoutineType)param->startFunc;
|
||||||
|
void* arg = param->argument;
|
||||||
|
free(param);
|
||||||
|
pfnThreadRoutine(arg);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
WORD32 ithread_create(void* thread_handle, void* attribute, void* strt, void* argument)
|
WORD32 ithread_create(void* thread_handle, void* attribute, void* strt, void* argument)
|
||||||
{
|
{
|
||||||
//UNUSED(attribute);
|
UNUSED(attribute);
|
||||||
//return pthread_create((pthread_t*)thread_handle, NULL, (void* (*)(void*)) strt, argument);
|
struct _ithread_launch_param* param = malloc(sizeof(struct _ithread_launch_param));
|
||||||
__debugbreak();
|
param->startFunc = (void (*)(void*))strt;
|
||||||
|
param->argument = argument;
|
||||||
|
HANDLE *handle = (HANDLE*)thread_handle;
|
||||||
|
*handle = CreateThread(NULL, 0, _ithread_WinThreadStartRoutine, param, 0, NULL);
|
||||||
|
if(*handle == NULL)
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
WORD32 ithread_join(void* thread_handle, void** val_ptr)
|
WORD32 ithread_join(void* thread_handle, void** val_ptr)
|
||||||
{
|
{
|
||||||
//UNUSED(val_ptr);
|
//UNUSED(val_ptr);
|
||||||
//pthread_t* pthread_handle = (pthread_t*)thread_handle;
|
HANDLE *handle = (HANDLE*)thread_handle;
|
||||||
//return pthread_join(*pthread_handle, NULL);
|
DWORD result = WaitForSingleObject(*handle, INFINITE);
|
||||||
|
if(result == WAIT_OBJECT_0)
|
||||||
__debugbreak();
|
{
|
||||||
return 0;
|
CloseHandle(*handle);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WORD32 ithread_get_mutex_struct_size(void)
|
WORD32 ithread_get_mutex_struct_size(void)
|
||||||
{
|
{
|
||||||
return sizeof(CRITICAL_SECTION);
|
return sizeof(CRITICAL_SECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
WORD32 ithread_mutex_init(void* mutex)
|
WORD32 ithread_mutex_init(void* mutex)
|
||||||
{
|
{
|
||||||
InitializeCriticalSection((LPCRITICAL_SECTION)mutex);
|
InitializeCriticalSection((LPCRITICAL_SECTION)mutex);
|
||||||
|
@ -153,7 +184,6 @@ UWORD32 ithread_get_sem_struct_size(void)
|
||||||
//return(sizeof(sem_t));
|
//return(sizeof(sem_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
WORD32 ithread_sem_init(void* sem, WORD32 pshared, UWORD32 value)
|
WORD32 ithread_sem_init(void* sem, WORD32 pshared, UWORD32 value)
|
||||||
{
|
{
|
||||||
__debugbreak();
|
__debugbreak();
|
||||||
|
@ -168,7 +198,6 @@ WORD32 ithread_sem_post(void* sem)
|
||||||
//return sem_post((sem_t*)sem);
|
//return sem_post((sem_t*)sem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
WORD32 ithread_sem_wait(void* sem)
|
WORD32 ithread_sem_wait(void* sem)
|
||||||
{
|
{
|
||||||
__debugbreak();
|
__debugbreak();
|
||||||
|
@ -176,7 +205,6 @@ WORD32 ithread_sem_wait(void* sem)
|
||||||
//return sem_wait((sem_t*)sem);
|
//return sem_wait((sem_t*)sem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
WORD32 ithread_sem_destroy(void* sem)
|
WORD32 ithread_sem_destroy(void* sem)
|
||||||
{
|
{
|
||||||
__debugbreak();
|
__debugbreak();
|
||||||
|
|
|
@ -79,10 +79,8 @@
|
||||||
/*
 * Provides __builtin_clz (count of leading zero bits in a 32-bit value)
 * on top of the _BitScanReverse intrinsic.
 *
 * No zero check is performed and the index variable starts uninitialized,
 * so callers must pass x != 0.
 * NOTE(review): confirm no call site can pass 0.
 */
static inline int __builtin_clz(unsigned x)
{
    unsigned long msb_index;

    _BitScanReverse(&msb_index, x);

    /* For an index in 0..31, XOR with 31 gives the same result as 31 - index. */
    return msb_index ^ 31;
}
|
||||||
|
|
||||||
static inline int __builtin_ctz(unsigned x) {
|
static inline int __builtin_ctz(unsigned x) {
|
||||||
|
|
|
@ -254,6 +254,8 @@ namespace H264
|
||||||
m_codecCtx->pv_fxns = (void*)&ih264d_api_function;
|
m_codecCtx->pv_fxns = (void*)&ih264d_api_function;
|
||||||
m_codecCtx->u4_size = sizeof(iv_obj_t);
|
m_codecCtx->u4_size = sizeof(iv_obj_t);
|
||||||
|
|
||||||
|
SetDecoderCoreCount(1);
|
||||||
|
|
||||||
m_isBufferedMode = isBufferedMode;
|
m_isBufferedMode = isBufferedMode;
|
||||||
|
|
||||||
UpdateParameters(false);
|
UpdateParameters(false);
|
||||||
|
@ -278,6 +280,19 @@ namespace H264
|
||||||
m_codecCtx = nullptr;
|
m_codecCtx = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Issues the IH264D_CMD_CTL_SET_NUM_CORES control command on m_codecCtx,
// requesting that the decoder use coreCount cores, and asserts that the
// call reports IV_SUCCESS.
void SetDecoderCoreCount(uint32 coreCount)
{
    ih264d_ctl_set_num_cores_ip_t coresIn;
    ih264d_ctl_set_num_cores_op_t coresOut;

    // each structure carries its own size
    coresIn.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
    coresOut.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);

    coresIn.e_cmd = IVD_CMD_VIDEO_CTL;
    coresIn.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES;
    coresIn.u4_num_cores = coreCount; // valid numbers are 1-4

    IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void*)&coresIn, (void*)&coresOut);
    cemu_assert(status == IV_SUCCESS);
}
|
||||||
|
|
||||||
static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight)
|
static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight)
|
||||||
{
|
{
|
||||||
// create temporary decoder
|
// create temporary decoder
|
||||||
|
@ -702,7 +717,6 @@ namespace H264
|
||||||
decodeResult = m_bufferedResults.front();
|
decodeResult = m_bufferedResults.front();
|
||||||
m_bufferedResults.erase(m_bufferedResults.begin());
|
m_bufferedResults.erase(m_bufferedResults.begin());
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
iv_obj_t* m_codecCtx{nullptr};
|
iv_obj_t* m_codecCtx{nullptr};
|
||||||
bool m_hasBufferSizeInfo{ false };
|
bool m_hasBufferSizeInfo{ false };
|
||||||
|
|
Loading…
Reference in New Issue