mirror of https://github.com/cemu-project/Cemu.git
h264: Use asynchronous decoding when possible (#1257)
This commit is contained in:
parent
4b9c7c0d30
commit
f1685eab66
|
@ -374,7 +374,9 @@ add_library(CemuCafe
|
||||||
OS/libs/gx2/GX2_Texture.h
|
OS/libs/gx2/GX2_Texture.h
|
||||||
OS/libs/gx2/GX2_TilingAperture.cpp
|
OS/libs/gx2/GX2_TilingAperture.cpp
|
||||||
OS/libs/h264_avc/H264Dec.cpp
|
OS/libs/h264_avc/H264Dec.cpp
|
||||||
|
OS/libs/h264_avc/H264DecBackendAVC.cpp
|
||||||
OS/libs/h264_avc/h264dec.h
|
OS/libs/h264_avc/h264dec.h
|
||||||
|
OS/libs/h264_avc/H264DecInternal.h
|
||||||
OS/libs/h264_avc/parser
|
OS/libs/h264_avc/parser
|
||||||
OS/libs/h264_avc/parser/H264Parser.cpp
|
OS/libs/h264_avc/parser/H264Parser.cpp
|
||||||
OS/libs/h264_avc/parser/H264Parser.h
|
OS/libs/h264_avc/parser/H264Parser.h
|
||||||
|
|
|
@ -14,13 +14,10 @@ namespace coreinit
|
||||||
return coreinit::MEMAllocFromExpHeapEx(_sysHeapHandle, size, alignment);
|
return coreinit::MEMAllocFromExpHeapEx(_sysHeapHandle, size, alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
void export_OSAllocFromSystem(PPCInterpreter_t* hCPU)
|
void OSFreeToSystem(void* ptr)
|
||||||
{
|
{
|
||||||
ppcDefineParamU32(size, 0);
|
_sysHeapFreeCounter++;
|
||||||
ppcDefineParamS32(alignment, 1);
|
coreinit::MEMFreeToExpHeap(_sysHeapHandle, ptr);
|
||||||
MEMPTR<void> mem = OSAllocFromSystem(size, alignment);
|
|
||||||
cemuLog_logDebug(LogType::Force, "OSAllocFromSystem(0x{:x}, {}) -> 0x{:08x}", size, alignment, mem.GetMPTR());
|
|
||||||
osLib_returnFromFunction(hCPU, mem.GetMPTR());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void InitSysHeap()
|
void InitSysHeap()
|
||||||
|
@ -34,7 +31,8 @@ namespace coreinit
|
||||||
|
|
||||||
void InitializeSysHeap()
|
void InitializeSysHeap()
|
||||||
{
|
{
|
||||||
osLib_addFunction("coreinit", "OSAllocFromSystem", export_OSAllocFromSystem);
|
cafeExportRegister("h264", OSAllocFromSystem, LogType::CoreinitMem);
|
||||||
|
cafeExportRegister("h264", OSFreeToSystem, LogType::CoreinitMem);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,5 +4,8 @@ namespace coreinit
|
||||||
{
|
{
|
||||||
void InitSysHeap();
|
void InitSysHeap();
|
||||||
|
|
||||||
|
void* OSAllocFromSystem(uint32 size, uint32 alignment);
|
||||||
|
void OSFreeToSystem(void* ptr);
|
||||||
|
|
||||||
void InitializeSysHeap();
|
void InitializeSysHeap();
|
||||||
}
|
}
|
|
@ -1,17 +1,12 @@
|
||||||
#include "Cafe/OS/common/OSCommon.h"
|
#include "Cafe/OS/common/OSCommon.h"
|
||||||
#include "Cafe/HW/Espresso/PPCCallback.h"
|
#include "Cafe/HW/Espresso/PPCCallback.h"
|
||||||
#include "Cafe/OS/libs/h264_avc/parser/H264Parser.h"
|
#include "Cafe/OS/libs/h264_avc/parser/H264Parser.h"
|
||||||
|
#include "Cafe/OS/libs/h264_avc/H264DecInternal.h"
|
||||||
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
||||||
#include "Cafe/CafeSystem.h"
|
#include "Cafe/CafeSystem.h"
|
||||||
|
|
||||||
#include "h264dec.h"
|
#include "h264dec.h"
|
||||||
|
|
||||||
extern "C"
|
|
||||||
{
|
|
||||||
#include "../dependencies/ih264d/common/ih264_typedefs.h"
|
|
||||||
#include "../dependencies/ih264d/decoder/ih264d.h"
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class H264DEC_STATUS : uint32
|
enum class H264DEC_STATUS : uint32
|
||||||
{
|
{
|
||||||
SUCCESS = 0x0,
|
SUCCESS = 0x0,
|
||||||
|
@ -33,10 +28,35 @@ namespace H264
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct H264Context
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
MEMPTR<void> ptr{ nullptr };
|
||||||
|
uint32be length{ 0 };
|
||||||
|
float64be timestamp;
|
||||||
|
}BitStream;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
MEMPTR<void> outputFunc{ nullptr };
|
||||||
|
uint8be outputPerFrame{ 0 }; // whats the default?
|
||||||
|
MEMPTR<void> userMemoryParam{ nullptr };
|
||||||
|
}Param;
|
||||||
|
// misc
|
||||||
|
uint32be sessionHandle;
|
||||||
|
|
||||||
|
// decoder state
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
uint32 numFramesInFlight{0};
|
||||||
|
}decoderState;
|
||||||
|
};
|
||||||
|
|
||||||
uint32 H264DECMemoryRequirement(uint32 codecProfile, uint32 codecLevel, uint32 width, uint32 height, uint32be* sizeRequirementOut)
|
uint32 H264DECMemoryRequirement(uint32 codecProfile, uint32 codecLevel, uint32 width, uint32 height, uint32be* sizeRequirementOut)
|
||||||
{
|
{
|
||||||
if (H264_IsBotW())
|
if (H264_IsBotW())
|
||||||
{
|
{
|
||||||
|
static_assert(sizeof(H264Context) < 256);
|
||||||
*sizeRequirementOut = 256;
|
*sizeRequirementOut = 256;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -169,590 +189,47 @@ namespace H264
|
||||||
return H264DEC_STATUS::BAD_STREAM;
|
return H264DEC_STATUS::BAD_STREAM;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct H264Context
|
|
||||||
{
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
MEMPTR<void> ptr{ nullptr };
|
|
||||||
uint32be length{ 0 };
|
|
||||||
float64be timestamp;
|
|
||||||
}BitStream;
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
MEMPTR<void> outputFunc{ nullptr };
|
|
||||||
uint8be outputPerFrame{ 0 }; // whats the default?
|
|
||||||
MEMPTR<void> userMemoryParam{ nullptr };
|
|
||||||
}Param;
|
|
||||||
// misc
|
|
||||||
uint32be sessionHandle;
|
|
||||||
};
|
|
||||||
|
|
||||||
class H264AVCDecoder
|
|
||||||
{
|
|
||||||
static void* ivd_aligned_malloc(void* ctxt, WORD32 alignment, WORD32 size)
|
|
||||||
{
|
|
||||||
#ifdef _WIN32
|
|
||||||
return _aligned_malloc(size, alignment);
|
|
||||||
#else
|
|
||||||
// alignment is atleast sizeof(void*)
|
|
||||||
alignment = std::max<WORD32>(alignment, sizeof(void*));
|
|
||||||
|
|
||||||
//smallest multiple of 2 at least as large as alignment
|
|
||||||
alignment--;
|
|
||||||
alignment |= alignment << 1;
|
|
||||||
alignment |= alignment >> 1;
|
|
||||||
alignment |= alignment >> 2;
|
|
||||||
alignment |= alignment >> 4;
|
|
||||||
alignment |= alignment >> 8;
|
|
||||||
alignment |= alignment >> 16;
|
|
||||||
alignment ^= (alignment >> 1);
|
|
||||||
|
|
||||||
void* temp;
|
|
||||||
posix_memalign(&temp, (size_t)alignment, (size_t)size);
|
|
||||||
return temp;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static void ivd_aligned_free(void* ctxt, void* buf)
|
|
||||||
{
|
|
||||||
#ifdef _WIN32
|
|
||||||
_aligned_free(buf);
|
|
||||||
#else
|
|
||||||
free(buf);
|
|
||||||
#endif
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
struct DecodeResult
|
|
||||||
{
|
|
||||||
bool frameReady{ false };
|
|
||||||
double timestamp;
|
|
||||||
void* imageOutput;
|
|
||||||
ivd_video_decode_op_t decodeOutput;
|
|
||||||
};
|
|
||||||
|
|
||||||
void Init(bool isBufferedMode)
|
|
||||||
{
|
|
||||||
ih264d_create_ip_t s_create_ip{ 0 };
|
|
||||||
ih264d_create_op_t s_create_op{ 0 };
|
|
||||||
|
|
||||||
s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t);
|
|
||||||
s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE;
|
|
||||||
s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 1; // shared display buffer mode -> We give the decoder a list of buffers that it will use (?)
|
|
||||||
|
|
||||||
s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t);
|
|
||||||
s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV;
|
|
||||||
s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc;
|
|
||||||
s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free;
|
|
||||||
s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL;
|
|
||||||
|
|
||||||
WORD32 status = ih264d_api_function(m_codecCtx, &s_create_ip, &s_create_op);
|
|
||||||
cemu_assert(!status);
|
|
||||||
|
|
||||||
m_codecCtx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle;
|
|
||||||
m_codecCtx->pv_fxns = (void*)&ih264d_api_function;
|
|
||||||
m_codecCtx->u4_size = sizeof(iv_obj_t);
|
|
||||||
|
|
||||||
SetDecoderCoreCount(1);
|
|
||||||
|
|
||||||
m_isBufferedMode = isBufferedMode;
|
|
||||||
|
|
||||||
UpdateParameters(false);
|
|
||||||
|
|
||||||
m_bufferedResults.clear();
|
|
||||||
m_numDecodedFrames = 0;
|
|
||||||
m_hasBufferSizeInfo = false;
|
|
||||||
m_timestampIndex = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Destroy()
|
|
||||||
{
|
|
||||||
if (!m_codecCtx)
|
|
||||||
return;
|
|
||||||
ih264d_delete_ip_t s_delete_ip{ 0 };
|
|
||||||
ih264d_delete_op_t s_delete_op{ 0 };
|
|
||||||
s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t);
|
|
||||||
s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE;
|
|
||||||
s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t);
|
|
||||||
WORD32 status = ih264d_api_function(m_codecCtx, &s_delete_ip, &s_delete_op);
|
|
||||||
cemu_assert_debug(!status);
|
|
||||||
m_codecCtx = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetDecoderCoreCount(uint32 coreCount)
|
|
||||||
{
|
|
||||||
ih264d_ctl_set_num_cores_ip_t s_set_cores_ip;
|
|
||||||
ih264d_ctl_set_num_cores_op_t s_set_cores_op;
|
|
||||||
s_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
|
|
||||||
s_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES;
|
|
||||||
s_set_cores_ip.u4_num_cores = coreCount; // valid numbers are 1-4
|
|
||||||
s_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
|
|
||||||
s_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);
|
|
||||||
IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void *)&s_set_cores_ip, (void *)&s_set_cores_op);
|
|
||||||
cemu_assert(status == IV_SUCCESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight)
|
|
||||||
{
|
|
||||||
// create temporary decoder
|
|
||||||
ih264d_create_ip_t s_create_ip{ 0 };
|
|
||||||
ih264d_create_op_t s_create_op{ 0 };
|
|
||||||
s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t);
|
|
||||||
s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE;
|
|
||||||
s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 0;
|
|
||||||
s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t);
|
|
||||||
s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV;
|
|
||||||
s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc;
|
|
||||||
s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free;
|
|
||||||
s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL;
|
|
||||||
iv_obj_t* ctx = nullptr;
|
|
||||||
WORD32 status = ih264d_api_function(ctx, &s_create_ip, &s_create_op);
|
|
||||||
cemu_assert_debug(!status);
|
|
||||||
if (status != IV_SUCCESS)
|
|
||||||
return false;
|
|
||||||
ctx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle;
|
|
||||||
ctx->pv_fxns = (void*)&ih264d_api_function;
|
|
||||||
ctx->u4_size = sizeof(iv_obj_t);
|
|
||||||
// set header-only mode
|
|
||||||
ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 };
|
|
||||||
ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 };
|
|
||||||
ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t;
|
|
||||||
ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t;
|
|
||||||
ps_ctl_ip->u4_disp_wd = 0;
|
|
||||||
ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE;
|
|
||||||
ps_ctl_ip->e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
|
|
||||||
ps_ctl_ip->e_vid_dec_mode = IVD_DECODE_HEADER;
|
|
||||||
ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL;
|
|
||||||
ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
|
|
||||||
ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t);
|
|
||||||
ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t);
|
|
||||||
status = ih264d_api_function(ctx, &s_h264d_ctl_ip, &s_h264d_ctl_op);
|
|
||||||
cemu_assert(!status);
|
|
||||||
// decode stream
|
|
||||||
ivd_video_decode_ip_t s_dec_ip{ 0 };
|
|
||||||
ivd_video_decode_op_t s_dec_op{ 0 };
|
|
||||||
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
|
|
||||||
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
|
|
||||||
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
|
|
||||||
s_dec_ip.pv_stream_buffer = stream;
|
|
||||||
s_dec_ip.u4_num_Bytes = length;
|
|
||||||
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
|
|
||||||
|
|
||||||
s_dec_op.u4_raw_wd = 0;
|
|
||||||
s_dec_op.u4_raw_ht = 0;
|
|
||||||
|
|
||||||
status = ih264d_api_function(ctx, &s_dec_ip, &s_dec_op);
|
|
||||||
//cemu_assert(status == 0); -> This errors when not both the headers are present, but it will still set the parameters we need
|
|
||||||
bool isValid = false;
|
|
||||||
if (true)//status == 0)
|
|
||||||
{
|
|
||||||
imageWidth = s_dec_op.u4_raw_wd;
|
|
||||||
imageHeight = s_dec_op.u4_raw_ht;
|
|
||||||
cemu_assert_debug(imageWidth != 0 && imageHeight != 0);
|
|
||||||
isValid = true;
|
|
||||||
}
|
|
||||||
// destroy decoder
|
|
||||||
ih264d_delete_ip_t s_delete_ip{ 0 };
|
|
||||||
ih264d_delete_op_t s_delete_op{ 0 };
|
|
||||||
s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t);
|
|
||||||
s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE;
|
|
||||||
s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t);
|
|
||||||
status = ih264d_api_function(ctx, &s_delete_ip, &s_delete_op);
|
|
||||||
cemu_assert_debug(!status);
|
|
||||||
return isValid;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Decode(void* data, uint32 length, double timestamp, void* imageOutput, DecodeResult& decodeResult)
|
|
||||||
{
|
|
||||||
if (!m_hasBufferSizeInfo)
|
|
||||||
{
|
|
||||||
uint32 numByteConsumed = 0;
|
|
||||||
if (!DetermineBufferSizes(data, length, numByteConsumed))
|
|
||||||
{
|
|
||||||
cemuLog_log(LogType::Force, "H264: Unable to determine picture size. Ignoring decode input");
|
|
||||||
decodeResult.frameReady = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
length -= numByteConsumed;
|
|
||||||
data = (uint8*)data + numByteConsumed;
|
|
||||||
m_hasBufferSizeInfo = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
ivd_video_decode_ip_t s_dec_ip{ 0 };
|
|
||||||
ivd_video_decode_op_t s_dec_op{ 0 };
|
|
||||||
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
|
|
||||||
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
|
|
||||||
|
|
||||||
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
|
|
||||||
|
|
||||||
// remember timestamp and associated output buffer
|
|
||||||
m_timestamps[m_timestampIndex] = timestamp;
|
|
||||||
m_imageBuffers[m_timestampIndex] = imageOutput;
|
|
||||||
s_dec_ip.u4_ts = m_timestampIndex;
|
|
||||||
m_timestampIndex = (m_timestampIndex + 1) % 64;
|
|
||||||
|
|
||||||
s_dec_ip.pv_stream_buffer = (uint8*)data;
|
|
||||||
s_dec_ip.u4_num_Bytes = length;
|
|
||||||
|
|
||||||
s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0;
|
|
||||||
s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0;
|
|
||||||
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
|
|
||||||
|
|
||||||
BenchmarkTimer bt;
|
|
||||||
bt.Start();
|
|
||||||
WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
|
|
||||||
if (status != 0 && (s_dec_op.u4_error_code&0xFF) == IVD_RES_CHANGED)
|
|
||||||
{
|
|
||||||
// resolution change
|
|
||||||
ResetDecoder();
|
|
||||||
m_hasBufferSizeInfo = false;
|
|
||||||
Decode(data, length, timestamp, imageOutput, decodeResult);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else if (status != 0)
|
|
||||||
{
|
|
||||||
cemuLog_log(LogType::Force, "H264: Failed to decode frame (error 0x{:08x})", status);
|
|
||||||
decodeResult.frameReady = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
bt.Stop();
|
|
||||||
double decodeTime = bt.GetElapsedMilliseconds();
|
|
||||||
|
|
||||||
cemu_assert(s_dec_op.u4_frame_decoded_flag);
|
|
||||||
cemu_assert_debug(s_dec_op.u4_num_bytes_consumed == length);
|
|
||||||
|
|
||||||
cemu_assert_debug(m_isBufferedMode || s_dec_op.u4_output_present); // if buffered mode is disabled, then every input should output a frame (except for partial slices?)
|
|
||||||
|
|
||||||
if (s_dec_op.u4_output_present)
|
|
||||||
{
|
|
||||||
cemu_assert(s_dec_op.e_output_format == IV_YUV_420SP_UV);
|
|
||||||
if (H264_IsBotW())
|
|
||||||
{
|
|
||||||
if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088)
|
|
||||||
s_dec_op.s_disp_frm_buf.u4_y_ht = 1080;
|
|
||||||
}
|
|
||||||
DecodeResult tmpResult;
|
|
||||||
tmpResult.frameReady = s_dec_op.u4_output_present != 0;
|
|
||||||
tmpResult.timestamp = m_timestamps[s_dec_op.u4_ts];
|
|
||||||
tmpResult.imageOutput = m_imageBuffers[s_dec_op.u4_ts];
|
|
||||||
tmpResult.decodeOutput = s_dec_op;
|
|
||||||
AddBufferedResult(tmpResult);
|
|
||||||
// transfer image to PPC output buffer and also correct stride
|
|
||||||
bt.Start();
|
|
||||||
CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_imageBuffers[s_dec_op.u4_ts], s_dec_op);
|
|
||||||
bt.Stop();
|
|
||||||
double copyTime = bt.GetElapsedMilliseconds();
|
|
||||||
// release buffer
|
|
||||||
sint32 bufferId = -1;
|
|
||||||
for (size_t i = 0; i < m_displayBuf.size(); i++)
|
|
||||||
{
|
|
||||||
if (s_dec_op.s_disp_frm_buf.pv_y_buf >= m_displayBuf[i].data() && s_dec_op.s_disp_frm_buf.pv_y_buf < (m_displayBuf[i].data() + m_displayBuf[i].size()))
|
|
||||||
{
|
|
||||||
bufferId = (sint32)i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cemu_assert_debug(bufferId == s_dec_op.u4_disp_buf_id);
|
|
||||||
cemu_assert(bufferId >= 0);
|
|
||||||
ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 };
|
|
||||||
ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 };
|
|
||||||
s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
|
|
||||||
s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t);
|
|
||||||
s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t);
|
|
||||||
s_video_rel_disp_ip.u4_disp_buf_id = bufferId;
|
|
||||||
status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op);
|
|
||||||
cemu_assert(!status);
|
|
||||||
|
|
||||||
cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms CopyTime {}ms", decodeTime, copyTime);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cemuLog_log(LogType::H264, "H264Bench | DecodeTime{}ms", decodeTime);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (s_dec_op.u4_frame_decoded_flag)
|
|
||||||
m_numDecodedFrames++;
|
|
||||||
|
|
||||||
if (m_isBufferedMode)
|
|
||||||
{
|
|
||||||
// in buffered mode, always buffer 5 frames regardless of actual reordering and decoder latency
|
|
||||||
if (m_numDecodedFrames > 5)
|
|
||||||
GetCurrentBufferedResult(decodeResult);
|
|
||||||
}
|
|
||||||
else if(m_numDecodedFrames > 0)
|
|
||||||
GetCurrentBufferedResult(decodeResult);
|
|
||||||
|
|
||||||
// get VUI
|
|
||||||
//ih264d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip;
|
|
||||||
//ih264d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op;
|
|
||||||
|
|
||||||
//s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL;
|
|
||||||
//s_ctl_get_vui_params_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_VUI_PARAMS;
|
|
||||||
//s_ctl_get_vui_params_ip.u4_size = sizeof(ih264d_ctl_get_vui_params_ip_t);
|
|
||||||
//s_ctl_get_vui_params_op.u4_size = sizeof(ih264d_ctl_get_vui_params_op_t);
|
|
||||||
|
|
||||||
//status = ih264d_api_function(mCodecCtx, &s_ctl_get_vui_params_ip, &s_ctl_get_vui_params_op);
|
|
||||||
//cemu_assert(status == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<DecodeResult> Flush()
|
|
||||||
{
|
|
||||||
std::vector<DecodeResult> results;
|
|
||||||
// set flush mode
|
|
||||||
ivd_ctl_flush_ip_t s_video_flush_ip{ 0 };
|
|
||||||
ivd_ctl_flush_op_t s_video_flush_op{ 0 };
|
|
||||||
s_video_flush_ip.e_cmd = IVD_CMD_VIDEO_CTL;
|
|
||||||
s_video_flush_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH;
|
|
||||||
s_video_flush_ip.u4_size = sizeof(ivd_ctl_flush_ip_t);
|
|
||||||
s_video_flush_op.u4_size = sizeof(ivd_ctl_flush_op_t);
|
|
||||||
WORD32 status = ih264d_api_function(m_codecCtx, &s_video_flush_ip, &s_video_flush_op);
|
|
||||||
if (status != 0)
|
|
||||||
cemuLog_log(LogType::Force, "H264Dec: Unexpected error during flush ({})", status);
|
|
||||||
// get all frames from the codec
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
ivd_video_decode_ip_t s_dec_ip{ 0 };
|
|
||||||
ivd_video_decode_op_t s_dec_op{ 0 };
|
|
||||||
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
|
|
||||||
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
|
|
||||||
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
|
|
||||||
s_dec_ip.pv_stream_buffer = NULL;
|
|
||||||
s_dec_ip.u4_num_Bytes = 0;
|
|
||||||
s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0;
|
|
||||||
s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0;
|
|
||||||
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
|
|
||||||
status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
|
|
||||||
if (status != 0)
|
|
||||||
break;
|
|
||||||
cemu_assert_debug(s_dec_op.u4_output_present != 0); // should never be zero?
|
|
||||||
if(s_dec_op.u4_output_present == 0)
|
|
||||||
continue;
|
|
||||||
if (H264_IsBotW())
|
|
||||||
{
|
|
||||||
if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088)
|
|
||||||
s_dec_op.s_disp_frm_buf.u4_y_ht = 1080;
|
|
||||||
}
|
|
||||||
DecodeResult tmpResult;
|
|
||||||
tmpResult.frameReady = s_dec_op.u4_output_present != 0;
|
|
||||||
tmpResult.timestamp = m_timestamps[s_dec_op.u4_ts];
|
|
||||||
tmpResult.imageOutput = m_imageBuffers[s_dec_op.u4_ts];
|
|
||||||
tmpResult.decodeOutput = s_dec_op;
|
|
||||||
AddBufferedResult(tmpResult);
|
|
||||||
CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_imageBuffers[s_dec_op.u4_ts], s_dec_op);
|
|
||||||
}
|
|
||||||
results = std::move(m_bufferedResults);
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
|
|
||||||
void CopyImageToResultBuffer(uint8* yIn, uint8* uvIn, uint8* bufOut, ivd_video_decode_op_t& decodeInfo)
|
|
||||||
{
|
|
||||||
uint32 imageWidth = decodeInfo.s_disp_frm_buf.u4_y_wd;
|
|
||||||
uint32 imageHeight = decodeInfo.s_disp_frm_buf.u4_y_ht;
|
|
||||||
|
|
||||||
size_t inputStride = decodeInfo.s_disp_frm_buf.u4_y_strd;
|
|
||||||
size_t outputStride = (imageWidth + 0xFF) & ~0xFF;
|
|
||||||
|
|
||||||
// copy Y
|
|
||||||
uint8* yOut = bufOut;
|
|
||||||
for (uint32 row = 0; row < imageHeight; row++)
|
|
||||||
{
|
|
||||||
memcpy(yOut, yIn, imageWidth);
|
|
||||||
yIn += inputStride;
|
|
||||||
yOut += outputStride;
|
|
||||||
}
|
|
||||||
|
|
||||||
// copy UV
|
|
||||||
uint8* uvOut = bufOut + outputStride * imageHeight;
|
|
||||||
for (uint32 row = 0; row < imageHeight/2; row++)
|
|
||||||
{
|
|
||||||
memcpy(uvOut, uvIn, imageWidth);
|
|
||||||
uvIn += inputStride;
|
|
||||||
uvOut += outputStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
bool DetermineBufferSizes(void* data, uint32 length, uint32& numByteConsumed)
|
|
||||||
{
|
|
||||||
numByteConsumed = 0;
|
|
||||||
UpdateParameters(true);
|
|
||||||
|
|
||||||
ivd_video_decode_ip_t s_dec_ip{ 0 };
|
|
||||||
ivd_video_decode_op_t s_dec_op{ 0 };
|
|
||||||
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
|
|
||||||
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
|
|
||||||
|
|
||||||
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
|
|
||||||
s_dec_ip.pv_stream_buffer = (uint8*)data;
|
|
||||||
s_dec_ip.u4_num_Bytes = length;
|
|
||||||
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
|
|
||||||
WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
|
|
||||||
if (status != 0)
|
|
||||||
{
|
|
||||||
cemuLog_log(LogType::Force, "H264: Unable to determine buffer sizes for stream");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
numByteConsumed = s_dec_op.u4_num_bytes_consumed;
|
|
||||||
cemu_assert(status == 0);
|
|
||||||
if (s_dec_op.u4_pic_wd == 0 || s_dec_op.u4_pic_ht == 0)
|
|
||||||
return false;
|
|
||||||
UpdateParameters(false);
|
|
||||||
ReinitBuffers();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ReinitBuffers()
|
|
||||||
{
|
|
||||||
ivd_ctl_getbufinfo_ip_t s_ctl_ip{ 0 };
|
|
||||||
ivd_ctl_getbufinfo_op_t s_ctl_op{ 0 };
|
|
||||||
WORD32 outlen = 0;
|
|
||||||
|
|
||||||
s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
|
|
||||||
s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
|
|
||||||
s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t);
|
|
||||||
s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t);
|
|
||||||
|
|
||||||
WORD32 status = ih264d_api_function(m_codecCtx, &s_ctl_ip, &s_ctl_op);
|
|
||||||
cemu_assert(!status);
|
|
||||||
|
|
||||||
// allocate
|
|
||||||
for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
|
|
||||||
{
|
|
||||||
m_displayBuf.emplace_back().resize(s_ctl_op.u4_min_out_buf_size[0] + s_ctl_op.u4_min_out_buf_size[1]);
|
|
||||||
}
|
|
||||||
// set
|
|
||||||
ivd_set_display_frame_ip_t s_set_display_frame_ip{ 0 }; // make sure to zero-initialize this. The codec seems to check the first 3 pointers/sizes per frame, regardless of the value of u4_num_bufs
|
|
||||||
ivd_set_display_frame_op_t s_set_display_frame_op{ 0 };
|
|
||||||
|
|
||||||
s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME;
|
|
||||||
s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t);
|
|
||||||
s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t);
|
|
||||||
|
|
||||||
cemu_assert_debug(s_ctl_op.u4_min_num_out_bufs == 2);
|
|
||||||
cemu_assert_debug(s_ctl_op.u4_min_out_buf_size[0] != 0 && s_ctl_op.u4_min_out_buf_size[1] != 0);
|
|
||||||
|
|
||||||
s_set_display_frame_ip.num_disp_bufs = s_ctl_op.u4_num_disp_bufs;
|
|
||||||
|
|
||||||
for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
|
|
||||||
{
|
|
||||||
s_set_display_frame_ip.s_disp_buffer[i].u4_num_bufs = 2;
|
|
||||||
s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[0] = s_ctl_op.u4_min_out_buf_size[0];
|
|
||||||
s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[1] = s_ctl_op.u4_min_out_buf_size[1];
|
|
||||||
s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[0] = m_displayBuf[i].data() + 0;
|
|
||||||
s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[1] = m_displayBuf[i].data() + s_ctl_op.u4_min_out_buf_size[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
status = ih264d_api_function(m_codecCtx, &s_set_display_frame_ip, &s_set_display_frame_op);
|
|
||||||
cemu_assert(!status);
|
|
||||||
|
|
||||||
|
|
||||||
// mark all as released (available)
|
|
||||||
for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
|
|
||||||
{
|
|
||||||
ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 };
|
|
||||||
ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 };
|
|
||||||
|
|
||||||
s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
|
|
||||||
s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t);
|
|
||||||
s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t);
|
|
||||||
s_video_rel_disp_ip.u4_disp_buf_id = i;
|
|
||||||
|
|
||||||
status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op);
|
|
||||||
cemu_assert(!status);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ResetDecoder()
|
|
||||||
{
|
|
||||||
ivd_ctl_reset_ip_t s_ctl_ip;
|
|
||||||
ivd_ctl_reset_op_t s_ctl_op;
|
|
||||||
|
|
||||||
s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
|
|
||||||
s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET;
|
|
||||||
s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t);
|
|
||||||
s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t);
|
|
||||||
|
|
||||||
WORD32 status = ih264d_api_function(m_codecCtx, (void*)&s_ctl_ip, (void*)&s_ctl_op);
|
|
||||||
cemu_assert_debug(status == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void UpdateParameters(bool headerDecodeOnly)
|
|
||||||
{
|
|
||||||
ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 };
|
|
||||||
ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 };
|
|
||||||
ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t;
|
|
||||||
ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t;
|
|
||||||
|
|
||||||
ps_ctl_ip->u4_disp_wd = 0;
|
|
||||||
ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE;
|
|
||||||
ps_ctl_ip->e_frm_out_mode = m_isBufferedMode ? IVD_DISPLAY_FRAME_OUT : IVD_DECODE_FRAME_OUT;
|
|
||||||
ps_ctl_ip->e_vid_dec_mode = headerDecodeOnly ? IVD_DECODE_HEADER : IVD_DECODE_FRAME;
|
|
||||||
ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL;
|
|
||||||
ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
|
|
||||||
ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t);
|
|
||||||
ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t);
|
|
||||||
|
|
||||||
WORD32 status = ih264d_api_function(m_codecCtx, &s_h264d_ctl_ip, &s_h264d_ctl_op);
|
|
||||||
cemu_assert(status == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* In non-flush mode we have a delay of (at least?) 5 frames */
|
|
||||||
void AddBufferedResult(DecodeResult& decodeResult)
|
|
||||||
{
|
|
||||||
if (decodeResult.frameReady)
|
|
||||||
m_bufferedResults.emplace_back(decodeResult);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GetCurrentBufferedResult(DecodeResult& decodeResult)
|
|
||||||
{
|
|
||||||
cemu_assert(!m_bufferedResults.empty());
|
|
||||||
if (m_bufferedResults.empty())
|
|
||||||
{
|
|
||||||
decodeResult.frameReady = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
decodeResult = m_bufferedResults.front();
|
|
||||||
m_bufferedResults.erase(m_bufferedResults.begin());
|
|
||||||
}
|
|
||||||
private:
|
|
||||||
iv_obj_t* m_codecCtx{nullptr};
|
|
||||||
bool m_hasBufferSizeInfo{ false };
|
|
||||||
bool m_isBufferedMode{ false };
|
|
||||||
double m_timestamps[64];
|
|
||||||
void* m_imageBuffers[64];
|
|
||||||
uint32 m_timestampIndex{0};
|
|
||||||
std::vector<DecodeResult> m_bufferedResults;
|
|
||||||
uint32 m_numDecodedFrames{0};
|
|
||||||
std::vector<std::vector<uint8>> m_displayBuf;
|
|
||||||
};
|
|
||||||
|
|
||||||
H264DEC_STATUS H264DECGetImageSize(uint8* stream, uint32 length, uint32 offset, uint32be* outputWidth, uint32be* outputHeight)
|
H264DEC_STATUS H264DECGetImageSize(uint8* stream, uint32 length, uint32 offset, uint32be* outputWidth, uint32be* outputHeight)
|
||||||
{
|
{
|
||||||
cemu_assert(offset <= length);
|
if(!stream || length < 4 || !outputWidth || !outputHeight)
|
||||||
|
return H264DEC_STATUS::INVALID_PARAM;
|
||||||
uint32 imageWidth, imageHeight;
|
if( (offset+4) > length )
|
||||||
|
return H264DEC_STATUS::INVALID_PARAM;
|
||||||
if (H264AVCDecoder::GetImageInfo(stream, length, imageWidth, imageHeight))
|
uint8* cur = stream + offset;
|
||||||
|
uint8* end = stream + length;
|
||||||
|
cur += 2; // we access cur[-2] and cur[-1] so we need to start at offset 2
|
||||||
|
while(cur < end-2)
|
||||||
{
|
{
|
||||||
if (H264_IsBotW())
|
// check for start code
|
||||||
|
if(*cur != 1)
|
||||||
{
|
{
|
||||||
if (imageWidth == 1920 && imageHeight == 1088)
|
cur++;
|
||||||
imageHeight = 1080;
|
continue;
|
||||||
}
|
}
|
||||||
*outputWidth = imageWidth;
|
// check if this is a valid NAL header
|
||||||
*outputHeight = imageHeight;
|
if(cur[-2] != 0 || cur[-1] != 0 || cur[0] != 1)
|
||||||
|
{
|
||||||
|
cur++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
uint8 nalHeader = cur[1];
|
||||||
|
if((nalHeader & 0x1F) != 7)
|
||||||
|
{
|
||||||
|
cur++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
h264State_seq_parameter_set_t psp;
|
||||||
|
bool r = h264Parser_ParseSPS(cur+2, end-cur-2, psp);
|
||||||
|
if(!r)
|
||||||
|
{
|
||||||
|
cemu_assert_suspicious(); // should not happen
|
||||||
|
return H264DEC_STATUS::BAD_STREAM;
|
||||||
|
}
|
||||||
|
*outputWidth = (psp.pic_width_in_mbs_minus1 + 1) * 16;
|
||||||
|
*outputHeight = (psp.pic_height_in_map_units_minus1 + 1) * 16; // affected by frame_mbs_only_flag?
|
||||||
|
return H264DEC_STATUS::SUCCESS;
|
||||||
}
|
}
|
||||||
else
|
return H264DEC_STATUS::BAD_STREAM;
|
||||||
{
|
|
||||||
*outputWidth = 0;
|
|
||||||
*outputHeight = 0;
|
|
||||||
return H264DEC_STATUS::BAD_STREAM;
|
|
||||||
}
|
|
||||||
|
|
||||||
return H264DEC_STATUS::SUCCESS;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 H264DECInitParam(uint32 workMemorySize, void* workMemory)
|
uint32 H264DECInitParam(uint32 workMemorySize, void* workMemory)
|
||||||
|
@ -762,26 +239,28 @@ namespace H264
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unordered_map<uint32, H264AVCDecoder*> sDecoderSessions;
|
std::unordered_map<uint32, H264DecoderBackend*> sDecoderSessions;
|
||||||
std::mutex sDecoderSessionsMutex;
|
std::mutex sDecoderSessionsMutex;
|
||||||
std::atomic_uint32_t sCurrentSessionHandle{ 1 };
|
std::atomic_uint32_t sCurrentSessionHandle{ 1 };
|
||||||
|
|
||||||
static H264AVCDecoder* _CreateDecoderSession(uint32& handleOut)
|
H264DecoderBackend* CreateAVCDecoder();
|
||||||
|
|
||||||
|
static H264DecoderBackend* _CreateDecoderSession(uint32& handleOut)
|
||||||
{
|
{
|
||||||
std::unique_lock _lock(sDecoderSessionsMutex);
|
std::unique_lock _lock(sDecoderSessionsMutex);
|
||||||
handleOut = sCurrentSessionHandle.fetch_add(1);
|
handleOut = sCurrentSessionHandle.fetch_add(1);
|
||||||
H264AVCDecoder* session = new H264AVCDecoder();
|
H264DecoderBackend* session = CreateAVCDecoder();
|
||||||
sDecoderSessions.try_emplace(handleOut, session);
|
sDecoderSessions.try_emplace(handleOut, session);
|
||||||
return session;
|
return session;
|
||||||
}
|
}
|
||||||
|
|
||||||
static H264AVCDecoder* _AcquireDecoderSession(uint32 handle)
|
static H264DecoderBackend* _AcquireDecoderSession(uint32 handle)
|
||||||
{
|
{
|
||||||
std::unique_lock _lock(sDecoderSessionsMutex);
|
std::unique_lock _lock(sDecoderSessionsMutex);
|
||||||
auto it = sDecoderSessions.find(handle);
|
auto it = sDecoderSessions.find(handle);
|
||||||
if (it == sDecoderSessions.end())
|
if (it == sDecoderSessions.end())
|
||||||
return nullptr;
|
return nullptr;
|
||||||
H264AVCDecoder* session = it->second;
|
H264DecoderBackend* session = it->second;
|
||||||
if (sDecoderSessions.size() >= 5)
|
if (sDecoderSessions.size() >= 5)
|
||||||
{
|
{
|
||||||
cemuLog_log(LogType::Force, "H264: Warning - more than 5 active sessions");
|
cemuLog_log(LogType::Force, "H264: Warning - more than 5 active sessions");
|
||||||
|
@ -790,7 +269,7 @@ namespace H264
|
||||||
return session;
|
return session;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _ReleaseDecoderSession(H264AVCDecoder* session)
|
static void _ReleaseDecoderSession(H264DecoderBackend* session)
|
||||||
{
|
{
|
||||||
std::unique_lock _lock(sDecoderSessionsMutex);
|
std::unique_lock _lock(sDecoderSessionsMutex);
|
||||||
|
|
||||||
|
@ -802,7 +281,7 @@ namespace H264
|
||||||
auto it = sDecoderSessions.find(handle);
|
auto it = sDecoderSessions.find(handle);
|
||||||
if (it == sDecoderSessions.end())
|
if (it == sDecoderSessions.end())
|
||||||
return;
|
return;
|
||||||
H264AVCDecoder* session = it->second;
|
H264DecoderBackend* session = it->second;
|
||||||
session->Destroy();
|
session->Destroy();
|
||||||
delete session;
|
delete session;
|
||||||
sDecoderSessions.erase(it);
|
sDecoderSessions.erase(it);
|
||||||
|
@ -830,45 +309,44 @@ namespace H264
|
||||||
uint32 H264DECBegin(void* workMemory)
|
uint32 H264DECBegin(void* workMemory)
|
||||||
{
|
{
|
||||||
H264Context* ctx = (H264Context*)workMemory;
|
H264Context* ctx = (H264Context*)workMemory;
|
||||||
H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle);
|
H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle);
|
||||||
if (!session)
|
if (!session)
|
||||||
{
|
{
|
||||||
cemuLog_log(LogType::Force, "H264DECBegin(): Invalid session");
|
cemuLog_log(LogType::Force, "H264DECBegin(): Invalid session");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
session->Init(ctx->Param.outputPerFrame == 0);
|
session->Init(ctx->Param.outputPerFrame == 0);
|
||||||
|
ctx->decoderState.numFramesInFlight = 0;
|
||||||
_ReleaseDecoderSession(session);
|
_ReleaseDecoderSession(session);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void H264DoFrameOutputCallback(H264Context* ctx, H264AVCDecoder::DecodeResult& decodeResult);
|
void H264DoFrameOutputCallback(H264Context* ctx, H264DecoderBackend::DecodeResult& decodeResult);
|
||||||
|
|
||||||
void _async_H264DECEnd(coreinit::OSEvent* executeDoneEvent, H264AVCDecoder* session, H264Context* ctx, std::vector<H264AVCDecoder::DecodeResult>* decodeResultsOut)
|
|
||||||
{
|
|
||||||
*decodeResultsOut = session->Flush();
|
|
||||||
coreinit::OSSignalEvent(executeDoneEvent);
|
|
||||||
}
|
|
||||||
|
|
||||||
H264DEC_STATUS H264DECEnd(void* workMemory)
|
H264DEC_STATUS H264DECEnd(void* workMemory)
|
||||||
{
|
{
|
||||||
H264Context* ctx = (H264Context*)workMemory;
|
H264Context* ctx = (H264Context*)workMemory;
|
||||||
H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle);
|
H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle);
|
||||||
if (!session)
|
if (!session)
|
||||||
{
|
{
|
||||||
cemuLog_log(LogType::Force, "H264DECEnd(): Invalid session");
|
cemuLog_log(LogType::Force, "H264DECEnd(): Invalid session");
|
||||||
return H264DEC_STATUS::SUCCESS;
|
return H264DEC_STATUS::SUCCESS;
|
||||||
}
|
}
|
||||||
StackAllocator<coreinit::OSEvent> executeDoneEvent;
|
coreinit::OSEvent* flushEvt = &session->GetFlushEvent();
|
||||||
coreinit::OSInitEvent(&executeDoneEvent, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_MANUAL);
|
coreinit::OSResetEvent(flushEvt);
|
||||||
std::vector<H264AVCDecoder::DecodeResult> results;
|
session->QueueFlush();
|
||||||
auto asyncTask = std::async(std::launch::async, _async_H264DECEnd, executeDoneEvent.GetPointer(), session, ctx, &results);
|
coreinit::OSWaitEvent(flushEvt);
|
||||||
coreinit::OSWaitEvent(&executeDoneEvent);
|
while(true)
|
||||||
_ReleaseDecoderSession(session);
|
|
||||||
if (!results.empty())
|
|
||||||
{
|
{
|
||||||
for (auto& itr : results)
|
H264DecoderBackend::DecodeResult decodeResult;
|
||||||
H264DoFrameOutputCallback(ctx, itr);
|
if( !session->GetFrameOutputIfReady(decodeResult) )
|
||||||
|
break;
|
||||||
|
// todo - output all frames in a single callback?
|
||||||
|
H264DoFrameOutputCallback(ctx, decodeResult);
|
||||||
|
ctx->decoderState.numFramesInFlight--;
|
||||||
}
|
}
|
||||||
|
cemu_assert_debug(ctx->decoderState.numFramesInFlight == 0); // no frames should be in flight anymore. Exact behavior is not well understood but we may have to output dummy frames if necessary
|
||||||
|
_ReleaseDecoderSession(session);
|
||||||
return H264DEC_STATUS::SUCCESS;
|
return H264DEC_STATUS::SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -930,7 +408,6 @@ namespace H264
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct H264DECFrameOutput
|
struct H264DECFrameOutput
|
||||||
{
|
{
|
||||||
/* +0x00 */ uint32be result;
|
/* +0x00 */ uint32be result;
|
||||||
|
@ -967,7 +444,7 @@ namespace H264
|
||||||
|
|
||||||
static_assert(sizeof(H264OutputCBStruct) == 12);
|
static_assert(sizeof(H264OutputCBStruct) == 12);
|
||||||
|
|
||||||
void H264DoFrameOutputCallback(H264Context* ctx, H264AVCDecoder::DecodeResult& decodeResult)
|
void H264DoFrameOutputCallback(H264Context* ctx, H264DecoderBackend::DecodeResult& decodeResult)
|
||||||
{
|
{
|
||||||
sint32 outputFrameCount = 1;
|
sint32 outputFrameCount = 1;
|
||||||
|
|
||||||
|
@ -984,14 +461,14 @@ namespace H264
|
||||||
frameOutput->imagePtr = (uint8*)decodeResult.imageOutput;
|
frameOutput->imagePtr = (uint8*)decodeResult.imageOutput;
|
||||||
frameOutput->result = 100;
|
frameOutput->result = 100;
|
||||||
frameOutput->timestamp = decodeResult.timestamp;
|
frameOutput->timestamp = decodeResult.timestamp;
|
||||||
frameOutput->frameWidth = decodeResult.decodeOutput.u4_pic_wd;
|
frameOutput->frameWidth = decodeResult.frameWidth;
|
||||||
frameOutput->frameHeight = decodeResult.decodeOutput.u4_pic_ht;
|
frameOutput->frameHeight = decodeResult.frameHeight;
|
||||||
frameOutput->bytesPerRow = (decodeResult.decodeOutput.u4_pic_wd + 0xFF) & ~0xFF;
|
frameOutput->bytesPerRow = decodeResult.bytesPerRow;
|
||||||
frameOutput->cropEnable = decodeResult.decodeOutput.u1_frame_cropping_flag;
|
frameOutput->cropEnable = decodeResult.cropEnable;
|
||||||
frameOutput->cropTop = decodeResult.decodeOutput.u1_frame_cropping_rect_top_ofst;
|
frameOutput->cropTop = decodeResult.cropTop;
|
||||||
frameOutput->cropBottom = decodeResult.decodeOutput.u1_frame_cropping_rect_bottom_ofst;
|
frameOutput->cropBottom = decodeResult.cropBottom;
|
||||||
frameOutput->cropLeft = decodeResult.decodeOutput.u1_frame_cropping_rect_left_ofst;
|
frameOutput->cropLeft = decodeResult.cropLeft;
|
||||||
frameOutput->cropRight = decodeResult.decodeOutput.u1_frame_cropping_rect_right_ofst;
|
frameOutput->cropRight = decodeResult.cropRight;
|
||||||
|
|
||||||
StackAllocator<H264OutputCBStruct> stack_fptrOutputData;
|
StackAllocator<H264OutputCBStruct> stack_fptrOutputData;
|
||||||
stack_fptrOutputData->frameCount = outputFrameCount;
|
stack_fptrOutputData->frameCount = outputFrameCount;
|
||||||
|
@ -1006,29 +483,41 @@ namespace H264
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void _async_H264DECExecute(coreinit::OSEvent* executeDoneEvent, H264AVCDecoder* session, H264Context* ctx, void* imageOutput, H264AVCDecoder::DecodeResult* decodeResult)
|
|
||||||
{
|
|
||||||
session->Decode(ctx->BitStream.ptr.GetPtr(), ctx->BitStream.length, ctx->BitStream.timestamp, imageOutput, *decodeResult);
|
|
||||||
coreinit::OSSignalEvent(executeDoneEvent);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32 H264DECExecute(void* workMemory, void* imageOutput)
|
uint32 H264DECExecute(void* workMemory, void* imageOutput)
|
||||||
{
|
{
|
||||||
|
BenchmarkTimer bt;
|
||||||
|
bt.Start();
|
||||||
H264Context* ctx = (H264Context*)workMemory;
|
H264Context* ctx = (H264Context*)workMemory;
|
||||||
H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle);
|
H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle);
|
||||||
if (!session)
|
if (!session)
|
||||||
{
|
{
|
||||||
cemuLog_log(LogType::Force, "H264DECExecute(): Invalid session");
|
cemuLog_log(LogType::Force, "H264DECExecute(): Invalid session");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
StackAllocator<coreinit::OSEvent> executeDoneEvent;
|
// feed data to backend
|
||||||
coreinit::OSInitEvent(&executeDoneEvent, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_MANUAL);
|
session->QueueForDecode((uint8*)ctx->BitStream.ptr.GetPtr(), ctx->BitStream.length, ctx->BitStream.timestamp, imageOutput);
|
||||||
H264AVCDecoder::DecodeResult decodeResult;
|
ctx->decoderState.numFramesInFlight++;
|
||||||
auto asyncTask = std::async(std::launch::async, _async_H264DECExecute, &executeDoneEvent, session, ctx, imageOutput , &decodeResult);
|
// H264DECExecute is synchronous and will return a frame after either every call (non-buffered) or after 6 calls (buffered)
|
||||||
coreinit::OSWaitEvent(&executeDoneEvent);
|
// normally frame decoding happens only during H264DECExecute, but in order to hide the latency of our CPU decoder we will decode asynchronously in buffered mode
|
||||||
|
uint32 numFramesToBuffer = (ctx->Param.outputPerFrame == 0) ? 5 : 0;
|
||||||
|
if(ctx->decoderState.numFramesInFlight > numFramesToBuffer)
|
||||||
|
{
|
||||||
|
ctx->decoderState.numFramesInFlight--;
|
||||||
|
while(true)
|
||||||
|
{
|
||||||
|
coreinit::OSEvent& evt = session->GetFrameOutputEvent();
|
||||||
|
coreinit::OSWaitEvent(&evt);
|
||||||
|
H264DecoderBackend::DecodeResult decodeResult;
|
||||||
|
if( !session->GetFrameOutputIfReady(decodeResult) )
|
||||||
|
continue;
|
||||||
|
H264DoFrameOutputCallback(ctx, decodeResult);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
_ReleaseDecoderSession(session);
|
_ReleaseDecoderSession(session);
|
||||||
if(decodeResult.frameReady)
|
bt.Stop();
|
||||||
H264DoFrameOutputCallback(ctx, decodeResult);
|
double callTime = bt.GetElapsedMilliseconds();
|
||||||
|
cemuLog_log(LogType::H264, "H264Bench | H264DECExecute took {}ms", callTime);
|
||||||
return 0x80 | 100;
|
return 0x80 | 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,502 @@
|
||||||
|
#include "H264DecInternal.h"
|
||||||
|
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
||||||
|
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
#include "../dependencies/ih264d/common/ih264_typedefs.h"
|
||||||
|
#include "../dependencies/ih264d/decoder/ih264d.h"
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace H264
|
||||||
|
{
|
||||||
|
bool H264_IsBotW();
|
||||||
|
|
||||||
|
class H264AVCDecoder : public H264DecoderBackend
|
||||||
|
{
|
||||||
|
static void* ivd_aligned_malloc(void* ctxt, WORD32 alignment, WORD32 size)
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
return _aligned_malloc(size, alignment);
|
||||||
|
#else
|
||||||
|
// alignment is atleast sizeof(void*)
|
||||||
|
alignment = std::max<WORD32>(alignment, sizeof(void*));
|
||||||
|
|
||||||
|
//smallest multiple of 2 at least as large as alignment
|
||||||
|
alignment--;
|
||||||
|
alignment |= alignment << 1;
|
||||||
|
alignment |= alignment >> 1;
|
||||||
|
alignment |= alignment >> 2;
|
||||||
|
alignment |= alignment >> 4;
|
||||||
|
alignment |= alignment >> 8;
|
||||||
|
alignment |= alignment >> 16;
|
||||||
|
alignment ^= (alignment >> 1);
|
||||||
|
|
||||||
|
void* temp;
|
||||||
|
posix_memalign(&temp, (size_t)alignment, (size_t)size);
|
||||||
|
return temp;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release callback registered with the ih264d codec (pf_aligned_free).
// Frees memory that was handed out by ivd_aligned_malloc; ctxt is not used.
static void ivd_aligned_free(void* ctxt, void* buf)
{
#if !defined(_WIN32)
    free(buf);
#else
    _aligned_free(buf);
#endif
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
H264AVCDecoder()
|
||||||
|
{
|
||||||
|
m_decoderThread = std::thread(&H264AVCDecoder::DecoderThread, this);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Asks the worker thread to exit, wakes it up and waits until it has finished.
~H264AVCDecoder()
{
    m_threadShouldExit.store(true);
    // the worker may be blocked in m_decodeSem.decrementWithWait(), bump the semaphore so it re-checks the exit flag
    m_decodeSem.increment();
    if (!m_decoderThread.joinable())
        return;
    m_decoderThread.join();
}
|
||||||
|
|
||||||
|
void Init(bool isBufferedMode)
|
||||||
|
{
|
||||||
|
ih264d_create_ip_t s_create_ip{ 0 };
|
||||||
|
ih264d_create_op_t s_create_op{ 0 };
|
||||||
|
|
||||||
|
s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t);
|
||||||
|
s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE;
|
||||||
|
s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 1; // shared display buffer mode -> We give the decoder a list of buffers that it will use (?)
|
||||||
|
|
||||||
|
s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t);
|
||||||
|
s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV;
|
||||||
|
s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc;
|
||||||
|
s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free;
|
||||||
|
s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL;
|
||||||
|
|
||||||
|
WORD32 status = ih264d_api_function(m_codecCtx, &s_create_ip, &s_create_op);
|
||||||
|
cemu_assert(!status);
|
||||||
|
|
||||||
|
m_codecCtx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle;
|
||||||
|
m_codecCtx->pv_fxns = (void*)&ih264d_api_function;
|
||||||
|
m_codecCtx->u4_size = sizeof(iv_obj_t);
|
||||||
|
|
||||||
|
SetDecoderCoreCount(1);
|
||||||
|
|
||||||
|
m_isBufferedMode = isBufferedMode;
|
||||||
|
|
||||||
|
UpdateParameters(false);
|
||||||
|
|
||||||
|
m_numDecodedFrames = 0;
|
||||||
|
m_hasBufferSizeInfo = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Destroy()
|
||||||
|
{
|
||||||
|
if (!m_codecCtx)
|
||||||
|
return;
|
||||||
|
ih264d_delete_ip_t s_delete_ip{ 0 };
|
||||||
|
ih264d_delete_op_t s_delete_op{ 0 };
|
||||||
|
s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t);
|
||||||
|
s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE;
|
||||||
|
s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t);
|
||||||
|
WORD32 status = ih264d_api_function(m_codecCtx, &s_delete_ip, &s_delete_op);
|
||||||
|
cemu_assert_debug(!status);
|
||||||
|
m_codecCtx = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Publishes a frame that the codec has output.
// s_dec_op.u4_ts carries the index of the slice in m_decodedSliceArray the frame belongs to
// (Decode() stores the index there). The image is copied into that slice's output buffer,
// the result fields are filled in, the slice index is appended to the display queue and
// the display queue event is signaled.
void PushDecodedFrame(ivd_video_decode_op_t& s_dec_op)
{
    // validate the slice index before it is used for the first time
    cemu_assert(s_dec_op.u4_ts < m_decodedSliceArray.size());

    // copy image data outside of lock since its an expensive operation
    CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_decodedSliceArray[s_dec_op.u4_ts].result.imageOutput, s_dec_op);

    std::unique_lock _l(m_decodeQueueMtx);
    auto& result = m_decodedSliceArray[s_dec_op.u4_ts];
    cemu_assert_debug(result.isUsed);
    cemu_assert_debug(s_dec_op.u4_output_present != 0);

    result.result.isDecoded = true;
    result.result.hasFrame = s_dec_op.u4_output_present != 0;
    result.result.frameWidth = s_dec_op.u4_pic_wd;
    result.result.frameHeight = s_dec_op.u4_pic_ht;
    // rows in the output image are padded to a multiple of 256 bytes (matches CopyImageToResultBuffer)
    result.result.bytesPerRow = (s_dec_op.u4_pic_wd + 0xFF) & ~0xFF;
    result.result.cropEnable = s_dec_op.u1_frame_cropping_flag;
    result.result.cropTop = s_dec_op.u1_frame_cropping_rect_top_ofst;
    result.result.cropBottom = s_dec_op.u1_frame_cropping_rect_bottom_ofst;
    result.result.cropLeft = s_dec_op.u1_frame_cropping_rect_left_ofst;
    result.result.cropRight = s_dec_op.u1_frame_cropping_rect_right_ofst;

    m_displayQueue.push_back(s_dec_op.u4_ts);

    // wake up consumers only after the lock has been dropped
    _l.unlock();
    coreinit::OSSignalEvent(m_displayQueueEvt);
}
|
||||||
|
|
||||||
|
// called from async worker thread
|
||||||
|
void Decode(DecodedSlice& decodedSlice)
|
||||||
|
{
|
||||||
|
if (!m_hasBufferSizeInfo)
|
||||||
|
{
|
||||||
|
uint32 numByteConsumed = 0;
|
||||||
|
if (!DetermineBufferSizes(decodedSlice.dataToDecode.m_data, decodedSlice.dataToDecode.m_length, numByteConsumed))
|
||||||
|
{
|
||||||
|
cemuLog_log(LogType::Force, "H264AVC: Unable to determine picture size. Ignoring decode input");
|
||||||
|
std::unique_lock _l(m_decodeQueueMtx);
|
||||||
|
decodedSlice.result.isDecoded = true;
|
||||||
|
decodedSlice.result.hasFrame = false;
|
||||||
|
coreinit::OSSignalEvent(m_displayQueueEvt);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
decodedSlice.dataToDecode.m_length -= numByteConsumed;
|
||||||
|
decodedSlice.dataToDecode.m_data = (uint8*)decodedSlice.dataToDecode.m_data + numByteConsumed;
|
||||||
|
m_hasBufferSizeInfo = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
ivd_video_decode_ip_t s_dec_ip{ 0 };
|
||||||
|
ivd_video_decode_op_t s_dec_op{ 0 };
|
||||||
|
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
|
||||||
|
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
|
||||||
|
|
||||||
|
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
|
||||||
|
|
||||||
|
s_dec_ip.u4_ts = std::distance(m_decodedSliceArray.data(), &decodedSlice);
|
||||||
|
cemu_assert_debug(s_dec_ip.u4_ts < m_decodedSliceArray.size());
|
||||||
|
|
||||||
|
s_dec_ip.pv_stream_buffer = (uint8*)decodedSlice.dataToDecode.m_data;
|
||||||
|
s_dec_ip.u4_num_Bytes = decodedSlice.dataToDecode.m_length;
|
||||||
|
|
||||||
|
s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0;
|
||||||
|
s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0;
|
||||||
|
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
|
||||||
|
|
||||||
|
BenchmarkTimer bt;
|
||||||
|
bt.Start();
|
||||||
|
WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
|
||||||
|
if (status != 0 && (s_dec_op.u4_error_code&0xFF) == IVD_RES_CHANGED)
|
||||||
|
{
|
||||||
|
// resolution change
|
||||||
|
ResetDecoder();
|
||||||
|
m_hasBufferSizeInfo = false;
|
||||||
|
Decode(decodedSlice);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (status != 0)
|
||||||
|
{
|
||||||
|
cemuLog_log(LogType::Force, "H264: Failed to decode frame (error 0x{:08x})", status);
|
||||||
|
decodedSlice.result.hasFrame = false;
|
||||||
|
cemu_assert_unimplemented();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
bt.Stop();
|
||||||
|
double decodeTime = bt.GetElapsedMilliseconds();
|
||||||
|
|
||||||
|
cemu_assert(s_dec_op.u4_frame_decoded_flag);
|
||||||
|
cemu_assert_debug(s_dec_op.u4_num_bytes_consumed == decodedSlice.dataToDecode.m_length);
|
||||||
|
|
||||||
|
cemu_assert_debug(m_isBufferedMode || s_dec_op.u4_output_present); // if buffered mode is disabled, then every input should output a frame (except for partial slices?)
|
||||||
|
|
||||||
|
if (s_dec_op.u4_output_present)
|
||||||
|
{
|
||||||
|
cemu_assert(s_dec_op.e_output_format == IV_YUV_420SP_UV);
|
||||||
|
if (H264_IsBotW())
|
||||||
|
{
|
||||||
|
if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088)
|
||||||
|
s_dec_op.s_disp_frm_buf.u4_y_ht = 1080;
|
||||||
|
}
|
||||||
|
bt.Start();
|
||||||
|
PushDecodedFrame(s_dec_op);
|
||||||
|
bt.Stop();
|
||||||
|
double copyTime = bt.GetElapsedMilliseconds();
|
||||||
|
// release buffer
|
||||||
|
sint32 bufferId = -1;
|
||||||
|
for (size_t i = 0; i < m_displayBuf.size(); i++)
|
||||||
|
{
|
||||||
|
if (s_dec_op.s_disp_frm_buf.pv_y_buf >= m_displayBuf[i].data() && s_dec_op.s_disp_frm_buf.pv_y_buf < (m_displayBuf[i].data() + m_displayBuf[i].size()))
|
||||||
|
{
|
||||||
|
bufferId = (sint32)i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cemu_assert_debug(bufferId == s_dec_op.u4_disp_buf_id);
|
||||||
|
cemu_assert(bufferId >= 0);
|
||||||
|
ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 };
|
||||||
|
ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 };
|
||||||
|
s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
|
||||||
|
s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t);
|
||||||
|
s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t);
|
||||||
|
s_video_rel_disp_ip.u4_disp_buf_id = bufferId;
|
||||||
|
status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op);
|
||||||
|
cemu_assert(!status);
|
||||||
|
|
||||||
|
cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms CopyTime {}ms", decodeTime, copyTime);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms (no frame output)", decodeTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s_dec_op.u4_frame_decoded_flag)
|
||||||
|
m_numDecodedFrames++;
|
||||||
|
// get VUI
|
||||||
|
//ih264d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip;
|
||||||
|
//ih264d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op;
|
||||||
|
|
||||||
|
//s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL;
|
||||||
|
//s_ctl_get_vui_params_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_VUI_PARAMS;
|
||||||
|
//s_ctl_get_vui_params_ip.u4_size = sizeof(ih264d_ctl_get_vui_params_ip_t);
|
||||||
|
//s_ctl_get_vui_params_op.u4_size = sizeof(ih264d_ctl_get_vui_params_op_t);
|
||||||
|
|
||||||
|
//status = ih264d_api_function(mCodecCtx, &s_ctl_get_vui_params_ip, &s_ctl_get_vui_params_op);
|
||||||
|
//cemu_assert(status == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Flush()
|
||||||
|
{
|
||||||
|
// set flush mode
|
||||||
|
ivd_ctl_flush_ip_t s_video_flush_ip{ 0 };
|
||||||
|
ivd_ctl_flush_op_t s_video_flush_op{ 0 };
|
||||||
|
s_video_flush_ip.e_cmd = IVD_CMD_VIDEO_CTL;
|
||||||
|
s_video_flush_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH;
|
||||||
|
s_video_flush_ip.u4_size = sizeof(ivd_ctl_flush_ip_t);
|
||||||
|
s_video_flush_op.u4_size = sizeof(ivd_ctl_flush_op_t);
|
||||||
|
WORD32 status = ih264d_api_function(m_codecCtx, &s_video_flush_ip, &s_video_flush_op);
|
||||||
|
if (status != 0)
|
||||||
|
cemuLog_log(LogType::Force, "H264Dec: Unexpected error during flush ({})", status);
|
||||||
|
// get all frames from the decoder
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
ivd_video_decode_ip_t s_dec_ip{ 0 };
|
||||||
|
ivd_video_decode_op_t s_dec_op{ 0 };
|
||||||
|
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
|
||||||
|
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
|
||||||
|
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
|
||||||
|
s_dec_ip.pv_stream_buffer = NULL;
|
||||||
|
s_dec_ip.u4_num_Bytes = 0;
|
||||||
|
s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0;
|
||||||
|
s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0;
|
||||||
|
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
|
||||||
|
status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
|
||||||
|
if (status != 0)
|
||||||
|
break;
|
||||||
|
cemu_assert_debug(s_dec_op.u4_output_present != 0); // should never be false?
|
||||||
|
if(s_dec_op.u4_output_present == 0)
|
||||||
|
continue;
|
||||||
|
if (H264_IsBotW())
|
||||||
|
{
|
||||||
|
if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088)
|
||||||
|
s_dec_op.s_disp_frm_buf.u4_y_ht = 1080;
|
||||||
|
}
|
||||||
|
PushDecodedFrame(s_dec_op);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copies a decoded picture into the output buffer.
//   yIn        - first row of the luma plane
//   uvIn       - first row of the chroma plane (the codec is configured for IV_YUV_420SP_UV output)
//   bufOut     - destination; the chroma rows are written directly behind the luma rows
//   decodeInfo - provides picture width/height and the source stride
// Destination rows are padded to a multiple of 256 bytes.
// NOTE(review): the luma stride (u4_y_strd) is also used to step through the chroma plane - presumably both strides are equal, confirm.
void CopyImageToResultBuffer(uint8* yIn, uint8* uvIn, uint8* bufOut, ivd_video_decode_op_t& decodeInfo)
{
    const uint32 width = decodeInfo.s_disp_frm_buf.u4_y_wd;
    const uint32 height = decodeInfo.s_disp_frm_buf.u4_y_ht;
    const size_t srcPitch = decodeInfo.s_disp_frm_buf.u4_y_strd;
    const size_t dstPitch = (width + 0xFF) & ~0xFF;

    // copies rowCount rows of `width` bytes, advancing source and destination by their pitch
    auto copyRows = [width, srcPitch, dstPitch](const uint8* src, uint8* dst, uint32 rowCount)
    {
        for (uint32 i = 0; i < rowCount; i++, src += srcPitch, dst += dstPitch)
            memcpy(dst, src, width);
    };

    // Y plane
    copyRows(yIn, bufOut, height);
    // UV plane, half as many rows
    copyRows(uvIn, bufOut + dstPitch * height, height / 2);
}
|
||||||
|
private:
|
||||||
|
// Tells the codec how many cores it may use for decoding.
//   coreCount - valid numbers are 1-4
void SetDecoderCoreCount(uint32 coreCount)
{
    // zero-initialize like every other codec call in this class so that no indeterminate fields are handed to the codec
    ih264d_ctl_set_num_cores_ip_t s_set_cores_ip{};
    ih264d_ctl_set_num_cores_op_t s_set_cores_op{};
    s_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
    s_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES;
    s_set_cores_ip.u4_num_cores = coreCount; // valid numbers are 1-4
    s_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
    s_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);
    IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void *)&s_set_cores_ip, (void *)&s_set_cores_op);
    cemu_assert(status == IV_SUCCESS);
}
|
||||||
|
|
||||||
|
bool DetermineBufferSizes(void* data, uint32 length, uint32& numByteConsumed)
|
||||||
|
{
|
||||||
|
numByteConsumed = 0;
|
||||||
|
UpdateParameters(true);
|
||||||
|
|
||||||
|
ivd_video_decode_ip_t s_dec_ip{ 0 };
|
||||||
|
ivd_video_decode_op_t s_dec_op{ 0 };
|
||||||
|
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
|
||||||
|
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
|
||||||
|
|
||||||
|
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
|
||||||
|
s_dec_ip.pv_stream_buffer = (uint8*)data;
|
||||||
|
s_dec_ip.u4_num_Bytes = length;
|
||||||
|
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
|
||||||
|
WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
|
||||||
|
if (status != 0)
|
||||||
|
{
|
||||||
|
cemuLog_log(LogType::Force, "H264: Unable to determine buffer sizes for stream");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
numByteConsumed = s_dec_op.u4_num_bytes_consumed;
|
||||||
|
cemu_assert(status == 0);
|
||||||
|
if (s_dec_op.u4_pic_wd == 0 || s_dec_op.u4_pic_ht == 0)
|
||||||
|
return false;
|
||||||
|
UpdateParameters(false);
|
||||||
|
ReinitBuffers();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReinitBuffers()
|
||||||
|
{
|
||||||
|
ivd_ctl_getbufinfo_ip_t s_ctl_ip{ 0 };
|
||||||
|
ivd_ctl_getbufinfo_op_t s_ctl_op{ 0 };
|
||||||
|
WORD32 outlen = 0;
|
||||||
|
|
||||||
|
s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
|
||||||
|
s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
|
||||||
|
s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t);
|
||||||
|
s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t);
|
||||||
|
|
||||||
|
WORD32 status = ih264d_api_function(m_codecCtx, &s_ctl_ip, &s_ctl_op);
|
||||||
|
cemu_assert(!status);
|
||||||
|
|
||||||
|
// allocate
|
||||||
|
for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
|
||||||
|
{
|
||||||
|
m_displayBuf.emplace_back().resize(s_ctl_op.u4_min_out_buf_size[0] + s_ctl_op.u4_min_out_buf_size[1]);
|
||||||
|
}
|
||||||
|
// set
|
||||||
|
ivd_set_display_frame_ip_t s_set_display_frame_ip{ 0 }; // make sure to zero-initialize this. The codec seems to check the first 3 pointers/sizes per frame, regardless of the value of u4_num_bufs
|
||||||
|
ivd_set_display_frame_op_t s_set_display_frame_op{ 0 };
|
||||||
|
|
||||||
|
s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME;
|
||||||
|
s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t);
|
||||||
|
s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t);
|
||||||
|
|
||||||
|
cemu_assert_debug(s_ctl_op.u4_min_num_out_bufs == 2);
|
||||||
|
cemu_assert_debug(s_ctl_op.u4_min_out_buf_size[0] != 0 && s_ctl_op.u4_min_out_buf_size[1] != 0);
|
||||||
|
|
||||||
|
s_set_display_frame_ip.num_disp_bufs = s_ctl_op.u4_num_disp_bufs;
|
||||||
|
|
||||||
|
for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
|
||||||
|
{
|
||||||
|
s_set_display_frame_ip.s_disp_buffer[i].u4_num_bufs = 2;
|
||||||
|
s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[0] = s_ctl_op.u4_min_out_buf_size[0];
|
||||||
|
s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[1] = s_ctl_op.u4_min_out_buf_size[1];
|
||||||
|
s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[0] = m_displayBuf[i].data() + 0;
|
||||||
|
s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[1] = m_displayBuf[i].data() + s_ctl_op.u4_min_out_buf_size[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ih264d_api_function(m_codecCtx, &s_set_display_frame_ip, &s_set_display_frame_op);
|
||||||
|
cemu_assert(!status);
|
||||||
|
|
||||||
|
|
||||||
|
// mark all as released (available)
|
||||||
|
for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
|
||||||
|
{
|
||||||
|
ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 };
|
||||||
|
ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 };
|
||||||
|
|
||||||
|
s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
|
||||||
|
s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t);
|
||||||
|
s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t);
|
||||||
|
s_video_rel_disp_ip.u4_disp_buf_id = i;
|
||||||
|
|
||||||
|
status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op);
|
||||||
|
cemu_assert(!status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ResetDecoder()
|
||||||
|
{
|
||||||
|
ivd_ctl_reset_ip_t s_ctl_ip;
|
||||||
|
ivd_ctl_reset_op_t s_ctl_op;
|
||||||
|
|
||||||
|
s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
|
||||||
|
s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET;
|
||||||
|
s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t);
|
||||||
|
s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t);
|
||||||
|
|
||||||
|
WORD32 status = ih264d_api_function(m_codecCtx, (void*)&s_ctl_ip, (void*)&s_ctl_op);
|
||||||
|
cemu_assert_debug(status == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void UpdateParameters(bool headerDecodeOnly)
|
||||||
|
{
|
||||||
|
ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 };
|
||||||
|
ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 };
|
||||||
|
ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t;
|
||||||
|
ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t;
|
||||||
|
|
||||||
|
ps_ctl_ip->u4_disp_wd = 0;
|
||||||
|
ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE;
|
||||||
|
ps_ctl_ip->e_frm_out_mode = m_isBufferedMode ? IVD_DISPLAY_FRAME_OUT : IVD_DECODE_FRAME_OUT;
|
||||||
|
ps_ctl_ip->e_vid_dec_mode = headerDecodeOnly ? IVD_DECODE_HEADER : IVD_DECODE_FRAME;
|
||||||
|
ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL;
|
||||||
|
ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
|
||||||
|
ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t);
|
||||||
|
ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t);
|
||||||
|
|
||||||
|
WORD32 status = ih264d_api_function(m_codecCtx, &s_h264d_ctl_ip, &s_h264d_ctl_op);
|
||||||
|
cemu_assert(status == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void DecoderThread()
|
||||||
|
{
|
||||||
|
while(!m_threadShouldExit)
|
||||||
|
{
|
||||||
|
m_decodeSem.decrementWithWait();
|
||||||
|
std::unique_lock _l(m_decodeQueueMtx);
|
||||||
|
if (m_decodeQueue.empty())
|
||||||
|
continue;
|
||||||
|
uint32 decodeIndex = m_decodeQueue.front();
|
||||||
|
m_decodeQueue.erase(m_decodeQueue.begin());
|
||||||
|
_l.unlock();
|
||||||
|
if(decodeIndex == CMD_FLUSH)
|
||||||
|
{
|
||||||
|
Flush();
|
||||||
|
_l.lock();
|
||||||
|
cemu_assert_debug(m_decodeQueue.empty()); // after flushing the queue should be empty since the sender is waiting for the flush to complete
|
||||||
|
_l.unlock();
|
||||||
|
coreinit::OSSignalEvent(m_flushEvt);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto& decodedSlice = m_decodedSliceArray[decodeIndex];
|
||||||
|
Decode(decodedSlice);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
iv_obj_t* m_codecCtx{nullptr};
|
||||||
|
bool m_hasBufferSizeInfo{ false };
|
||||||
|
bool m_isBufferedMode{ false };
|
||||||
|
uint32 m_numDecodedFrames{0};
|
||||||
|
std::vector<std::vector<uint8>> m_displayBuf;
|
||||||
|
|
||||||
|
std::thread m_decoderThread;
|
||||||
|
std::atomic_bool m_threadShouldExit{false};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Factory: allocates an H264AVCDecoder with `new` and returns it as a pointer to the backend base class.
// NOTE(review): presumably the caller takes ownership and must delete the object - confirm at the call sites.
H264DecoderBackend* CreateAVCDecoder()
{
	H264DecoderBackend* backend = new H264AVCDecoder();
	return backend;
}
|
||||||
|
};
|
|
@ -0,0 +1,139 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "util/helpers/Semaphore.h"
|
||||||
|
#include "Cafe/OS/libs/coreinit/coreinit_Thread.h"
|
||||||
|
#include "Cafe/OS/libs/coreinit/coreinit_SysHeap.h"
|
||||||
|
|
||||||
|
#include "Cafe/OS/libs/h264_avc/parser/H264Parser.h"
|
||||||
|
|
||||||
|
namespace H264
|
||||||
|
{
|
||||||
|
class H264DecoderBackend
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
struct DataToDecode
|
||||||
|
{
|
||||||
|
uint8* m_data;
|
||||||
|
uint32 m_length;
|
||||||
|
std::vector<uint8> m_buffer;
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr uint32 CMD_FLUSH = 0xFFFFFFFF;
|
||||||
|
|
||||||
|
public:
|
||||||
|
struct DecodeResult
|
||||||
|
{
|
||||||
|
bool isDecoded{false};
|
||||||
|
bool hasFrame{false}; // set to true if a full frame was successfully decoded
|
||||||
|
double timestamp{};
|
||||||
|
void* imageOutput{nullptr};
|
||||||
|
sint32 frameWidth{0};
|
||||||
|
sint32 frameHeight{0};
|
||||||
|
uint32 bytesPerRow{0};
|
||||||
|
bool cropEnable{false};
|
||||||
|
sint32 cropTop{0};
|
||||||
|
sint32 cropBottom{0};
|
||||||
|
sint32 cropLeft{0};
|
||||||
|
sint32 cropRight{0};
|
||||||
|
};
|
||||||
|
|
||||||
|
struct DecodedSlice
|
||||||
|
{
|
||||||
|
bool isUsed{false};
|
||||||
|
DecodeResult result;
|
||||||
|
DataToDecode dataToDecode;
|
||||||
|
};
|
||||||
|
|
||||||
|
H264DecoderBackend()
|
||||||
|
{
|
||||||
|
m_displayQueueEvt = (coreinit::OSEvent*)coreinit::OSAllocFromSystem(sizeof(coreinit::OSEvent), 4);
|
||||||
|
coreinit::OSInitEvent(m_displayQueueEvt, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO);
|
||||||
|
m_flushEvt = (coreinit::OSEvent*)coreinit::OSAllocFromSystem(sizeof(coreinit::OSEvent), 4);
|
||||||
|
coreinit::OSInitEvent(m_flushEvt, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO);
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual ~H264DecoderBackend()
|
||||||
|
{
|
||||||
|
coreinit::OSFreeToSystem(m_displayQueueEvt);
|
||||||
|
coreinit::OSFreeToSystem(m_flushEvt);
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual void Init(bool isBufferedMode) = 0;
|
||||||
|
virtual void Destroy() = 0;
|
||||||
|
|
||||||
|
void QueueForDecode(uint8* data, uint32 length, double timestamp, void* imagePtr)
|
||||||
|
{
|
||||||
|
std::unique_lock _l(m_decodeQueueMtx);
|
||||||
|
|
||||||
|
DecodedSlice& ds = GetFreeDecodedSliceEntry();
|
||||||
|
|
||||||
|
ds.dataToDecode.m_buffer.assign(data, data + length);
|
||||||
|
ds.dataToDecode.m_data = ds.dataToDecode.m_buffer.data();
|
||||||
|
ds.dataToDecode.m_length = length;
|
||||||
|
|
||||||
|
ds.result.isDecoded = false;
|
||||||
|
ds.result.imageOutput = imagePtr;
|
||||||
|
ds.result.timestamp = timestamp;
|
||||||
|
|
||||||
|
m_decodeQueue.push_back(std::distance(m_decodedSliceArray.data(), &ds));
|
||||||
|
m_decodeSem.increment();
|
||||||
|
}
|
||||||
|
|
||||||
|
void QueueFlush()
|
||||||
|
{
|
||||||
|
std::unique_lock _l(m_decodeQueueMtx);
|
||||||
|
m_decodeQueue.push_back(CMD_FLUSH);
|
||||||
|
m_decodeSem.increment();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GetFrameOutputIfReady(DecodeResult& result)
|
||||||
|
{
|
||||||
|
std::unique_lock _l(m_decodeQueueMtx);
|
||||||
|
if(m_displayQueue.empty())
|
||||||
|
return false;
|
||||||
|
uint32 sliceIndex = m_displayQueue.front();
|
||||||
|
DecodedSlice& ds = m_decodedSliceArray[sliceIndex];
|
||||||
|
cemu_assert_debug(ds.result.isDecoded);
|
||||||
|
std::swap(result, ds.result);
|
||||||
|
ds.isUsed = false;
|
||||||
|
m_displayQueue.erase(m_displayQueue.begin());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
coreinit::OSEvent& GetFrameOutputEvent()
|
||||||
|
{
|
||||||
|
return *m_displayQueueEvt;
|
||||||
|
}
|
||||||
|
|
||||||
|
coreinit::OSEvent& GetFlushEvent()
|
||||||
|
{
|
||||||
|
return *m_flushEvt;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
DecodedSlice& GetFreeDecodedSliceEntry()
|
||||||
|
{
|
||||||
|
for (auto& slice : m_decodedSliceArray)
|
||||||
|
{
|
||||||
|
if (!slice.isUsed)
|
||||||
|
{
|
||||||
|
slice.isUsed = true;
|
||||||
|
return slice;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cemu_assert_suspicious();
|
||||||
|
return m_decodedSliceArray[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
std::mutex m_decodeQueueMtx;
|
||||||
|
std::vector<uint32> m_decodeQueue; // indices into m_decodedSliceArray, in order of decode input
|
||||||
|
CounterSemaphore m_decodeSem;
|
||||||
|
std::vector<uint32> m_displayQueue; // indices into m_decodedSliceArray, in order of frame display output
|
||||||
|
coreinit::OSEvent* m_displayQueueEvt; // signalled when a new frame is ready for display
|
||||||
|
coreinit::OSEvent* m_flushEvt; // signalled after flush operation finished and all queued slices are decoded
|
||||||
|
|
||||||
|
// frame output queue
|
||||||
|
std::mutex m_frameOutputMtx;
|
||||||
|
std::array<DecodedSlice, 32> m_decodedSliceArray;
|
||||||
|
};
|
||||||
|
}
|
|
@ -319,6 +319,17 @@ bool parseNAL_pic_parameter_set_rbsp(h264ParserState_t* h264ParserState, h264Par
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses a sequence parameter set from the given bytes using a temporary parser state.
// data, length: the bytes wrapped by the RBSPInputBitstream that is handed to the SPS parser
// sps: receives a copy of the parsed SPS on success; not written on failure
// Returns true only if parseNAL_seq_parameter_set_rbsp succeeded and the parser state reports an SPS.
bool h264Parser_ParseSPS(uint8* data, uint32 length, h264State_seq_parameter_set_t& sps)
{
	// value-initialize so that hasSPS is read from a defined state even if the parser never writes it
	h264ParserState_t parserState{};
	RBSPInputBitstream nalStream(data, length);
	if (!parseNAL_seq_parameter_set_rbsp(&parserState, nullptr, nalStream))
		return false;
	if (!parserState.hasSPS)
		return false;
	sps = parserState.sps;
	return true;
}
|
||||||
|
|
||||||
void parseNAL_ref_pic_list_modification(const h264State_seq_parameter_set_t& sps, const h264State_pic_parameter_set_t& pps, RBSPInputBitstream& nalStream, nal_slice_header_t* sliceHeader)
|
void parseNAL_ref_pic_list_modification(const h264State_seq_parameter_set_t& sps, const h264State_pic_parameter_set_t& pps, RBSPInputBitstream& nalStream, nal_slice_header_t* sliceHeader)
|
||||||
{
|
{
|
||||||
if (!sliceHeader->slice_type.isSliceTypeI() && !sliceHeader->slice_type.isSliceTypeSI())
|
if (!sliceHeader->slice_type.isSliceTypeI() && !sliceHeader->slice_type.isSliceTypeSI())
|
||||||
|
@ -688,9 +699,8 @@ void _calculateFrameOrder(h264ParserState_t* h264ParserState, const h264State_se
|
||||||
else if (sps.pic_order_cnt_type == 2)
|
else if (sps.pic_order_cnt_type == 2)
|
||||||
{
|
{
|
||||||
// display order matches decode order
|
// display order matches decode order
|
||||||
|
|
||||||
uint32 prevFrameNum = h264ParserState->picture_order.prevFrameNum;
|
uint32 prevFrameNum = h264ParserState->picture_order.prevFrameNum;
|
||||||
;
|
|
||||||
uint32 FrameNumOffset;
|
uint32 FrameNumOffset;
|
||||||
if (sliceHeader->IdrPicFlag)
|
if (sliceHeader->IdrPicFlag)
|
||||||
{
|
{
|
||||||
|
@ -706,9 +716,6 @@ void _calculateFrameOrder(h264ParserState_t* h264ParserState, const h264State_se
|
||||||
FrameNumOffset = prevFrameNumOffset + sps.getMaxFrameNum();
|
FrameNumOffset = prevFrameNumOffset + sps.getMaxFrameNum();
|
||||||
else
|
else
|
||||||
FrameNumOffset = prevFrameNumOffset;
|
FrameNumOffset = prevFrameNumOffset;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 tempPicOrderCnt;
|
uint32 tempPicOrderCnt;
|
||||||
|
|
|
@ -513,6 +513,8 @@ typedef struct
|
||||||
void h264Parse(h264ParserState_t* h264ParserState, h264ParserOutput_t* output, uint8* data, uint32 length, bool parseSlices = true);
|
void h264Parse(h264ParserState_t* h264ParserState, h264ParserOutput_t* output, uint8* data, uint32 length, bool parseSlices = true);
|
||||||
sint32 h264GetUnitLength(h264ParserState_t* h264ParserState, uint8* data, uint32 length);
|
sint32 h264GetUnitLength(h264ParserState_t* h264ParserState, uint8* data, uint32 length);
|
||||||
|
|
||||||
|
bool h264Parser_ParseSPS(uint8* data, uint32 length, h264State_seq_parameter_set_t& sps);
|
||||||
|
|
||||||
void h264Parser_getScalingMatrix4x4(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix4x4);
|
void h264Parser_getScalingMatrix4x4(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix4x4);
|
||||||
void h264Parser_getScalingMatrix8x8(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix8x8);
|
void h264Parser_getScalingMatrix8x8(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix8x8);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue