// Files
// UnrealEngine/Engine/Source/Runtime/AVEncoder/Private/Decoders/vdecmpeg4/M4XCmdSingleThread.cpp
// 2025-05-18 13:04:45 +08:00
//
// 349 lines
// 11 KiB
// C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "M4XCmdSingleThread.h"
#include "M4MemOps.h"
#include "M4Image.h"
#include "M4InvQuant.h"
#include "M4idct.h"
#include "M4BitstreamHeaderInfo.h"
namespace vdecmpeg4
{
// Rounding right shift of a by b bits: half of the divisor (1 << (b-1)) is
// added before shifting, reduced by one when a is not strictly positive.
#define M4RSHIFT(a,b) (((a) + (1<<((b)-1)) - (((a) > 0) ? 0 : 1)) >> (b))
// Yields 1 for strictly positive X and -1 otherwise (zero maps to -1).
#define M4SIGN(X) (((X) <= 0) ? -1 : 1)
// Absolute value of X. The argument is evaluated more than once.
#define M4ABS(X) (((X) <= 0) ? -(X) : (X))
// ----------------------------------------------------------------------------
/**
 * Constructor.
 *
 * The object starts detached: no decoder is set and no DCT work buffer
 * is held until Init() is called.
 */
M4XCmdSingleThread::M4XCmdSingleThread()
{
	mpDecoder = nullptr;
	mpDctWorkData = nullptr;
}
// ----------------------------------------------------------------------------
/**
* Destructor
*
* Releases nothing itself. It only checks, via M4CHECK, that the DCT work
* buffer pointer is null at this point, i.e. that the buffer was never
* allocated or has already been released by Exit().
*/
M4XCmdSingleThread::~M4XCmdSingleThread()
{
M4CHECK(mpDctWorkData == nullptr);
}
// ----------------------------------------------------------------------------
/**
 * Attach to a decoder and allocate the DCT work buffer.
 *
 * The buffer holds 6 blocks of 64 int16 values and is obtained from the
 * decoder's memory handler (mMemSys). M4CHECK guards against calling this
 * while a work buffer is still held and against a null decoder.
 *
 * @param pDecoder Decoder whose memory handler is used; must not be null
 *
 * @return VID_OK on success, VID_ERROR_OUT_OF_MEMORY if the work buffer
 *         could not be allocated (the decoder is detached again in that case)
 */
VIDError M4XCmdSingleThread::Init(M4Decoder* pDecoder)
{
	M4CHECK(mpDctWorkData == nullptr);
	M4CHECK(pDecoder);

	mpDecoder = pDecoder;

	M4MemHandler& allocator = mpDecoder->mMemSys;
	mpDctWorkData = (int16*)allocator.malloc(64*6*sizeof(int16));
	if (mpDctWorkData)
	{
		return VID_OK;
	}

	// Allocation failed: detach again so that Exit() has nothing to release.
	mpDecoder = nullptr;
	return VID_ERROR_OUT_OF_MEMORY;
}
// ----------------------------------------------------------------------------
/**
* Release command processing resources
*/
void M4XCmdSingleThread::Exit()
{
// Check if Init was called...
if (!mpDecoder)
{
return;
}
M4MemHandler& memSys = mpDecoder->mMemSys;
memSys.free(mpDctWorkData);
mpDctWorkData = nullptr;
mpDecoder = nullptr;
}
// ----------------------------------------------------------------------------
/**
 * Store the per-frame state used by the macroblock update calls.
 *
 * Only the pointers are remembered; nothing is copied or validated here.
 *
 * @param pOutput     Image the macroblock updates write into
 * @param pHeaderInfo Bitstream header information consulted during the updates
 * @param pRefImage   The two reference images, addressed later by index 0 or 1
 */
void M4XCmdSingleThread::FrameBegin(M4Image* pOutput, M4BitstreamHeaderInfo* pHeaderInfo, M4Image* pRefImage[2])
{
	for (int refIdx = 0; refIdx < 2; ++refIdx)
	{
		mpRefImage[refIdx] = pRefImage[refIdx];
	}
	mpHeaderInfo = pHeaderInfo;
	mpOutput = pOutput;
}
// ----------------------------------------------------------------------------
/**
 * Perform INTRA (I) macroblock update.
 *
 * All six 64-coefficient blocks of the cache entry are inverse-quantized into
 * the DCT work buffer and inverse-transformed in place. The cache entry is
 * then released and the work buffer is handed to M4MemOpIntraMBAll for the
 * macroblock at (mbx, mby) of the output image.
 *
 * @param pMB         Macroblock; its mQuant is passed to the inverse quantizer
 * @param mbx         Macroblock x position, forwarded to M4MemOpIntraMBAll
 * @param mby         Macroblock y position, forwarded to M4MemOpIntraMBAll
 * @param pCacheEntry Source of the coefficients (mDctFromBitstream) and of the
 *                    per-block DC scalers (mDcScaler); mState is reset to 0
 **/
void M4XCmdSingleThread::XUpdateIntraMB(M4_MB* pMB, int32 mbx, int32 mby, M4BitstreamCacheEntry* pCacheEntry)
{
	int16* coeffIn = pCacheEntry->mDctFromBitstream;
	uint16* dcScalers = pCacheEntry->mDcScaler;

	for (uint32 blk = 0; blk < 6; ++blk)
	{
		// Each block occupies 64 consecutive coefficients.
		const uint32 offset = blk * 64;

		if (mpHeaderInfo->mFlags.mQuantType != 0)
		{
			M4InvQuantType1Intra(mpDctWorkData+offset, coeffIn+offset, pMB->mQuant, dcScalers[blk], mpHeaderInfo->mInvQuantIntra);
		}
		else
		{
			M4InvQuantType0Intra(mpDctWorkData+offset, coeffIn+offset, pMB->mQuant, dcScalers[blk]);
		}

		M4idct(mpDctWorkData+offset);
	}

	// The coefficients have been consumed: free this cache block.
	pCacheEntry->mState = 0;

	M4MemOpIntraMBAll(mpOutput, mbx, mby, mpDctWorkData);
}
// ----------------------------------------------------------------------------
/**
 * Perform INTER macroblock update.
 *
 * Motion-compensates the macroblock at (mbx, mby) from reference image
 * refImageNo into the output image. Luma uses a single call to
 * M4MemHalfPelInterpolate when all four forward vectors are identical and
 * one call per 8x8 quadrant otherwise. Both chroma planes use one vector
 * derived from the luma vector(s). When pMB->mCbp is non-zero, the coded
 * blocks are inverse-quantized, inverse-transformed and handed to
 * M4MemOpInterMBAdd.
 *
 * @param pMB         Macroblock (mMode, forward vectors mFMv, mCbp, mQuant)
 * @param mbx         Macroblock x position (luma x = mbx*16, chroma x = mbx*8)
 * @param mby         Macroblock y position (luma y = mby*16, chroma y = mby*8)
 * @param pCacheEntry Coefficient source; only accessed when pMB->mCbp is
 *                    non-zero, and then released by setting mState to 0
 * @param (unnamed)   MV_PREDICTION argument; not used by this implementation
 * @param refImageNo  Index of the reference image set by FrameBegin (0 or 1)
 */
void M4XCmdSingleThread::XUpdateInterMB(M4_MB* pMB, int32 mbx, int32 mby, M4BitstreamCacheEntry* pCacheEntry, MV_PREDICTION, uint32 refImageNo)
{
	M4CHECK(mpDctWorkData);
	// FrameBegin() only provides reference images 0 and 1.
	M4CHECK(refImageNo < 2);

	// Chroma vector component derived from a single luma vector component.
	const auto chromaFromOne = [](int32 v) -> int32
	{
		return (v & 3) ? ((v >> 1) | 1) : (v / 2);
	};
	// Chroma vector component derived from the sum of four luma vector components.
	const auto chromaFromSum = [](int32 sum) -> int32
	{
		return (sum == 0 ? 0 : M4SIGN(sum) * ((int32)M4Decoder::mRoundtab[M4ABS(sum) % 16] + (M4ABS(sum) / 16) * 2));
	};

	M4_VECTOR d_uv;
	if (pMB->mMode == M4_MBMODE_INTER || pMB->mMode == M4_MBMODE_INTER_Q)
	{
		d_uv = pMB->mFMv[0];
		d_uv.x = chromaFromOne(d_uv.x);
		d_uv.y = chromaFromOne(d_uv.y);
	}
	else
	{
		// M4_MBMODE_INTER4V (not possible in interlace mode)
		d_uv.x = chromaFromSum(pMB->mFMv[0].x + pMB->mFMv[1].x + pMB->mFMv[2].x + pMB->mFMv[3].x);
		d_uv.y = chromaFromSum(pMB->mFMv[0].y + pMB->mFMv[1].y + pMB->mFMv[2].y + pMB->mFMv[3].y);
	}

	// --- luma ---
	int32 stride = mpOutput->mImage.texWidth;
	uint8* cur = mpOutput->mImage.y;
	uint8* ref = mpRefImage[refImageNo]->mImage.y;
	const int32 lumaX = mbx<<4;
	const int32 lumaY = mby<<4;

	// Four separate 8x8 predictions are only needed when the vectors differ.
	const int32 tx = pMB->mFMv[0].x;
	const int32 ty = pMB->mFMv[0].y;
	bool bVectorsDiffer = false;
	for (int32 blk = 1; blk < 4 && !bVectorsDiffer; ++blk)
	{
		bVectorsDiffer = (pMB->mFMv[blk].x != tx) || (pMB->mFMv[blk].y != ty);
	}

	if (bVectorsDiffer)
	{
		// Quadrant order: (0,0), (8,0), (0,8), (8,8).
		for (int32 blk = 0; blk < 4; ++blk)
		{
			M4MemHalfPelInterpolate(cur, ref, stride, lumaX + ((blk & 1) << 3), lumaY + ((blk >> 1) << 3), &(pMB->mFMv[blk]), mpHeaderInfo->mFlags.mRounding);
		}
	}
	else
	{
		// NOTE(review): the trailing 'true' presumably selects a whole-macroblock
		// variant of the interpolation - confirm against M4MemHalfPelInterpolate.
		M4MemHalfPelInterpolate(cur, ref, stride, lumaX, lumaY, &(pMB->mFMv[0]), mpHeaderInfo->mFlags.mRounding, true);
	}

	// --- chroma: half the luma position and half the stride ---
	const int32 chromaX = mbx<<3;
	const int32 chromaY = mby<<3;
	int32 stride2 = stride>>1;
	M4MemHalfPelInterpolate(mpOutput->mImage.u, mpRefImage[refImageNo]->mImage.u, stride2, chromaX, chromaY, &d_uv, mpHeaderInfo->mFlags.mRounding);
	M4MemHalfPelInterpolate(mpOutput->mImage.v, mpRefImage[refImageNo]->mImage.v, stride2, chromaX, chromaY, &d_uv, mpHeaderInfo->mFlags.mRounding);

	// --- residual ---
	if (pMB->mCbp)
	{
		M4CHECK(pCacheEntry);
		const int16* pDctFromStream = pCacheEntry->mDctFromBitstream;
		uint32 dctOffset = 0;
		// Bit 5 of mCbp belongs to block 0, bit 0 to block 5.
		uint32 mask = 1 << 5;
		for(uint32 i = 0; i < 6; i++, dctOffset+=64, mask>>=1)
		{
			// Skip blocks that are not coded inside the stream.
			if (!(pMB->mCbp & mask))
			{
				continue;
			}

			if (mpHeaderInfo->mFlags.mQuantType == 0)
			{
				M4InvQuantType0Inter(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, pMB->mQuant);
			}
			else
			{
				M4InvQuantType1Inter(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, pMB->mQuant, mpHeaderInfo->mInvQuantInter);
			}
			M4idct(mpDctWorkData+dctOffset);
		}
		pCacheEntry->mState = 0; // free this cache block
		M4MemOpInterMBAdd(mpOutput, mbx, mby, mpDctWorkData, pMB->mCbp);
	}
}
// ----------------------------------------------------------------------------
/**
 * Decode an INTER macroblock for B frames using interpolation.
 *
 * The forward prediction (vectors mFMv, reference refImageForward) is written
 * into the output image and the backward prediction (vectors mBMv, reference
 * refImageBackward) into the decoder's first temp image. M4MemOpInterpolateAll
 * then merges the temp image into the output for this macroblock. When
 * mb->mCbp is non-zero, the coded blocks are inverse-quantized,
 * inverse-transformed and handed to M4MemOpInterMBAdd.
 *
 * @param mb               Macroblock (mMode, mFMv, mBMv, mCbp, mQuant)
 * @param mbx              Macroblock x position (luma x = mbx*16, chroma x = mbx*8)
 * @param mby              Macroblock y position (luma y = mby*16, chroma y = mby*8)
 * @param pCacheEntry      Coefficient source; only accessed when mb->mCbp is
 *                         non-zero, and then released by setting mState to 0
 * @param refImageForward  Index of the forward reference image (0 or 1)
 * @param refImageBackward Index of the backward reference image (0 or 1)
 * @param (unnamed)        uint16 argument; not used by this implementation
 */
void M4XCmdSingleThread::XInterpolateMB(M4_MB* mb, int32 mbx, int32 mby, M4BitstreamCacheEntry* pCacheEntry, uint32 refImageForward, uint32 refImageBackward, uint16)
{
	// Same precondition XUpdateInterMB checks before touching the work buffer.
	M4CHECK(mpDctWorkData);
	// FrameBegin() only provides reference images 0 and 1.
	M4CHECK(refImageForward < 2);
	M4CHECK(refImageBackward < 2);

	M4Image* imgForward = mpRefImage[refImageForward];
	M4Image* imgBackward = mpRefImage[refImageBackward];
	M4Image* pTmpImage = mpDecoder->mTempImage[0];

	// Chroma vector component derived from a single luma vector component.
	const auto chromaFromOne = [](int32 v) -> int32
	{
		return (v & 3) ? ((v >> 1) | 1) : (v / 2);
	};
	// Chroma vector component derived from the sum of four luma vector components.
	const auto chromaFromSum = [](int32 sum) -> int32
	{
		return (sum == 0 ? 0 : M4SIGN(sum) * ((int32)M4Decoder::mRoundtab[M4ABS(sum) % 16] + (M4ABS(sum) / 16) * 2));
	};

	M4_VECTOR f_uv, b_uv;
	if (mb->mMode == M4_MBMODE_INTER || mb->mMode == M4_MBMODE_INTER_Q)
	{
		// only 1 motion vector
		f_uv = mb->mFMv[0];
		f_uv.x = chromaFromOne(f_uv.x);
		f_uv.y = chromaFromOne(f_uv.y);
		b_uv = mb->mBMv[0];
		b_uv.x = chromaFromOne(b_uv.x);
		b_uv.y = chromaFromOne(b_uv.y);
	}
	else
	{
		// use 4 motion vectors
		f_uv.x = chromaFromSum(mb->mFMv[0].x + mb->mFMv[1].x + mb->mFMv[2].x + mb->mFMv[3].x);
		f_uv.y = chromaFromSum(mb->mFMv[0].y + mb->mFMv[1].y + mb->mFMv[2].y + mb->mFMv[3].y);
		b_uv.x = chromaFromSum(mb->mBMv[0].x + mb->mBMv[1].x + mb->mBMv[2].x + mb->mBMv[3].x);
		b_uv.y = chromaFromSum(mb->mBMv[0].y + mb->mBMv[1].y + mb->mBMv[2].y + mb->mBMv[3].y);
	}

	int32 stride = mpOutput->mImage.texWidth;
	int32 x = mbx<<4; // * 16
	int32 y = mby<<4; // * 16
	int32 x2 = mbx<<3;
	int32 y2 = mby<<3;
	int32 stride2 = stride>>1; // /2

	// Luma prediction of one 16x16 macroblock from 'src' into 'dst' using the
	// four vectors starting at 'mv': one call when they are all identical,
	// otherwise one call per 8x8 quadrant in the order (0,0), (8,0), (0,8), (8,8).
	const auto compensateLuma = [&](uint8* dst, uint8* src, M4_VECTOR* mv)
	{
		bool bVectorsDiffer = false;
		for (int32 blk = 1; blk < 4 && !bVectorsDiffer; ++blk)
		{
			bVectorsDiffer = (mv[blk].x != mv[0].x) || (mv[blk].y != mv[0].y);
		}

		if (bVectorsDiffer)
		{
			for (int32 blk = 0; blk < 4; ++blk)
			{
				M4MemHalfPelInterpolate(dst, src, stride, x + ((blk & 1) << 3), y + ((blk >> 1) << 3), &mv[blk], mpHeaderInfo->mFlags.mRounding);
			}
		}
		else
		{
			// NOTE(review): the trailing 'true' presumably selects a whole-macroblock
			// variant of the interpolation - confirm against M4MemHalfPelInterpolate.
			M4MemHalfPelInterpolate(dst, src, stride, x, y, &mv[0], mpHeaderInfo->mFlags.mRounding, true);
		}
	};

	// perform FORWARD interpolation
	compensateLuma(mpOutput->mImage.y, imgForward->mImage.y, &(mb->mFMv[0]));
	M4MemHalfPelInterpolate(mpOutput->mImage.u, imgForward->mImage.u, stride2, x2, y2, &f_uv, mpHeaderInfo->mFlags.mRounding);
	M4MemHalfPelInterpolate(mpOutput->mImage.v, imgForward->mImage.v, stride2, x2, y2, &f_uv, mpHeaderInfo->mFlags.mRounding);

	// perform BACKWARD interpolation (into temp image buffer)
	compensateLuma(pTmpImage->mImage.y, imgBackward->mImage.y, &(mb->mBMv[0]));
	M4MemHalfPelInterpolate(pTmpImage->mImage.u, imgBackward->mImage.u, stride2, x2, y2, &b_uv, mpHeaderInfo->mFlags.mRounding);
	M4MemHalfPelInterpolate(pTmpImage->mImage.v, imgBackward->mImage.v, stride2, x2, y2, &b_uv, mpHeaderInfo->mFlags.mRounding);

	// merge forward and backward images
	M4MemOpInterpolateAll(mpOutput, mbx, mby, pTmpImage);

	if (mb->mCbp)
	{
		M4CHECK(pCacheEntry);
		const int16* pDctFromStream = pCacheEntry->mDctFromBitstream;
		uint32 dctOffset = 0;
		// Bit 5 of mCbp belongs to block 0, bit 0 to block 5.
		uint32 mask = 1 << 5;
		for(uint32 i=0; i<6; i++, dctOffset += 64, mask>>=1)
		{
			// Skip blocks that are not coded inside the stream.
			if (!(mb->mCbp & mask))
			{
				continue;
			}

			if (mpHeaderInfo->mFlags.mQuantType == 0)
			{
				M4InvQuantType0Inter(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, mb->mQuant);
			}
			else
			{
				M4InvQuantType1Inter(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, mb->mQuant, mpHeaderInfo->mInvQuantInter);
			}
			M4idct(mpDctWorkData+dctOffset);
		}
		pCacheEntry->mState = 0; // free this cache block
		M4MemOpInterMBAdd(mpOutput, mbx, mby, mpDctWorkData, mb->mCbp);
	}
}
}