349 lines
11 KiB
C++
349 lines
11 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
#include "M4XCmdSingleThread.h"
|
|
#include "M4MemOps.h"
|
|
#include "M4Image.h"
|
|
#include "M4InvQuant.h"
|
|
#include "M4idct.h"
|
|
#include "M4BitstreamHeaderInfo.h"
|
|
|
|
namespace vdecmpeg4
|
|
{
|
|
|
|
// Right shift of a by b bits with a rounding bias added first: half of (1<<b) for positive a, half minus one otherwise.
// Both arguments are expanded more than once, and b must be at least 1 for the (b)-1 shift to be valid.
#define M4RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
|
|
// Yields 1 when X is strictly positive and -1 otherwise (note: zero also maps to -1).
#define M4SIGN(X) (((X)>0)?1:-1)
|
|
// Absolute value of X. The argument is expanded twice, so avoid passing expressions with side effects.
#define M4ABS(X) (((X)>0)?(X):-(X))
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
/**
|
|
* Constructor
|
|
*/
|
|
M4XCmdSingleThread::M4XCmdSingleThread()
|
|
{
|
|
mpDctWorkData = nullptr;
|
|
mpDecoder = nullptr;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
/**
|
|
* Destructor
|
|
*/
|
|
M4XCmdSingleThread::~M4XCmdSingleThread()
|
|
{
|
|
M4CHECK(mpDctWorkData == nullptr);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
/**
|
|
* Handle initialization for command processing
|
|
*
|
|
* @param pDecoder
|
|
*
|
|
* @return true if successful
|
|
*/
|
|
VIDError M4XCmdSingleThread::Init(M4Decoder* pDecoder)
|
|
{
|
|
M4CHECK(mpDctWorkData == nullptr);
|
|
M4CHECK(pDecoder);
|
|
|
|
mpDecoder = pDecoder;
|
|
M4MemHandler& memSys = mpDecoder->mMemSys;
|
|
|
|
mpDctWorkData = (int16*)memSys.malloc(64*6*sizeof(int16));
|
|
if (!mpDctWorkData)
|
|
{
|
|
mpDecoder = nullptr;
|
|
return VID_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
return VID_OK;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
/**
|
|
* Release command processing resources
|
|
*/
|
|
void M4XCmdSingleThread::Exit()
|
|
{
|
|
// Check if Init was called...
|
|
if (!mpDecoder)
|
|
{
|
|
return;
|
|
}
|
|
|
|
M4MemHandler& memSys = mpDecoder->mMemSys;
|
|
memSys.free(mpDctWorkData);
|
|
|
|
mpDctWorkData = nullptr;
|
|
mpDecoder = nullptr;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
/**
|
|
* Setup required data for movie frame decode start
|
|
*
|
|
* @param pOutput
|
|
* @param pHeaderInfo
|
|
* @param pRefImage
|
|
*/
|
|
void M4XCmdSingleThread::FrameBegin(M4Image* pOutput, M4BitstreamHeaderInfo* pHeaderInfo, M4Image* pRefImage[2])
|
|
{
|
|
mpOutput = pOutput;
|
|
mpHeaderInfo = pHeaderInfo;
|
|
mpRefImage[0] = pRefImage[0];
|
|
mpRefImage[1] = pRefImage[1];
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
/**
|
|
* Perform INTRA (I) macroblock update
|
|
**/
|
|
void M4XCmdSingleThread::XUpdateIntraMB(M4_MB* pMB, int32 mbx, int32 mby, M4BitstreamCacheEntry* pCacheEntry)
|
|
{
|
|
int16* pDctFromStream = pCacheEntry->mDctFromBitstream;
|
|
uint16* pDcScaler = pCacheEntry->mDcScaler;
|
|
|
|
uint32 dctOffset = 0;
|
|
for(uint32 i=0; i<6; i++, dctOffset += 64)
|
|
{
|
|
if (mpHeaderInfo->mFlags.mQuantType == 0)
|
|
{
|
|
M4InvQuantType0Intra(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, pMB->mQuant, pDcScaler[i]);
|
|
}
|
|
else
|
|
{
|
|
M4InvQuantType1Intra(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, pMB->mQuant, pDcScaler[i], mpHeaderInfo->mInvQuantIntra);
|
|
}
|
|
M4idct(mpDctWorkData+dctOffset);
|
|
}
|
|
|
|
pCacheEntry->mState = 0; // free this cache block
|
|
|
|
M4MemOpIntraMBAll(mpOutput, mbx, mby, mpDctWorkData);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
/**
|
|
* Perform INTER macroblock update
|
|
*
|
|
* @param pMB
|
|
* @param mbx
|
|
* @param mby
|
|
* @param fields
|
|
* @param pCacheEntry
|
|
* @param refImageNo
|
|
*/
|
|
void M4XCmdSingleThread::XUpdateInterMB(M4_MB* pMB, int32 mbx, int32 mby, M4BitstreamCacheEntry* pCacheEntry, MV_PREDICTION, uint32 refImageNo)
|
|
{
|
|
M4CHECK(mpDctWorkData);
|
|
|
|
M4_VECTOR d_uv;
|
|
if (pMB->mMode == M4_MBMODE_INTER || pMB->mMode == M4_MBMODE_INTER_Q)
|
|
{
|
|
d_uv = pMB->mFMv[0];
|
|
d_uv.x = (d_uv.x & 3) ? (d_uv.x >> 1) | 1 : d_uv.x / 2;
|
|
d_uv.y = (d_uv.y & 3) ? (d_uv.y >> 1) | 1 : d_uv.y / 2;
|
|
}
|
|
else
|
|
{
|
|
// M4_MBMODE_INTER4V (not possible in interlace mode)
|
|
int32 sum = pMB->mFMv[0].x + pMB->mFMv[1].x + pMB->mFMv[2].x + pMB->mFMv[3].x;
|
|
d_uv.x = (sum == 0 ? 0 : M4SIGN(sum) * ((int32)M4Decoder::mRoundtab[M4ABS(sum) % 16] + (M4ABS(sum) / 16) * 2) );
|
|
|
|
sum = pMB->mFMv[0].y + pMB->mFMv[1].y + pMB->mFMv[2].y + pMB->mFMv[3].y;
|
|
d_uv.y = (sum == 0 ? 0 : M4SIGN(sum) * ((int32)M4Decoder::mRoundtab[M4ABS(sum) % 16] + (M4ABS(sum) / 16) * 2) );
|
|
}
|
|
|
|
int32 stride = mpOutput->mImage.texWidth;
|
|
uint8* cur = mpOutput->mImage.y;
|
|
uint8* ref = mpRefImage[refImageNo]->mImage.y;
|
|
|
|
int32 x = mbx<<4;
|
|
int32 y = mby<<4;
|
|
|
|
int32 tx = pMB->mFMv[0].x;
|
|
int32 ty = pMB->mFMv[0].y;
|
|
if (pMB->mFMv[1].x != tx || pMB->mFMv[1].y != ty ||
|
|
pMB->mFMv[2].x != tx || pMB->mFMv[2].y != ty ||
|
|
pMB->mFMv[3].x != tx || pMB->mFMv[3].y != ty)
|
|
{
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x, y, &(pMB->mFMv[0]), mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x+8, y, &(pMB->mFMv[1]), mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x, y+8, &(pMB->mFMv[2]), mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x+8, y+8, &(pMB->mFMv[3]), mpHeaderInfo->mFlags.mRounding);
|
|
}
|
|
else
|
|
{
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x, y, &(pMB->mFMv[0]), mpHeaderInfo->mFlags.mRounding, true);
|
|
}
|
|
|
|
x = mbx<<3;
|
|
y = mby<<3;
|
|
int32 stride2 = stride>>1;
|
|
M4MemHalfPelInterpolate(mpOutput->mImage.u, mpRefImage[refImageNo]->mImage.u, stride2, x, y, &d_uv, mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(mpOutput->mImage.v, mpRefImage[refImageNo]->mImage.v, stride2, x, y, &d_uv, mpHeaderInfo->mFlags.mRounding);
|
|
|
|
if (pMB->mCbp)
|
|
{
|
|
M4CHECK(pCacheEntry);
|
|
const int16* pDctFromStream = pCacheEntry->mDctFromBitstream;
|
|
|
|
uint32 dctOffset = 0;
|
|
uint32 mask = 1 << 5;
|
|
for(uint32 i = 0; i < 6; i++, dctOffset+=64, mask>>=1)
|
|
{
|
|
// check if any block is coded inside stream
|
|
if (pMB->mCbp & mask)
|
|
{
|
|
if (mpHeaderInfo->mFlags.mQuantType == 0)
|
|
{
|
|
M4InvQuantType0Inter(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, pMB->mQuant);
|
|
}
|
|
else
|
|
{
|
|
M4InvQuantType1Inter(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, pMB->mQuant, mpHeaderInfo->mInvQuantInter);
|
|
}
|
|
M4idct(mpDctWorkData+dctOffset);
|
|
}
|
|
}
|
|
|
|
pCacheEntry->mState = 0; // free this cache block
|
|
M4MemOpInterMBAdd(mpOutput, mbx, mby, mpDctWorkData, pMB->mCbp);
|
|
}
|
|
}
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
/**
|
|
* Decode an INTER macroblock for B frames using interpolation
|
|
*
|
|
* @param mb
|
|
* @param mbx
|
|
* @param mby
|
|
* @param pCacheEntry
|
|
* @param refImageForward
|
|
* @param refImageBackward
|
|
*/
|
|
void M4XCmdSingleThread::XInterpolateMB(M4_MB* mb, int32 mbx, int32 mby, M4BitstreamCacheEntry* pCacheEntry, uint32 refImageForward, uint32 refImageBackward, uint16)
|
|
{
|
|
M4Image* imgForward = mpRefImage[refImageForward];
|
|
M4Image* imgBackward = mpRefImage[refImageBackward];
|
|
M4Image* pTmpImage = mpDecoder->mTempImage[0];
|
|
|
|
M4_VECTOR f_uv, b_uv;
|
|
if (mb->mMode == M4_MBMODE_INTER || mb->mMode == M4_MBMODE_INTER_Q)
|
|
{
|
|
// only 1 motion vector
|
|
f_uv = mb->mFMv[0];
|
|
f_uv.x = (f_uv.x & 3) ? (f_uv.x >> 1) | 1 : f_uv.x / 2;
|
|
f_uv.y = (f_uv.y & 3) ? (f_uv.y >> 1) | 1 : f_uv.y / 2;
|
|
|
|
b_uv = mb->mBMv[0];
|
|
b_uv.x = (b_uv.x & 3) ? (b_uv.x >> 1) | 1 : b_uv.x / 2;
|
|
b_uv.y = (b_uv.y & 3) ? (b_uv.y >> 1) | 1 : b_uv.y / 2;
|
|
}
|
|
else
|
|
{
|
|
// use 4 motion vectors
|
|
int32 sum;
|
|
sum = mb->mFMv[0].x + mb->mFMv[1].x + mb->mFMv[2].x + mb->mFMv[3].x;
|
|
f_uv.x = (sum == 0 ? 0 : M4SIGN(sum) * ((int32)M4Decoder::mRoundtab[M4ABS(sum) % 16] + (M4ABS(sum) / 16) * 2) );
|
|
|
|
sum = mb->mFMv[0].y + mb->mFMv[1].y + mb->mFMv[2].y + mb->mFMv[3].y;
|
|
f_uv.y = (sum == 0 ? 0 : M4SIGN(sum) * ((int32)M4Decoder::mRoundtab[M4ABS(sum) % 16] + (M4ABS(sum) / 16) * 2) );
|
|
|
|
sum = mb->mBMv[0].x + mb->mBMv[1].x + mb->mBMv[2].x + mb->mBMv[3].x;
|
|
b_uv.x = (sum == 0 ? 0 : M4SIGN(sum) * ((int32)M4Decoder::mRoundtab[M4ABS(sum) % 16] + (M4ABS(sum) / 16) * 2) );
|
|
|
|
sum = mb->mBMv[0].y + mb->mBMv[1].y + mb->mBMv[2].y + mb->mBMv[3].y;
|
|
b_uv.y = (sum == 0 ? 0 : M4SIGN(sum) * ((int32)M4Decoder::mRoundtab[M4ABS(sum) % 16] + (M4ABS(sum) / 16) * 2) );
|
|
|
|
}
|
|
|
|
// perform FORWARD interpolation
|
|
int32 stride = mpOutput->mImage.texWidth;
|
|
uint8* cur = mpOutput->mImage.y;
|
|
uint8* ref = imgForward->mImage.y;
|
|
int32 x = mbx<<4; // * 16
|
|
int32 y = mby<<4; // * 16
|
|
|
|
int32 tx = mb->mFMv[0].x;
|
|
int32 ty = mb->mFMv[0].y;
|
|
if (mb->mFMv[1].x != tx || mb->mFMv[1].y != ty ||
|
|
mb->mFMv[2].x != tx || mb->mFMv[2].y != ty ||
|
|
mb->mFMv[3].x != tx || mb->mFMv[3].y != ty)
|
|
{
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x, y, &(mb->mFMv[0]), mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x+8, y, &(mb->mFMv[1]), mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x, y+8, &(mb->mFMv[2]), mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x+8, y+8, &(mb->mFMv[3]), mpHeaderInfo->mFlags.mRounding);
|
|
}
|
|
else
|
|
{
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x, y, &(mb->mFMv[0]), mpHeaderInfo->mFlags.mRounding, true);
|
|
}
|
|
|
|
int32 x2 = mbx<<3;
|
|
int32 y2 = mby<<3;
|
|
int32 stride2 = stride>>1; // /2
|
|
M4MemHalfPelInterpolate(mpOutput->mImage.u, imgForward->mImage.u, stride2, x2, y2, &f_uv, mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(mpOutput->mImage.v, imgForward->mImage.v, stride2, x2, y2, &f_uv, mpHeaderInfo->mFlags.mRounding);
|
|
|
|
// perform BACKWARD interpolation (into temp image buffer)
|
|
cur = pTmpImage->mImage.y;
|
|
ref = imgBackward->mImage.y;
|
|
|
|
tx = mb->mBMv[0].x;
|
|
ty = mb->mBMv[0].y;
|
|
if (mb->mBMv[1].x != tx || mb->mBMv[1].y != ty ||
|
|
mb->mBMv[2].x != tx || mb->mBMv[2].y != ty ||
|
|
mb->mBMv[3].x != tx || mb->mBMv[3].y != ty)
|
|
{
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x, y, &(mb->mBMv[0]), mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x+8, y, &(mb->mBMv[1]), mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x, y+8, &(mb->mBMv[2]), mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x+8, y+8, &(mb->mBMv[3]), mpHeaderInfo->mFlags.mRounding);
|
|
}
|
|
else
|
|
{
|
|
M4MemHalfPelInterpolate(cur, ref, stride, x, y, &(mb->mBMv[0]), mpHeaderInfo->mFlags.mRounding, true);
|
|
}
|
|
|
|
M4MemHalfPelInterpolate(pTmpImage->mImage.u, imgBackward->mImage.u, stride2, x2, y2, &b_uv, mpHeaderInfo->mFlags.mRounding);
|
|
M4MemHalfPelInterpolate(pTmpImage->mImage.v, imgBackward->mImage.v, stride2, x2, y2, &b_uv, mpHeaderInfo->mFlags.mRounding);
|
|
|
|
// merge forward and backward images
|
|
M4MemOpInterpolateAll(mpOutput, mbx, mby, pTmpImage);
|
|
|
|
if (mb->mCbp)
|
|
{
|
|
M4CHECK(pCacheEntry);
|
|
const int16* pDctFromStream = pCacheEntry->mDctFromBitstream;
|
|
|
|
uint32 dctOffset = 0;
|
|
uint32 mask = 1 << 5;
|
|
for(uint32 i=0; i<6; i++, dctOffset += 64, mask>>=1)
|
|
{
|
|
// check if any block is coded inside stream
|
|
if (mb->mCbp & mask)
|
|
{
|
|
if (mpHeaderInfo->mFlags.mQuantType == 0)
|
|
{
|
|
M4InvQuantType0Inter(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, mb->mQuant);
|
|
}
|
|
else
|
|
{
|
|
M4InvQuantType1Inter(mpDctWorkData+dctOffset, pDctFromStream+dctOffset, mb->mQuant, mpHeaderInfo->mInvQuantInter);
|
|
}
|
|
M4idct(mpDctWorkData+dctOffset);
|
|
}
|
|
}
|
|
|
|
pCacheEntry->mState = 0; // free this cache block
|
|
M4MemOpInterMBAdd(mpOutput, mbx, mby, mpDctWorkData, mb->mCbp);
|
|
}
|
|
}
|
|
|
|
|
|
}
|
|
|