// Copyright Epic Games, Inc. All Rights Reserved.

#include "MuR/ImagePrivate.h"
#include "MuR/SystemPrivate.h"
#include "Async/ParallelFor.h"
#include "HAL/UnrealMemory.h"

namespace
{

bool bEnableCompressedMipGenerationMemoryOptimizations = true;
static FAutoConsoleVariableRef CVarEnableCompressedMipGenerationMemoryOptimizations(
    TEXT("mutable.EnableCompressedMipGenerationMemoryOptimizations"),
    bEnableCompressedMipGenerationMemoryOptimizations,
    TEXT("If set to true, enables memory optimizations for mip generation on compressed images."),
    ECVF_Default);

}

namespace mu
{

namespace OpImageMipmapInternal
{

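/** Build the next mip level by point sampling: each destination texel is a copy of the
 * top-left texel of its 2x2 source block. PIXEL_SIZE is the size of one texel in bytes. */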
template<int32 PIXEL_SIZE>
inline void GenerateNextMipmapUint8Unfiltered(const uint8* SourceData, uint8* DestData, FIntVector2 SourceSize)
{
    FIntVector2 DestSize = FIntVector2(
        FMath::DivideAndRoundUp(SourceSize.X, 2),
        FMath::DivideAndRoundUp(SourceSize.Y, 2));

    for (int32 Y = 0; Y < DestSize.Y; ++Y)
    {
        for (int32 X = 0; X < DestSize.X; ++X)
        {
            uint8* DestPixel = DestData + (Y*DestSize.X + X) * PIXEL_SIZE;
            const uint8* SourcePixel = SourceData + ((Y << 1) * SourceSize.X + (X << 1)) * PIXEL_SIZE;

            FMemory::Memcpy(DestPixel, SourcePixel, PIXEL_SIZE);
        }
    }
}

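/** Build the next mip level with a 2x2 box filter. Full 2x2 blocks are averaged; a stray last
 * column or row (odd source dimensions) is averaged from the texels that do exist.
 * For PIXEL_SIZE == 4 whole pixels are averaged at once with 64/32-bit bitwise arithmetic
 * instead of channel by channel. */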
template<int32 PIXEL_SIZE>
UE_NO_PROFILE_SVE_BUG
inline void GenerateNextMipmapUint8SimpleAverage(const uint8* SourceData, uint8* DestData, FIntVector2 SourceSize)
{
    check(SourceSize[0] > 1 || SourceSize[1] > 1);

    FIntVector2 DestSize = FIntVector2(
        FMath::DivideAndRoundUp(SourceSize.X, 2),
        FMath::DivideAndRoundUp(SourceSize.Y, 2));

    int32 FullColumns = SourceSize.X / 2;
    int32 FullRows = SourceSize.Y / 2;
    bool bStrayColumn = (SourceSize.X % 2) != 0;
    bool bStrayRow = (SourceSize.Y % 2) != 0;

    int32 SourceStride = SourceSize.X * PIXEL_SIZE;
    int32 DestStride = DestSize.X * PIXEL_SIZE;

    const auto ProcessRow = [
        DestData, SourceData, FullColumns, bStrayColumn, SourceStride, DestStride
    ] (uint32 Y)
    {
        const uint8* SourceRow0 = SourceData + 2*Y*SourceStride;
        const uint8* SourceRow1 = SourceRow0 + SourceStride;
        uint8* DestRow = DestData + Y * DestStride;

        for (int32 X = 0; X < FullColumns; ++X)
        {
            if constexpr (PIXEL_SIZE == 4)
            {
                // Use memcpy to avoid any possible but improbable UB. memcpy should be optimized away by the compiler.
                uint64 Row0Bits;
                uint64 Row1Bits;

                FMemory::Memcpy(&Row0Bits, SourceRow0, sizeof(uint64));
                FMemory::Memcpy(&Row1Bits, SourceRow1, sizeof(uint64));

                const uint64 XorRow0Row1Bits = Row0Bits ^ Row1Bits;

                // Average of 2 unsigned integers without overflow extended to work on multiple bytes.
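                // Per byte lane: avg(a, b) = (a & b) + (((a ^ b) & 0xFE..FE) >> 1) rounds down; adding back
                // ((a ^ b) & 0x01..01) makes it round up. The 0xFE mask keeps the shifted low bit of each
                // byte lane from bleeding into the lane below. Example on one lane: a = 0x03, b = 0x04 gives
                // (a & b) = 0, shifted xor = 3, correction = 1, so the rounded-up average is 4.
                // The vertical pass below rounds up; the final horizontal combine rounds down.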
                constexpr uint64 ShiftMask = 0xFEFEFEFEFEFEFEFE;
                const uint64 ErrorCorrection = XorRow0Row1Bits & 0x0101010101010101;
                const uint64 AvgLowBits = (Row0Bits & Row1Bits) + ((XorRow0Row1Bits & ShiftMask) >> 1) + ErrorCorrection;
                const uint64 AvgHighBits = AvgLowBits >> 32;
                const uint32 Result = (AvgLowBits & AvgHighBits) + (((AvgLowBits ^ AvgHighBits) & ShiftMask) >> 1);

                FMemory::Memcpy(DestRow, &Result, sizeof(uint32));
            }
            else
            {
                for (int32 C = 0; C < PIXEL_SIZE; ++C)
                {
                    int32 PixelSum = SourceRow0[C] + SourceRow0[PIXEL_SIZE + C] + SourceRow1[C] + SourceRow1[PIXEL_SIZE + C];
                    DestRow[C] = (uint8)(PixelSum >> 2);
                }
            }

            SourceRow0 += 2*PIXEL_SIZE;
            SourceRow1 += 2*PIXEL_SIZE;
            DestRow += PIXEL_SIZE;
        }

        if (bStrayColumn)
        {
            if constexpr (PIXEL_SIZE == 4)
            {
                uint32 Row0Bits;
                uint32 Row1Bits;

                FMemory::Memcpy(&Row0Bits, SourceRow0, sizeof(uint32));
                FMemory::Memcpy(&Row1Bits, SourceRow1, sizeof(uint32));

                // Average of 2 unsigned integers without overflow extended to work on multiple bytes.
                constexpr uint32 ShiftMask = 0xFEFEFEFE;
                const uint32 Result = (Row0Bits & Row1Bits) + (((Row0Bits ^ Row1Bits) & ShiftMask) >> 1);

                FMemory::Memcpy(DestRow, &Result, sizeof(uint32));
            }
            else
            {
                for (int32 C = 0; C < PIXEL_SIZE; ++C)
                {
                    int32 PixelSum = SourceRow0[C] + SourceRow1[C];
                    DestRow[C] = (uint8)(PixelSum >> 1);
                }
            }
        }
    };

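    // Process rows serially for small mips; only dispatch a ParallelFor when the destination
    // has enough pixels to make the task overhead worthwhile.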
    constexpr int32 PixelConcurrencyThreshold = 0xffff;
    if (DestSize[0] * DestSize[1] < PixelConcurrencyThreshold)
    {
        for (int32 Y = 0; Y < FullRows; ++Y)
        {
            ProcessRow(Y);
        }
    }
    else
    {
        ParallelFor(FullRows, ProcessRow);
    }

    if (bStrayRow)
    {
        const uint8* SourceRow0 = SourceData + 2 * FullRows * SourceStride;
        const uint8* SourceRow1 = SourceRow0 + SourceStride;
        uint8* DestRow = DestData + FullRows * DestStride;

        for (int32 X = 0; X < FullColumns; ++X)
        {
            if constexpr (PIXEL_SIZE == 4)
            {
                uint32 Col0Bits;
                uint32 Col1Bits;

                FMemory::Memcpy(&Col0Bits, SourceRow0, sizeof(uint32));
                FMemory::Memcpy(&Col1Bits, SourceRow0 + 4, sizeof(uint32));

                // Average of 2 unsigned integers without overflow extended to work on multiple bytes.
                // In this case we use the ceil variant to be consistent with the method used for 4 pixel average.
                constexpr uint32 ShiftMask = 0xFEFEFEFE;
                const uint32 Result = (Col0Bits & Col1Bits) + (((Col0Bits ^ Col1Bits) & ShiftMask) >> 1);

                FMemory::Memcpy(DestRow, &Result, sizeof(uint32));
            }
            else
            {
                for (int32 C = 0; C < PIXEL_SIZE; ++C)
                {
                    int32 P = SourceRow0[C] + SourceRow0[PIXEL_SIZE + C];
                    DestRow[C] = (uint8)(P >> 1);
                }
            }

            SourceRow0 += 2*PIXEL_SIZE;
            DestRow += PIXEL_SIZE;
        }

        if (bStrayColumn)
        {
            if constexpr (PIXEL_SIZE == 4)
            {
                FMemory::Memcpy(DestRow, SourceRow0, 4);
            }
            else
            {
                for (int32 C = 0; C < PIXEL_SIZE; ++C)
                {
                    DestRow[C] = SourceRow0[C];
                }
            }
        }
    }
}

// Generate the next mip, in uncompressed form, from a block-compressed image.
template<int32 NumChannels, EMipmapFilterType Filter>
inline void GenerateNextMipBlockCompressed(
    const uint8* Src, uint8* Dest, FIntVector2 SrcSize, EImageFormat SrcFormat, EImageFormat DestFormat)
{
    MUTABLE_CPUPROFILER_SCOPE(GenerateNextMipBlockCompressed);

    const FImageFormatData& DestFormatData = GetImageFormatData(DestFormat);
    const FImageFormatData& SrcFormatData = GetImageFormatData(SrcFormat);

    check(NumChannels == DestFormatData.Channels);
    check(DestFormatData.PixelsPerBlockX == 1 && DestFormatData.PixelsPerBlockY == 1);

    const int32 DestChannelCount = DestFormatData.Channels;
    const FIntVector2 PixelsPerBlock = FIntVector2(SrcFormatData.PixelsPerBlockX, SrcFormatData.PixelsPerBlockY);
    const int32 BlockSizeInBytes = SrcFormatData.BytesPerBlock;

    const FIntVector2 DestSize = FIntVector2(
        FMath::DivideAndRoundUp(SrcSize.X, 2),
        FMath::DivideAndRoundUp(SrcSize.Y, 2));

    const FIntVector2 NumBlocks = FIntVector2(
        FMath::DivideAndRoundUp(SrcSize.X, PixelsPerBlock.X),
        FMath::DivideAndRoundUp(SrcSize.Y, PixelsPerBlock.Y));

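    // The source is decompressed in batches of 32x16 blocks into per-job staging memory, and the next
    // mip is generated from the staging buffer directly into Dest, so the whole image never needs to be
    // decompressed at once.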
    constexpr int32 BatchSizeInBlocksX = 1 << 5;
    constexpr int32 BatchSizeInBlocksY = 1 << 4;

    FIntVector2 NumBatches = FIntVector2(
        FMath::DivideAndRoundUp(NumBlocks.X, BatchSizeInBlocksX),
        FMath::DivideAndRoundUp(NumBlocks.Y, BatchSizeInBlocksY));

    // Limit the number of parallel jobs based on the actual number of workers. Here we cannot rely on
    // ParallelFor to balance the load, because staging memory has to be allocated for every job.
    // Make sure there is always at least 1 job.
    // TODO: Consider balancing the work using a 2D grid.
    const int32 MaxParallelJobs = FMath::Max(1, FMath::Min(int32(LowLevelTasks::FScheduler::Get().GetNumWorkers()), 8));

    constexpr int32 MinRowBatchesPerJob = 1;

    const int32 NumRowBatchesPerJob =
        FMath::Min(NumBatches.Y, FMath::Max(MinRowBatchesPerJob, FMath::DivideAndRoundUp(NumBatches.Y, MaxParallelJobs)));

    const int32 NumParallelJobs = FMath::DivideAndRoundUp(NumBatches.Y, NumRowBatchesPerJob);

    // Use the tracking allocator policy tied to the image memory counter. This does not help prevent
    // memory peaks, but it makes them visible if they happen. The allocation should be small enough
    // that going over budget by this amount is not a problem.
    TArray<uint8, FDefaultMemoryTrackingAllocator<MemoryCounters::FImageMemoryCounter>> StagingMemory;

    const miro::FImageSize StagingSize = miro::FImageSize(
        uint16(BatchSizeInBlocksX*PixelsPerBlock.X),
        uint16(BatchSizeInBlocksY*PixelsPerBlock.Y));

    // Allocate extra memory so the mip computation can work on all possible pixel sizes.
    // Also add some extra padding so different threads do not share cache lines.
    const int32 PerJobStagingBytes = StagingSize.X*StagingSize.Y*NumChannels + 8 + 64;

    StagingMemory.SetNum(PerJobStagingBytes*NumParallelJobs);
    uint8* const StagingMemoryData = StagingMemory.GetData();

    miro::SubImageDecompression::FuncRefType DecompressionFunc = SelectDecompressionFunction(DestFormat, SrcFormat);

    auto ProcessJob =
        [
            NumParallelJobs, NumRowBatchesPerJob,
            StagingMemoryData, PerJobStagingBytes,
            NumBatches, NumBlocks, PixelsPerBlock, BlockSizeInBytes, DecompressionFunc,
            Src, SrcSize, Dest, DestSize
        ](int32 JobId)
    {
        const int32 JobRowBegin = JobId*NumRowBatchesPerJob;
        const int32 JobRowEnd = FMath::Min(JobRowBegin + NumRowBatchesPerJob, NumBatches.Y);
        uint8* const JobStagingMemoryData = StagingMemoryData + JobId*PerJobStagingBytes;

        for (int32 BatchY = JobRowBegin; BatchY < JobRowEnd; ++BatchY)
        {
            for (int32 BatchX = 0; BatchX < NumBatches.X; ++BatchX)
            {
                const FIntVector2 BatchBeginInBlocks = FIntVector2(BatchX*BatchSizeInBlocksX, BatchY*BatchSizeInBlocksY);
                const FIntVector2 BatchEndInBlocks = FIntVector2(
                    FMath::Min(BatchBeginInBlocks.X + BatchSizeInBlocksX, NumBlocks.X),
                    FMath::Min(BatchBeginInBlocks.Y + BatchSizeInBlocksY, NumBlocks.Y));

                const uint8* const SrcBatchData = Src + (BatchBeginInBlocks.Y * NumBlocks.X + BatchBeginInBlocks.X)*BlockSizeInBytes;

                // Assume the decompressed size is always a multiple of the block size. Unused bytes are
                // trimmed when copying to the final destination.
                const FIntVector2 BatchDecSizeInPixels = FIntVector2(
                    (BatchEndInBlocks.X - BatchBeginInBlocks.X)*PixelsPerBlock.X,
                    (BatchEndInBlocks.Y - BatchBeginInBlocks.Y)*PixelsPerBlock.Y);

                const miro::FImageSize FromSize = miro::FImageSize(uint16(SrcSize.X), uint16(SrcSize.Y));
                const miro::FImageSize SubSize = miro::FImageSize(uint16(BatchDecSizeInPixels.X), uint16(BatchDecSizeInPixels.Y));
                DecompressionFunc(FromSize, SubSize, SubSize, SrcBatchData, JobStagingMemoryData);

                const FIntVector2 BatchOutBeginInPixels = FIntVector2(
                    (BatchBeginInBlocks.X*PixelsPerBlock.X) >> 1,
                    (BatchBeginInBlocks.Y*PixelsPerBlock.Y) >> 1);

                const FIntVector2 BatchOutEndInPixels = FIntVector2(
                    FMath::Min(BatchOutBeginInPixels.X + ((BatchSizeInBlocksX*PixelsPerBlock.X) >> 1), DestSize.X),
                    FMath::Min(BatchOutBeginInPixels.Y + ((BatchSizeInBlocksY*PixelsPerBlock.Y) >> 1), DestSize.Y));

                // Generate the partial next mip into Dest.
                // This works for all pixel sizes because more memory than needed has been preallocated.
                for (int32 Y = BatchOutBeginInPixels.Y; Y < BatchOutEndInPixels.Y; ++Y)
                {
                    for (int32 X = BatchOutBeginInPixels.X; X < BatchOutEndInPixels.X; ++X)
                    {
                        uint8* const DestPixel = Dest + (Y*DestSize.X + X) * NumChannels;

                        const FIntVector2 Row0Offset = FIntVector2(
                            (X - BatchOutBeginInPixels.X) << 1, (Y - BatchOutBeginInPixels.Y) << 1);

                        uint8 const * const SrcRow0 = JobStagingMemoryData + (Row0Offset.Y*BatchDecSizeInPixels.X + Row0Offset.X) * NumChannels;

                        if constexpr (Filter == EMipmapFilterType::SimpleAverage)
                        {
                            // Use memcpy to avoid any possible but improbable UB. memcpy should be optimized away by the compiler.
                            uint64 Row0Bits;
                            FMemory::Memcpy(&Row0Bits, SrcRow0, sizeof(uint64));

                            uint8 const * const SrcRow1 = JobStagingMemoryData +
                                (FMath::Min(Row0Offset.Y + 1, BatchDecSizeInPixels.Y - 1)*BatchDecSizeInPixels.X + Row0Offset.X) * NumChannels;

                            uint64 Row1Bits;
                            FMemory::Memcpy(&Row1Bits, SrcRow1, sizeof(uint64));

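                            // If the right-hand texel of the 2x2 block falls outside the decompressed batch,
                            // the left texel is averaged with itself. The 64-bit reads may touch bytes past
                            // the batch row, which is safe thanks to the extra staging padding, and those
                            // bytes do not contribute to the result.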
                            const bool bOutOfBounds = Row0Offset.X + 1 >= BatchDecSizeInPixels.X;

                            constexpr uint64 ShiftMask = 0xFEFEFEFEFEFEFEFE;

                            const uint64 XorRow0Row1Bits = Row0Bits ^ Row1Bits;
                            const uint64 ErrorCorrection = XorRow0Row1Bits & 0x0101010101010101;

                            // Average of 2 unsigned integers without overflow extended to work on multiple bytes.
                            const uint64 AvgLowBits = (Row0Bits & Row1Bits) + ((XorRow0Row1Bits & ShiftMask) >> 1) + ErrorCorrection;
                            const uint64 AvgHighBits = bOutOfBounds ? AvgLowBits : (AvgLowBits >> NumChannels*8);
                            const uint32 Result = (AvgLowBits & AvgHighBits) + (((AvgLowBits ^ AvgHighBits) & ShiftMask) >> 1);

                            FMemory::Memcpy(DestPixel, &Result, NumChannels);
                        }
                        else // constexpr Filter == EMipmapFilterType::Unfiltered
                        {
                            FMemory::Memcpy(DestPixel, SrcRow0, NumChannels);
                        }

                        static_assert(
                            Filter == EMipmapFilterType::SimpleAverage ||
                            Filter == EMipmapFilterType::Unfiltered);
                    }
                }
            }
        }
    };

    if (NumParallelJobs == 1)
    {
        ProcessJob(0);
    }
    else if (NumParallelJobs > 1)
    {
        ParallelFor(NumParallelJobs, ProcessJob);
    }
}

} // namespace OpImageMipmapInternal


/** Generate the mipmaps for byte-based images with any number of channels.
 * Levels [DestLODBegin, DestLODEnd) of DestImage are generated starting from level SrcLOD of SourceImage.
 */
template<int32 PIXEL_SIZE>
inline void GenerateMipmapUint8LODRange(
    int32 SrcLOD, int32 DestLODBegin, int32 DestLODEnd,
    const FImage* SourceImage, FImage* DestImage,
    const FMipmapGenerationSettings& Settings)
{
    using namespace OpImageMipmapInternal;

    FIntVector2 SourceSize = SourceImage->CalculateMipSize(SrcLOD);

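    // Sanity check: the first destination LOD must have the size of the mip immediately below the source LOD.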
    check(Invoke([&]() -> bool
    {
        FIntVector2 DestBeginLODSize = DestImage->CalculateMipSize(DestLODBegin);

        FIntVector2 SrcNextLODSize = FIntVector2(
            FMath::DivideAndRoundUp(SourceSize.X, 2),
            FMath::DivideAndRoundUp(SourceSize.Y, 2));

        return DestBeginLODSize == SrcNextLODSize;
    }));

    switch (Settings.FilterType)
    {
    case EMipmapFilterType::SimpleAverage:
    {
        const uint8* SrcData = SourceImage->GetLODData(SrcLOD);

        for (int32 L = DestLODBegin; L < DestLODEnd; ++L)
        {
            uint8* DestData = DestImage->GetLODData(L);
            GenerateNextMipmapUint8SimpleAverage<PIXEL_SIZE>(SrcData, DestData, SourceSize);

            SrcData = DestData;
            SourceSize = FIntVector2(
                FMath::DivideAndRoundUp(SourceSize.X, 2),
                FMath::DivideAndRoundUp(SourceSize.Y, 2));
        }
        break;
    }
    case EMipmapFilterType::Unfiltered:
    {
        const uint8* SrcData = SourceImage->GetLODData(SrcLOD);

        for (int32 L = DestLODBegin; L < DestLODEnd; ++L)
        {
            uint8* DestData = DestImage->GetLODData(L);
            GenerateNextMipmapUint8Unfiltered<PIXEL_SIZE>(SrcData, DestData, SourceSize);

            SrcData = DestData;
            SourceSize = FIntVector2(
                FMath::DivideAndRoundUp(SourceSize.X, 2),
                FMath::DivideAndRoundUp(SourceSize.Y, 2));
        }
        break;
    }
    default:
    {
        check(false);
        break;
    }
    }
}

/**
 * Generate the mipmaps for block-compressed images with any number of channels.
 * The result is an uncompressed image containing the next mip and its tail.
 * Levels [DestLODBegin, DestLODEnd) of DestImage are generated starting from level SrcLOD of SourceImage.
 */
template<int32 PixelSize>
inline void GenerateMipmapsBlockCompressedLODRange(
    int32 SrcLOD, int32 DestLODBegin, int32 DestLODEnd,
    const FImage* SourceImage, FImage* DestImage,
    const FMipmapGenerationSettings& Settings)
{
    using namespace OpImageMipmapInternal;

    const FIntVector2 SourceSize = SourceImage->CalculateMipSize(SrcLOD);

    check(DestImage->GetLODCount() >= DestLODEnd);
    check(Invoke([&]() -> bool
    {
        FIntVector2 DestBeginLODSize = DestImage->CalculateMipSize(DestLODBegin);

        FIntVector2 SrcNextLODSize = FIntVector2(
            FMath::DivideAndRoundUp(SourceSize.X, 2),
            FMath::DivideAndRoundUp(SourceSize.Y, 2));

        return DestBeginLODSize == SrcNextLODSize;
    }));

    const EImageFormat SrcFormat = SourceImage->GetFormat();
    const EImageFormat DestFormat = DestImage->GetFormat();

    switch (Settings.FilterType)
    {
    case EMipmapFilterType::SimpleAverage:
    {
        const uint8* SrcData = SourceImage->GetLODData(SrcLOD);
        uint8* DestData = DestImage->GetLODData(DestLODBegin);

        GenerateNextMipBlockCompressed<PixelSize, EMipmapFilterType::SimpleAverage>(
            SrcData, DestData, SourceSize, SrcFormat, DestFormat);

        SrcData = DestData;
        FIntVector2 CurrentMipSize = FIntVector2(
            FMath::DivideAndRoundUp(SourceSize[0], 2),
            FMath::DivideAndRoundUp(SourceSize[1], 2));

        if (CurrentMipSize.X > 1 || CurrentMipSize.Y > 1)
        {
            for (int32 L = DestLODBegin + 1; L < DestLODEnd; ++L)
            {
                DestData = DestImage->GetLODData(L);

                GenerateNextMipmapUint8SimpleAverage<PixelSize>(SrcData, DestData, CurrentMipSize);

                SrcData = DestData;
                CurrentMipSize = FIntVector2(
                    FMath::DivideAndRoundUp(CurrentMipSize.X, 2),
                    FMath::DivideAndRoundUp(CurrentMipSize.Y, 2));
            }
        }

        break;
    }
    case EMipmapFilterType::Unfiltered:
    {
        const uint8* SrcData = SourceImage->GetLODData(SrcLOD);
        uint8* DestData = DestImage->GetLODData(DestLODBegin);

        GenerateNextMipBlockCompressed<PixelSize, EMipmapFilterType::Unfiltered>(
            SrcData, DestData, SourceSize, SrcFormat, DestFormat);

        SrcData = DestData;
        FIntVector2 CurrentMipSize = FIntVector2(
            FMath::DivideAndRoundUp(SourceSize.X, 2),
            FMath::DivideAndRoundUp(SourceSize.Y, 2));

        if (CurrentMipSize.X > 1 || CurrentMipSize.Y > 1)
        {
            for (int32 L = DestLODBegin + 1; L < DestLODEnd; ++L)
            {
                DestData = DestImage->GetLODData(L);

                GenerateNextMipmapUint8Unfiltered<PixelSize>(SrcData, DestData, CurrentMipSize);

                SrcData = DestData;
                CurrentMipSize = FIntVector2(
                    FMath::DivideAndRoundUp(CurrentMipSize.X, 2),
                    FMath::DivideAndRoundUp(CurrentMipSize.Y, 2));
            }
        }
        break;
    }
    default:
    {
        check(false);
        break;
    }
    }
}


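/** Allocate the scratch images needed to generate mips for a compressed destination image: an
 * uncompressed copy of the last existing mip where the optimized path cannot be used, and an
 * uncompressed image that will hold the newly generated mip tail. */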
void FImageOperator::ImageMipmap_PrepareScratch(const FImage* DestImage, int32 StartLevel, int32 LevelCount, FScratchImageMipmap& Scratch)
{
    check(DestImage->GetLODCount() == LevelCount);

    EImageFormat DestFormat = DestImage->GetFormat();
    if (mu::IsCompressedFormat(DestFormat))
    {
        // Is it a block format?
        if (mu::GetImageFormatData(DestFormat).PixelsPerBlockX > 1)
        {
            if (!bEnableCompressedMipGenerationMemoryOptimizations)
            {
                // Uncompress the last mip that we already have
                FIntVector2 UncompressedSize = DestImage->CalculateMipSize(StartLevel);
                Scratch.Uncompressed = CreateImage(
                    (uint16)UncompressedSize[0], (uint16)UncompressedSize[1],
                    1,
                    EImageFormat::RGBA_UByte, EInitializationType::NotInitialized);
            }

            FIntVector2 UncompressedMipsSize = DestImage->CalculateMipSize(StartLevel + 1);
            // Generate the mipmaps from there on
            Scratch.UncompressedMips = CreateImage(
                (uint16)UncompressedMipsSize[0], (uint16)UncompressedMipsSize[1],
                FMath::Max(1, LevelCount - StartLevel - 1),
                EImageFormat::RGBA_UByte, EInitializationType::NotInitialized);

            // Compress the mipmapped image
            // Scratch.CompressedMips = CreateImage(
            //     (uint16)UncompressedMipsSize[0], (uint16)UncompressedMipsSize[1],
            //     Scratch.UncompressedMips->GetLODCount(),
            //     DestImage->GetFormat(), EInitializationType::NotInitialized);
        }
        else
        {
            // It's probably an RLE compressed format

            // Uncompress the last mip that we already have
            FIntVector2 UncompressedSize = DestImage->CalculateMipSize(StartLevel);
            Scratch.Uncompressed = CreateImage(
                (uint16)UncompressedSize[0], (uint16)UncompressedSize[1],
                1,
                EImageFormat::L_UByte, EInitializationType::NotInitialized);

            FIntVector2 UncompressedMipsSize = DestImage->CalculateMipSize(StartLevel + 1);
            // Generate the mipmaps from there on
            Scratch.UncompressedMips = CreateImage(
                (uint16)UncompressedMipsSize[0], (uint16)UncompressedMipsSize[1],
                FMath::Max(1, LevelCount - StartLevel - 1),
                EImageFormat::L_UByte, EInitializationType::NotInitialized);

            // Compress the mipmapped image
            // Scratch.CompressedMips = CreateImage(
            //     (uint16)UncompressedMipsSize[0], (uint16)UncompressedMipsSize[1],
            //     Scratch.UncompressedMips->GetLODCount(),
            //     DestImage->GetFormat(), EInitializationType::NotInitialized);

            // Preallocate ample memory for the compressed data
            int32 UncompressedNumMips = Scratch.UncompressedMips->GetLODCount();
            for (int32 L = 0; L < UncompressedNumMips; ++L)
            {
                // This can happen with missing data.
                if (!Scratch.CompressedMips)
                {
                    break;
                }

                const FIntVector2 LODSize = Scratch.Uncompressed->CalculateMipSize(L);
                Scratch.CompressedMips->DataStorage.ResizeLOD(L, LODSize.X*LODSize.Y);
            }
        }
    }
}


void FImageOperator::ImageMipmap_ReleaseScratch(FScratchImageMipmap& Scratch)
{
    ReleaseImage(Scratch.Uncompressed);
    ReleaseImage(Scratch.UncompressedMips);
    ReleaseImage(Scratch.CompressedMips);
}

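/** Generate the mip tail of DestImage below StartLevel of BaseImage. Block-compressed formats take an
 * optimized path that decompresses the source in batches and recompresses the generated tail; other
 * compressed formats decompress the last available mip, generate the tail uncompressed and compress it
 * back; uncompressed formats are written directly into DestImage. */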
void FImageOperator::ImageMipmap(FScratchImageMipmap& Scratch, int32 CompressionQuality, FImage* DestImage, const FImage* BaseImage,
    int32 StartLevel, int32 NumLODs, const FMipmapGenerationSettings& Settings, bool bGenerateOnlyTail)
{
    check(!(BaseImage->Flags & FImage::IF_CANNOT_BE_SCALED));
    check(DestImage->GetFormat() == BaseImage->GetFormat());

    if (!bGenerateOnlyTail)
    {
        check(DestImage->GetLODCount() == NumLODs);
        check(DestImage->GetSizeX() == BaseImage->GetSizeX());
        check(DestImage->GetSizeY() == BaseImage->GetSizeY());
    }
    else
    {
        check(DestImage->GetLODCount() + BaseImage->GetLODCount() == NumLODs);

        checkCode
        (
            const FIntVector2 BaseNextMipSize = BaseImage->CalculateMipSize(StartLevel + 1);
            check(BaseNextMipSize.X == DestImage->GetSizeX() && BaseNextMipSize.Y == DestImage->GetSizeY());
        );
    }

    // Copy the mips that already exist in the base image.
    if (!bGenerateOnlyTail && DestImage != BaseImage)
    {
        for (int32 L = 0; L <= StartLevel; ++L)
        {
            TArrayView<uint8> DestView = DestImage->DataStorage.GetLOD(L);
            TArrayView<const uint8> SrcView = BaseImage->DataStorage.GetLOD(L);

            check(DestView.Num() == SrcView.Num());
            FMemory::Memcpy(DestView.GetData(), SrcView.GetData(), DestView.Num());
        }
    }

    EImageFormat BaseFormat = BaseImage->GetFormat();
    const bool bIsBlockCompressedFormat = mu::IsBlockCompressedFormat(BaseFormat);
    const bool bIsCompressedFormat = mu::IsCompressedFormat(BaseFormat);

    check(!bIsBlockCompressedFormat || bIsCompressedFormat);

    if (bIsBlockCompressedFormat && bEnableCompressedMipGenerationMemoryOptimizations)
    {
        const EImageFormat DestFormat = Scratch.UncompressedMips->GetFormat();
        FImage* UncompressedImage = Scratch.UncompressedMips.Get();

        const int32 UncompressedNumLODs = UncompressedImage->GetLODCount();

        switch (DestFormat)
        {
        case EImageFormat::L_UByte:
        {
            constexpr int32 PixelSize = 1;
            GenerateMipmapsBlockCompressedLODRange<PixelSize>(
                StartLevel, 0, UncompressedNumLODs, BaseImage, UncompressedImage, Settings);
            break;
        }
        case EImageFormat::RGB_UByte:
        {
            constexpr int32 PixelSize = 3;
            GenerateMipmapsBlockCompressedLODRange<PixelSize>(
                StartLevel, 0, UncompressedNumLODs, BaseImage, UncompressedImage, Settings);
            break;
        }
        case EImageFormat::RGBA_UByte:
        {
            constexpr int32 PixelSize = 4;
            GenerateMipmapsBlockCompressedLODRange<PixelSize>(
                StartLevel, 0, UncompressedNumLODs, BaseImage, UncompressedImage, Settings);
            break;
        }
        default: check(false);
        }

        int32 DestLODBegin = bGenerateOnlyTail ? 0 : StartLevel + 1;

        bool bSuccess = false;
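        // Compress the generated uncompressed mip tail back into the destination format, writing from DestLODBegin onwards.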
        ImagePixelFormat(
            bSuccess, CompressionQuality, DestImage, UncompressedImage, DestLODBegin, 0, UncompressedNumLODs);
        check(bSuccess);
    }
    else if (bIsCompressedFormat)
    {
        const int32 DestLODBegin = bGenerateOnlyTail ? 0 : StartLevel + 1;
        const int32 DestLODEnd = DestImage->GetLODCount();

        // Bad case: the mip tail cannot be generated directly from the compressed data.
        // Uncompress the last mip that we already have
        bool bSuccess = false;
        ImagePixelFormat(bSuccess, CompressionQuality, Scratch.Uncompressed.Get(), BaseImage, StartLevel);
        check(bSuccess);

        // Generate the mipmaps from there on
        constexpr bool bGenerateOnlyTailForCompressed = true;

        const int32 NumScratchMips = Scratch.UncompressedMips->GetLODCount() + Scratch.Uncompressed->GetLODCount();
        ImageMipmap(Scratch, CompressionQuality, Scratch.UncompressedMips.Get(),
            Scratch.Uncompressed.Get(), 0, NumScratchMips, Settings, bGenerateOnlyTailForCompressed);

        // Compress the mipmapped image
        bSuccess = false;
        ImagePixelFormat(
            bSuccess, CompressionQuality, DestImage, Scratch.UncompressedMips.Get(),
            StartLevel + 1, 0, Scratch.UncompressedMips->GetLODCount());
        check(bSuccess);
    }
    else
    {
        const int32 DestLODBegin = bGenerateOnlyTail ? 0 : StartLevel + 1;
        const int32 DestLODEnd = DestImage->GetLODCount();

        switch (BaseImage->GetFormat())
        {
        case EImageFormat::L_UByte:
        {
            GenerateMipmapUint8LODRange<1>(StartLevel, DestLODBegin, DestLODEnd, BaseImage, DestImage, Settings);
            break;
        }
        case EImageFormat::RGB_UByte:
        {
            GenerateMipmapUint8LODRange<3>(StartLevel, DestLODBegin, DestLODEnd, BaseImage, DestImage, Settings);
            break;
        }
        case EImageFormat::BGRA_UByte:
        case EImageFormat::RGBA_UByte:
        {
            GenerateMipmapUint8LODRange<4>(StartLevel, DestLODBegin, DestLODEnd, BaseImage, DestImage, Settings);
            break;
        }
        default:
            checkf(false, TEXT("Format not implemented in mipmap generation."));
        }
    }
}

void FImageOperator::ImageMipmap(int32 CompressionQuality, FImage* Dest, const FImage* Base,
    int32 StartLevel, int32 LevelCount, const FMipmapGenerationSettings& Settings, bool bGenerateOnlyTail)
{
    FScratchImageMipmap Scratch;

    ImageMipmap_PrepareScratch(Dest, StartLevel, LevelCount, Scratch);
    ImageMipmap(Scratch, CompressionQuality, Dest, Base, StartLevel, LevelCount, Settings, bGenerateOnlyTail);
    ImageMipmap_ReleaseScratch(Scratch);
}


/**
 * Update all the mipmaps in the image from the data in its base (top) mip.
 * Only the mip levels already allocated in the image are updated.
 */
void ImageMipmapInPlace(int32 InImageCompressionQuality, FImage* InBase, const FMipmapGenerationSettings& InSettings)
{
    check(!(InBase->Flags & FImage::IF_CANNOT_BE_SCALED));

    int32 LevelCount = InBase->GetLODCount();

    if (LevelCount - 1 <= 0)
    {
        return;
    }

    switch (InBase->GetFormat())
    {
    case EImageFormat::L_UByte:
        GenerateMipmapUint8LODRange<1>(0, 1, LevelCount, InBase, InBase, InSettings);
        break;

    case EImageFormat::RGB_UByte:
        GenerateMipmapUint8LODRange<3>(0, 1, LevelCount, InBase, InBase, InSettings);
        break;

    case EImageFormat::BGRA_UByte:
    case EImageFormat::RGBA_UByte:
        GenerateMipmapUint8LODRange<4>(0, 1, LevelCount, InBase, InBase, InSettings);
        break;

    default:
        checkf(false, TEXT("Format not implemented in mipmap generation."));
    }
}

} // namespace mu