// Copyright Epic Games, Inc. All Rights Reserved. #include "MuR/OpImageLayer.h" #include "MuR/ImagePrivate.h" #include "MuR/ImageRLE.h" #include "MuR/ParallelExecutionUtils.h" #include "Templates/UnrealTemplate.h" #include "MuR/OpImageBlend.h" #include "MuR/MutableRuntimeModule.h" namespace mu { namespace OpImageLayerInternal { struct FOpLayerBatchArgs { int32 BatchNumElems = 0; int32 LODBegin = 0; int32 LODEnd = 0; int32 FirstLODOffset = 0; FImage* Result = nullptr; const FImage* Base = nullptr; const FImage* Blend = nullptr; const FImage* Mask = nullptr; uint32 ResultBytesPerElem = 0; uint32 BaseBytesPerElem = 0; uint32 BlendBytesPerElem = 0; uint32 MaskBytesPerElem = 0; }; struct FOpLayerBatchViews { int32 NumElems = 0; TArrayView Result; TArrayView Base; TArrayView Blend; TArrayView Mask; }; FORCENOINLINE int32 GetOpLayerNumBatches(const FOpLayerBatchArgs& Args) { check(Args.Result); const int32 NumBatches = Args.Result->DataStorage.GetNumBatches(Args.BatchNumElems, Args.ResultBytesPerElem); #if DO_CHECK if (Args.Base) { check(NumBatches == Args.Base->DataStorage.GetNumBatches(Args.BatchNumElems, Args.BaseBytesPerElem)); } if (Args.Blend) { check(NumBatches == Args.Blend->DataStorage.GetNumBatches(Args.BatchNumElems, Args.BlendBytesPerElem)); } if (Args.Mask) { check(NumBatches == Args.Mask->DataStorage.GetNumBatches(Args.BatchNumElems, Args.MaskBytesPerElem)); } #endif return NumBatches; } FORCENOINLINE int32 GetOpLayerNumBatchesLODRange(const FOpLayerBatchArgs& Args) { check(Args.Result); const int32 NumBatches = Args.Result->DataStorage.GetNumBatchesLODRange(Args.BatchNumElems, Args.ResultBytesPerElem, Args.LODBegin, Args.LODEnd); #if DO_CHECK if (Args.Base) { check(NumBatches == Args.Base->DataStorage.GetNumBatchesLODRange(Args.BatchNumElems, Args.BaseBytesPerElem, Args.LODBegin, Args.LODEnd)); } if (Args.Blend) { check(NumBatches == Args.Blend->DataStorage.GetNumBatchesLODRange(Args.BatchNumElems, Args.BlendBytesPerElem, Args.LODBegin, Args.LODEnd)); } if 
(Args.Mask) { check(NumBatches == Args.Mask->DataStorage.GetNumBatchesLODRange(Args.BatchNumElems, Args.MaskBytesPerElem, Args.LODBegin, Args.LODEnd)); } #endif return NumBatches; } FORCENOINLINE int32 GetOpLayerNumBatchesLODRangeOffsetViews(const FOpLayerBatchArgs& Args) { const bool bOnlyFirstLOD = Args.FirstLODOffset >= 0; check(Args.Result); const int32 NumBatches = bOnlyFirstLOD ? Args.Result->DataStorage.GetNumBatchesFirstLODOffset(Args.BatchNumElems, Args.ResultBytesPerElem, Args.FirstLODOffset) : Args.Result->DataStorage.GetNumBatchesLODRange(Args.BatchNumElems, Args.ResultBytesPerElem, Args.LODBegin, Args.LODEnd); #if DO_CHECK if (Args.Base) { const int32 BaseNumBatches = bOnlyFirstLOD ? Args.Base->DataStorage.GetNumBatchesFirstLODOffset(Args.BatchNumElems, Args.BaseBytesPerElem, Args.FirstLODOffset) : Args.Base->DataStorage.GetNumBatchesLODRange(Args.BatchNumElems, Args.BaseBytesPerElem, Args.LODBegin, Args.LODEnd); check(NumBatches == BaseNumBatches); } if (Args.Blend) { const int32 BlendNumBatches = bOnlyFirstLOD ? Args.Blend->DataStorage.GetNumBatchesFirstLODOffset(Args.BatchNumElems, Args.BlendBytesPerElem, Args.FirstLODOffset) : Args.Blend->DataStorage.GetNumBatchesLODRange(Args.BatchNumElems, Args.BlendBytesPerElem, Args.LODBegin, Args.LODEnd); check(NumBatches == BlendNumBatches); } if (Args.Mask) { const int32 MaskNumBatches = bOnlyFirstLOD ? 
Args.Mask->DataStorage.GetNumBatchesFirstLODOffset(Args.BatchNumElems, Args.MaskBytesPerElem, Args.FirstLODOffset) : Args.Mask->DataStorage.GetNumBatchesLODRange(Args.BatchNumElems, Args.MaskBytesPerElem, Args.LODBegin, Args.LODEnd); check(NumBatches == MaskNumBatches); } #endif return NumBatches; } FORCENOINLINE FOpLayerBatchViews GetOpLayerBatchViews(int32 BatchId, const FOpLayerBatchArgs& Args) { FOpLayerBatchViews BatchViewsResult; check(Args.Result); BatchViewsResult.Result = Args.Result->DataStorage.GetBatch(BatchId, Args.BatchNumElems, Args.ResultBytesPerElem); BatchViewsResult.NumElems = BatchViewsResult.Result.Num() / Args.ResultBytesPerElem; if (Args.Base) { BatchViewsResult.Base = Args.Base->DataStorage.GetBatch(BatchId, Args.BatchNumElems, Args.BaseBytesPerElem); check(BatchViewsResult.NumElems == BatchViewsResult.Base.Num() / Args.BaseBytesPerElem); } if (Args.Blend) { BatchViewsResult.Blend = Args.Blend->DataStorage.GetBatch(BatchId, Args.BatchNumElems, Args.BlendBytesPerElem); check(BatchViewsResult.NumElems == BatchViewsResult.Blend.Num() / Args.BlendBytesPerElem); } if (Args.Mask) { BatchViewsResult.Mask = Args.Mask->DataStorage.GetBatch(BatchId, Args.BatchNumElems, Args.MaskBytesPerElem); check(BatchViewsResult.NumElems == BatchViewsResult.Mask.Num() / Args.MaskBytesPerElem); } return BatchViewsResult; } FORCENOINLINE FOpLayerBatchViews GetOpLayerBatchLODRangeViews(int32 BatchId, const FOpLayerBatchArgs& Args) { FOpLayerBatchViews BatchViewsResult; check(Args.Result); BatchViewsResult.Result = Args.Result->DataStorage.GetBatchLODRange(BatchId, Args.BatchNumElems, Args.ResultBytesPerElem, Args.LODBegin, Args.LODEnd); BatchViewsResult.NumElems = BatchViewsResult.Result.Num() / Args.ResultBytesPerElem; if (Args.Base) { BatchViewsResult.Base = Args.Base->DataStorage.GetBatchLODRange(BatchId, Args.BatchNumElems, Args.BaseBytesPerElem, Args.LODBegin, Args.LODEnd); check(BatchViewsResult.NumElems == BatchViewsResult.Base.Num() / Args.BaseBytesPerElem); } 
if (Args.Blend) { BatchViewsResult.Blend = Args.Blend->DataStorage.GetBatchLODRange(BatchId, Args.BatchNumElems, Args.BlendBytesPerElem, Args.LODBegin, Args.LODEnd); check(BatchViewsResult.NumElems == BatchViewsResult.Blend.Num() / Args.BlendBytesPerElem); } if (Args.Mask) { BatchViewsResult.Mask = Args.Mask->DataStorage.GetBatchLODRange(BatchId, Args.BatchNumElems, Args.MaskBytesPerElem, Args.LODBegin, Args.LODEnd); check(BatchViewsResult.NumElems == BatchViewsResult.Mask.Num() / Args.MaskBytesPerElem); } return BatchViewsResult; } FORCENOINLINE FOpLayerBatchViews GetOpLayerBatchLODRangeOffsetViews(int32 BatchId, const FOpLayerBatchArgs& Args) { FOpLayerBatchViews BatchViewsResult; const bool bOnlyFirstLOD = Args.FirstLODOffset >= 0; check(Args.Result); BatchViewsResult.Result = bOnlyFirstLOD ? Args.Result->DataStorage.GetBatchFirstLODOffset(BatchId, Args.BatchNumElems, Args.ResultBytesPerElem, Args.FirstLODOffset) : Args.Result->DataStorage.GetBatchLODRange(BatchId, Args.BatchNumElems, Args.ResultBytesPerElem, Args.LODBegin, Args.LODEnd); BatchViewsResult.NumElems = BatchViewsResult.Result.Num() / Args.ResultBytesPerElem; if (Args.Base) { BatchViewsResult.Base = bOnlyFirstLOD ? Args.Base->DataStorage.GetBatchFirstLODOffset(BatchId, Args.BatchNumElems, Args.BaseBytesPerElem, Args.FirstLODOffset) : Args.Base->DataStorage.GetBatchLODRange(BatchId, Args.BatchNumElems, Args.BaseBytesPerElem, Args.LODBegin, Args.LODEnd); check(BatchViewsResult.NumElems == BatchViewsResult.Base.Num() / Args.BaseBytesPerElem); } if (Args.Blend) { BatchViewsResult.Blend = bOnlyFirstLOD ? Args.Blend->DataStorage.GetBatchFirstLODOffset(BatchId, Args.BatchNumElems, Args.BlendBytesPerElem, Args.FirstLODOffset) : Args.Blend->DataStorage.GetBatchLODRange(BatchId, Args.BatchNumElems, Args.BlendBytesPerElem, Args.LODBegin, Args.LODEnd); check(BatchViewsResult.NumElems == BatchViewsResult.Blend.Num() / Args.BlendBytesPerElem); } if (Args.Mask) { BatchViewsResult.Mask = bOnlyFirstLOD ? 
Args.Mask->DataStorage.GetBatchFirstLODOffset(BatchId, Args.BatchNumElems, Args.MaskBytesPerElem, Args.FirstLODOffset) : Args.Mask->DataStorage.GetBatchLODRange(BatchId, Args.BatchNumElems, Args.MaskBytesPerElem, Args.LODBegin, Args.LODEnd); check(BatchViewsResult.NumElems == BatchViewsResult.Mask.Num() / Args.MaskBytesPerElem); } return BatchViewsResult; } FORCEINLINE bool IsAnyComponentLargerThan1(FVector4f Value) { return (Value[0] > 1) | (Value[1] > 1) | (Value[2] > 1) | (Value[3] > 1); } /** * Apply a blending function to an image with a colour source. * It only affects the RGB or L channels, leaving alpha untouched. */ template FORCENOINLINE void BufferLayerColourGenericChannel(uint8* DestBuf, const uint8* BaseBuf, int32 NumElems, const FIntVector& Color) { static_assert(NumChannels > 0 && NumChannels <= 4); for (int32 I = 0; I < NumElems; ++I) { for (uint32 C = 0; C < NumChannels; ++C) { uint32 Base = BaseBuf[NumChannels * I + C]; uint32 Result = BLEND_FUNC(Base, Color[C]); if constexpr (bClamp) { DestBuf[NumChannels * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[NumChannels * I + C] = (uint8)Result; } } } } template FORCENOINLINE void BufferLayerColourFromAlphaGenericChannel(uint8* DestBuf, const uint8* BaseBuf, int32 NumElems, const FIntVector& Color) { static_assert(NumChannels > 0 && NumChannels <= 4); for (int32 I = 0; I < NumElems; ++I) { const uint32 Alpha = Invoke([&]() -> uint32 { if constexpr (NumChannels <= 3) { return 255; } else { return BaseBuf[NumChannels * I + 3]; } }); for (uint32 C = 0; C < NumChannels; ++C) { uint32 Result = BLEND_FUNC(Alpha, Color[C]); if constexpr (Clamp) { DestBuf[NumChannels * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[NumChannels * I + C] = (uint8)Result; } } } } } // namespace OpImageLayerInternal template< uint32 (*BLEND_FUNC)(uint32,uint32), bool CLAMP > FORCENOINLINE void BufferLayerColourImpl(FImage* ResultImage, const FImage* BaseImage, FVector4f Color) { 
check(ResultImage->GetFormat() == BaseImage->GetFormat()); check(ResultImage->GetSizeX() == BaseImage->GetSizeX()); check(ResultImage->GetSizeY() == BaseImage->GetSizeY()); check(ResultImage->GetLODCount() == BaseImage->GetLODCount()); const EImageFormat BaseFormat = BaseImage->GetFormat(); // Generic implementation const int32 BytesPerElem = GetImageFormatData(BaseFormat).BytesPerBlock; constexpr int32 BatchNumElems = 4096*4; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.Result = ResultImage; BatchArgs.Base = BaseImage; BatchArgs.BaseBytesPerElem = BytesPerElem; BatchArgs.ResultBytesPerElem = BytesPerElem; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatches(BatchArgs); const FIntVector ColorValue = FIntVector(Color.X * 255.0f, Color.Y * 255.0f, Color.Z * 255.0f); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, ColorValue, BaseFormat ](int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchViews(BatchId, BatchArgs); switch (BaseFormat) { case EImageFormat::L_UByte: { check(BatchArgs.ResultBytesPerElem == 1); OpImageLayerInternal::BufferLayerColourGenericChannel<1, BLEND_FUNC, CLAMP>( BatchViews.Result.GetData(), BatchViews.Base.GetData(), BatchViews.NumElems, ColorValue); break; } case EImageFormat::RGB_UByte: { check(BatchArgs.ResultBytesPerElem == 3); OpImageLayerInternal::BufferLayerColourGenericChannel<3, BLEND_FUNC, CLAMP>( BatchViews.Result.GetData(), BatchViews.Base.GetData(), BatchViews.NumElems, ColorValue); break; } case EImageFormat::RGBA_UByte: { check(BatchArgs.ResultBytesPerElem == 4); OpImageLayerInternal::BufferLayerColourGenericChannel<4, BLEND_FUNC, CLAMP>( BatchViews.Result.GetData(), BatchViews.Base.GetData(), BatchViews.NumElems, ColorValue); break; } case EImageFormat::BGRA_UByte: { check(BatchArgs.ResultBytesPerElem == 4); FIntVector BGRAColorValue = FIntVector(ColorValue.Z, ColorValue.Y, 
ColorValue.X); OpImageLayerInternal::BufferLayerColourGenericChannel<4, BLEND_FUNC, CLAMP>( BatchViews.Result.GetData(), BatchViews.Base.GetData(), BatchViews.NumElems, ColorValue); break; } default: { checkf(false, TEXT("Unsupported format.")); break; } } }); } template< uint32 (*BLEND_FUNC)(uint32, uint32) > void BufferLayerColour(FImage* ResultImage, const FImage* BaseImage, FVector4f Color) { bool bIsClampNeeded = OpImageLayerInternal::IsAnyComponentLargerThan1(Color); if (bIsClampNeeded) { BufferLayerColourImpl(ResultImage, BaseImage, Color); } else { BufferLayerColourImpl(ResultImage, BaseImage, Color); } } template< uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP> FORCENOINLINE void BufferLayerColourFromAlpha(FImage* ResultImage, const FImage* BaseImage, FVector4f Color) { check(ResultImage->GetFormat() == BaseImage->GetFormat()); check(ResultImage->GetSizeX() == BaseImage->GetSizeX()); check(ResultImage->GetSizeY() == BaseImage->GetSizeY()); check(ResultImage->GetLODCount() == BaseImage->GetLODCount()); const EImageFormat BaseFormat = BaseImage->GetFormat(); // Generic implementation constexpr int32 BatchNumElems = 4096*2; const int32 BytesPerElem = GetImageFormatData(BaseFormat).BytesPerBlock; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.Result = ResultImage; BatchArgs.Base = BaseImage; BatchArgs.BaseBytesPerElem = BytesPerElem; BatchArgs.ResultBytesPerElem = BytesPerElem; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatches(BatchArgs); const FIntVector ColorValue = FIntVector(Color.X * 255.0f, Color.Y * 255.0f, Color.Z * 255.0f); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, ColorValue, BaseFormat ](int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchViews(BatchId, BatchArgs); switch (BaseFormat) { case EImageFormat::L_UByte: { check(BatchArgs.ResultBytesPerElem == 1); 
OpImageLayerInternal::BufferLayerColourFromAlphaGenericChannel<1, BLEND_FUNC, CLAMP>( BatchViews.Result.GetData(), BatchViews.Base.GetData(), BatchViews.NumElems, ColorValue); break; } case EImageFormat::RGB_UByte: { check(BatchArgs.ResultBytesPerElem == 3); OpImageLayerInternal::BufferLayerColourFromAlphaGenericChannel<3, BLEND_FUNC, CLAMP>( BatchViews.Result.GetData(), BatchViews.Base.GetData(), BatchViews.NumElems, ColorValue); break; } case EImageFormat::RGBA_UByte: { check(BatchArgs.ResultBytesPerElem == 4); OpImageLayerInternal::BufferLayerColourFromAlphaGenericChannel<4, BLEND_FUNC, CLAMP>( BatchViews.Result.GetData(), BatchViews.Base.GetData(), BatchViews.NumElems, ColorValue); break; } case EImageFormat::BGRA_UByte: { check(BatchArgs.ResultBytesPerElem == 4); FIntVector BGRAColorValues = FIntVector(ColorValue.Z, ColorValue.Y, ColorValue.X); OpImageLayerInternal::BufferLayerColourFromAlphaGenericChannel<4, BLEND_FUNC, CLAMP>( BatchViews.Result.GetData(), BatchViews.Base.GetData(), BatchViews.NumElems, ColorValue); break; } default: { checkf(false, TEXT("Unsupported format.")); break; } } }); } template void BufferLayerColourFromAlpha(FImage* ResultImage, const FImage* BaseImage, FVector4f Color) { bool bIsClampNeeded = OpImageLayerInternal::IsAnyComponentLargerThan1(Color); if (bIsClampNeeded) { BufferLayerColourFromAlpha(ResultImage, BaseImage, Color); } else { BufferLayerColourFromAlpha(ResultImage, BaseImage, Color); } } template< uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32), uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, // Number of total channels to actually process uint32 CHANNELS_TO_BLEND, // Number of total channels in the base image int32 BASE_CHANNEL_STRIDE> void BufferLayerColourFormat(FImage* DestImage, const FImage* BaseImage, const FImage* MaskImage, FVector4f Col, uint32 BaseChannelOffset, uint8 ColorChannelOffset, bool bOnlyFirstLOD) { check(CHANNELS_TO_BLEND + BaseChannelOffset <= BASE_CHANNEL_STRIDE); 
check(DestImage->GetLODCount() <= BaseImage->GetLODCount()); FUintVector4 TopColor = FUintVector4(Col.X * 255.0f, Col.Y * 255.0f, Col.Z * 255.0f, Col.W * 255); const EImageFormat BaseFormat = BaseImage->GetFormat(); const EImageFormat MaskFormat = MaskImage->GetFormat(); int32 NumLODs = bOnlyFirstLOD ? 1 : BaseImage->GetLODCount(); const bool bIsMaskUncompressed = (MaskFormat == EImageFormat::L_UByte); constexpr uint32 NumColorChannels = FMath::Min(CHANNELS_TO_BLEND, 3u); check(NumColorChannels + ColorChannelOffset < 4); if (bIsMaskUncompressed) { const int32 BytesPerElem = GetImageFormatData(BaseFormat).BytesPerBlock; check(GetImageFormatData(MaskFormat).BytesPerBlock == 1); check(GetImageFormatData(DestImage->GetFormat()).BytesPerBlock == BytesPerElem); constexpr int32 BatchNumElems = 4096 * 2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.LODBegin = 0; BatchArgs.LODEnd = NumLODs; BatchArgs.Result = DestImage; BatchArgs.Base = BaseImage; BatchArgs.Mask = MaskImage; BatchArgs.ResultBytesPerElem = BytesPerElem; BatchArgs.BaseBytesPerElem = BytesPerElem; BatchArgs.MaskBytesPerElem = 1; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, BaseChannelOffset, TopColor, ColorChannelOffset, NumColorChannels ](int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs); // This could happen in case of missing data files. 
if (!BatchViews.Base.GetData() || !BatchViews.Mask.GetData() || !BatchViews.Result.GetData()) { return; } const uint8* BaseBuf = BatchViews.Base.GetData() + BaseChannelOffset; const uint8* MaskBuf = BatchViews.Mask.GetData(); uint8* DestBuf = BatchViews.Result.GetData() + BaseChannelOffset; for (int32 I = 0; I < BatchViews.NumElems; ++I) { uint32 MaskData = MaskBuf[I]; for (int32 C = 0; C < NumColorChannels; ++C) { const uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE*I + C]; const uint32 Result = BLEND_FUNC_MASKED(Base, TopColor[C + ColorChannelOffset], MaskData); if constexpr (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE*I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE*I + C] = (uint8)Result; } } constexpr bool bIsNC4 = (BASE_CHANNEL_STRIDE == 4); if constexpr (bIsNC4) { DestBuf[BASE_CHANNEL_STRIDE*I + 3] = BaseBuf[BASE_CHANNEL_STRIDE*I + 3]; } } }); } else if (MaskFormat == EImageFormat::L_UByteRLE) { int32 Rows = BaseImage->GetSizeY(); int32 Width = BaseImage->GetSizeX(); for (int32 Lod = 0; Lod < NumLODs; ++Lod) { const uint8* BaseBuf = BaseImage->GetLODData(Lod); const uint8* MaskBuf = MaskImage->GetLODData(Lod); uint8* DestBuf = DestImage->GetLODData(Lod); // This could happen in case of missing data files. if (!BaseBuf || !MaskBuf || !DestBuf) { continue; } // Remove RLE header, mip size and row sizes. 
MaskBuf += sizeof(uint32); MaskBuf += Rows * sizeof(uint32); for (int32 RowIndex = 0; RowIndex < Rows; ++RowIndex) { const uint8* DestRowEnd = DestBuf + Width * BASE_CHANNEL_STRIDE; while (DestBuf != DestRowEnd) { // Decode header uint16 Equal = 0; FMemory::Memmove(&Equal, MaskBuf, sizeof(uint16)); MaskBuf += 2; uint8 Different = *MaskBuf; ++MaskBuf; uint8 EqualPixel = *MaskBuf; ++MaskBuf; // Equal pixels //check(DestBuf + BASE_CHANNEL_STRIDE * Equal <= BaseImage->GetDataSize(Lod)); if (EqualPixel == 255) { for (int32 I = 0; I < Equal; ++I) { for (int32 C = 0; C < NumColorChannels; ++C) { uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; uint32 Result = BLEND_FUNC(Base, TopColor[C + ColorChannelOffset]); if constexpr (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } constexpr bool bIsNC4 = (BASE_CHANNEL_STRIDE == 4); if (bIsNC4) { DestBuf[BASE_CHANNEL_STRIDE * I + 3] = BaseBuf[BASE_CHANNEL_STRIDE * I + 3]; } } } else if (EqualPixel > 0) { for (int32 I = 0; I < Equal; ++I) { for (int32 C = 0; C < NumColorChannels; ++C) { uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; uint32 Result = BLEND_FUNC_MASKED(Base, TopColor[C + ColorChannelOffset], EqualPixel); if constexpr (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } constexpr bool bIsNC4 = (BASE_CHANNEL_STRIDE == 4); if (bIsNC4) { DestBuf[BASE_CHANNEL_STRIDE * I + 3] = BaseBuf[BASE_CHANNEL_STRIDE * I + 3]; } } } else { // It could happen if xxxxxOnBase if (DestBuf != BaseBuf) { FMemory::Memmove(DestBuf, BaseBuf, BASE_CHANNEL_STRIDE*Equal); } } DestBuf += BASE_CHANNEL_STRIDE * Equal; BaseBuf += BASE_CHANNEL_STRIDE * Equal; // Different pixels //check(DestBuf + BASE_CHANNEL_STRIDE * Different <= StartDestBuf + BaseImage->GetDataSize(Lod)); for (int32 I = 0; I < Different; ++I) { for (int32 C = 0; C < NumColorChannels; 
++C) { uint32 Mask = MaskBuf[I]; uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; uint32 Result = BLEND_FUNC_MASKED(Base, TopColor[C + ColorChannelOffset], Mask); if constexpr (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } constexpr bool bIsNC4 = (BASE_CHANNEL_STRIDE == 4); if (bIsNC4) { DestBuf[BASE_CHANNEL_STRIDE * I + 3] = BaseBuf[BASE_CHANNEL_STRIDE * I + 3]; } } DestBuf += BASE_CHANNEL_STRIDE * Different; BaseBuf += BASE_CHANNEL_STRIDE * Different; MaskBuf += Different; } } Rows = FMath::DivideAndRoundUp(Rows, 2); Width = FMath::DivideAndRoundUp(Width, 2); } } else { checkf( false, TEXT("Unsupported mask format.") ); } } /** * Apply a blending function to an image with a colour source and a mask */ template< uint32 (*BLEND_FUNC_MASKED)(uint32,uint32,uint32), uint32 (*BLEND_FUNC)(uint32,uint32), bool CLAMP> FORCENOINLINE void BufferLayerColourImpl(FImage* ResultImage, const FImage* BaseImage, const FImage* MaskImage, FVector4f Col) { check(BaseImage->GetSizeX() == MaskImage->GetSizeX()); check(BaseImage->GetSizeY() == MaskImage->GetSizeY()); check(MaskImage->GetFormat() == EImageFormat::L_UByte || MaskImage->GetFormat() == EImageFormat::L_UByteRLE); const bool bValid = (BaseImage->GetSizeX() == MaskImage->GetSizeX()) && (BaseImage->GetSizeY() == MaskImage->GetSizeY()) && (MaskImage->GetFormat() == EImageFormat::L_UByte || MaskImage->GetFormat() == EImageFormat::L_UByteRLE); if (!bValid) { return; } EImageFormat BaseFormat = BaseImage->GetFormat(); if (BaseFormat == EImageFormat::RGB_UByte) { BufferLayerColourFormat(ResultImage, BaseImage, MaskImage, Col, 0, 0, false); } else if (BaseFormat == EImageFormat::RGBA_UByte) { BufferLayerColourFormat(ResultImage, BaseImage, MaskImage, Col, 0, 0, false); } else if (BaseFormat == EImageFormat::BGRA_UByte) { float Temp = Col[0]; Col[0] = Col[2]; Col[2] = Temp; BufferLayerColourFormat(ResultImage, BaseImage, MaskImage, 
Col, 0, 0, false); } else if (BaseFormat == EImageFormat::L_UByte) { BufferLayerColourFormat(ResultImage, BaseImage, MaskImage, Col, 0, 0, false); } else { checkf(false, TEXT("Unsupported format.")); } } template< uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32), uint32 (*BLEND_FUNC)(uint32, uint32)> void BufferLayerColour(FImage* DestImage, const FImage* BaseImage, const FImage* MaskImage, FVector4f Col) { bool bIsClampNeeded = OpImageLayerInternal::IsAnyComponentLargerThan1(Col); if (bIsClampNeeded) { BufferLayerColourImpl(DestImage, BaseImage, MaskImage, Col); } else { BufferLayerColourImpl(DestImage, BaseImage, MaskImage, Col); } } /** * Apply a blending function to an image with another image as blending layer */ template< uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, // Number of total channels to actually process uint32 CHANNELS_TO_BLEND, // Number of total channels in the base image int32 BASE_CHANNEL_STRIDE> void BufferLayerColourFormatInPlace(FImage* BaseImage, FVector4f Color, uint32 BaseChannelOffset, uint8 ColorChannelOffset, bool bOnlyFirstLOD) { FUintVector4 TopColor = FUintVector4(Color.X * 255.0f, Color.Y * 255.0f, Color.Z * 255.0f, Color.W * 255.0f); int32 NumLODs = bOnlyFirstLOD ? 
1 : BaseImage->GetLODCount(); const int32 BytesPerElem = GetImageFormatData(BaseImage->GetFormat()).BytesPerBlock; constexpr int32 BatchNumElems = 4096*2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.LODBegin = 0; BatchArgs.LODEnd = NumLODs; BatchArgs.Result = BaseImage; BatchArgs.ResultBytesPerElem = BytesPerElem; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, TopColor, BaseChannelOffset, ColorChannelOffset ] (uint32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs); uint8* BaseBuf = BatchViews.Result.GetData() + BaseChannelOffset; for (int32 I = 0; I < BatchViews.NumElems; ++I) { for (int32 C = 0; C < CHANNELS_TO_BLEND; ++C) { uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE*I + C]; uint32 Blended = TopColor[C + ColorChannelOffset]; uint32 Result = BLEND_FUNC(Base, Blended); if constexpr (CLAMP) { BaseBuf[BASE_CHANNEL_STRIDE*I + C] = (uint8)FMath::Min(255u, Result); } else { BaseBuf[BASE_CHANNEL_STRIDE*I + C] = (uint8)Result; } } } }); } template< uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, uint32 CHANNEL_COUNT> FORCENOINLINE void BufferLayerColourInPlaceImpl(FImage* BaseImage, FVector4f Col, bool bOnlyOneMip, uint32 BaseOffset, uint8 ColOffset) { EImageFormat BaseFormat = BaseImage->GetFormat(); if (BaseFormat == EImageFormat::RGB_UByte) { check(BaseOffset + CHANNEL_COUNT <= 3); BufferLayerColourFormatInPlace (BaseImage, Col, BaseOffset, ColOffset, bOnlyOneMip); } else if (BaseFormat == EImageFormat::RGBA_UByte) { check(BaseOffset + CHANNEL_COUNT <= 4); BufferLayerColourFormatInPlace (BaseImage, Col, BaseOffset, ColOffset, bOnlyOneMip); } else if (BaseFormat == EImageFormat::BGRA_UByte) { float Temp = Col[0]; Col[0] = Col[2]; Col[2] = Temp; BufferLayerColourFormatInPlace (BaseImage, Col, BaseOffset, ColOffset, 
bOnlyOneMip); } else if (BaseFormat == EImageFormat::L_UByte) { check(BaseOffset + CHANNEL_COUNT <= 1); BufferLayerColourFormatInPlace< BLEND_FUNC, CLAMP, CHANNEL_COUNT, 1> (BaseImage, Col, BaseOffset, ColOffset, bOnlyOneMip); } else { checkf(false, TEXT("Unsupported format.")); } } template< uint32 (*BLEND_FUNC)(uint32, uint32), uint32 CHANNEL_COUNT> void BufferLayerColourInPlace(FImage* BaseImage, FVector4f Color, bool bOnlyOneMip, uint32 BaseOffset, uint8 ColorOffset) { bool bIsClampNeeded = OpImageLayerInternal::IsAnyComponentLargerThan1(Color); if (bIsClampNeeded) { BufferLayerColourInPlaceImpl(BaseImage, Color, bOnlyOneMip, BaseOffset, ColorOffset); } else { BufferLayerColourInPlaceImpl(BaseImage, Color, bOnlyOneMip, BaseOffset, ColorOffset); } } template< uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32), uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, int32 CHANNEL_COUNT> FORCENOINLINE void BufferLayerColourInPlaceImpl(FImage* BaseImage, const FImage* MaskImage, FVector4f Color, bool bOnlyOneMip, uint32 BaseOffset, uint8 ColorOffset) { check(BaseImage->GetSizeX() == MaskImage->GetSizeX()); check(BaseImage->GetSizeY() == MaskImage->GetSizeY()); EImageFormat BaseFormat = BaseImage->GetFormat(); if (BaseFormat == EImageFormat::RGB_UByte) { check(BaseOffset + CHANNEL_COUNT <= 3); BufferLayerColourFormat (BaseImage, BaseImage, MaskImage, Color, BaseOffset, ColorOffset, bOnlyOneMip); } else if (BaseFormat == EImageFormat::RGBA_UByte) { check(BaseOffset + CHANNEL_COUNT <= 4); BufferLayerColourFormat (BaseImage, BaseImage, MaskImage, Color, BaseOffset, ColorOffset, bOnlyOneMip); } else if (BaseFormat == EImageFormat::BGRA_UByte) { check(BaseOffset + CHANNEL_COUNT <= 4); float Temp = Color[0]; Color[0] = Color[2]; Color[2] = Temp; BufferLayerColourFormat (BaseImage, BaseImage, MaskImage, Color, BaseOffset, ColorOffset, bOnlyOneMip); } else if (BaseFormat == EImageFormat::L_UByte) { check(BaseOffset + CHANNEL_COUNT <= 1); BufferLayerColourFormat (BaseImage, 
BaseImage, MaskImage, Color, BaseOffset, ColorOffset, bOnlyOneMip); } else { checkf(false, TEXT("Unsupported format.")); } } template< uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32), uint32 (*BLEND_FUNC)(uint32, uint32), uint32 CHANNEL_COUNT> void BufferLayerColourInPlace(FImage* BaseImage, const FImage* MaskImage, FVector4f Color, bool bOnlyOneMip, uint32 BaseOffset, uint8 ColorOffset) { bool bIsClampNeeded = OpImageLayerInternal::IsAnyComponentLargerThan1(Color); if (bIsClampNeeded) { BufferLayerColourInPlaceImpl (BaseImage, MaskImage, Color, bOnlyOneMip, BaseOffset, ColorOffset); } else { BufferLayerColourInPlaceImpl (BaseImage, MaskImage, Color, bOnlyOneMip, BaseOffset, ColorOffset); } } /** * Apply a blending function to an image with another image as blending layer */ template< uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, // Number of total channels to actually process int32 CHANNELS_TO_BLEND, // Number of total channels in the base image int32 BASE_CHANNEL_STRIDE, // Number of total channels in the blend image int32 BLENDED_CHANNEL_STRIDE> void BufferLayerFormatInPlace( FImage* BaseImage, const FImage* BlendedImage, uint32 BaseChannelOffset, uint32 BlendedChannelOffset, bool bOnlyFirstLOD) { check(BaseImage->GetSizeX() == BlendedImage->GetSizeX()); check(BaseImage->GetSizeY() == BlendedImage->GetSizeY()); // No longer required. //check(BaseImage->GetFormat() == BlendedImage->GetFormat()); check(BaseChannelOffset + CHANNELS_TO_BLEND <= GetImageFormatData(BaseImage->GetFormat()).Channels); check(BlendedChannelOffset + CHANNELS_TO_BLEND <= GetImageFormatData(BlendedImage->GetFormat()).Channels); check(bOnlyFirstLOD || BaseImage->GetLODCount() <= BlendedImage->GetLODCount()); const int32 BaseBytesPerElem = GetImageFormatData(BaseImage->GetFormat()).BytesPerBlock; const int32 BlendBytesPerElem = GetImageFormatData(BlendedImage->GetFormat()).BytesPerBlock; const int32 NumLODs = bOnlyFirstLOD ? 
1 : BaseImage->GetLODCount(); constexpr int32 BatchNumElems = 4096*2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.LODBegin = 0; BatchArgs.LODEnd = NumLODs; BatchArgs.Result = BaseImage; BatchArgs.Blend = BlendedImage; BatchArgs.ResultBytesPerElem = BaseBytesPerElem; BatchArgs.BlendBytesPerElem = BlendBytesPerElem; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, BaseChannelOffset, BlendedChannelOffset ] (int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs); uint8* BaseBuf = BatchViews.Result.GetData() + BaseChannelOffset; const uint8* BlendedBuf = BatchViews.Blend.GetData() + BlendedChannelOffset; for (int32 I = 0; I < BatchViews.NumElems; ++I) { for (int32 C = 0; C < CHANNELS_TO_BLEND; ++C) { uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; uint32 Blended = BlendedBuf[BLENDED_CHANNEL_STRIDE * I + C]; uint32 Result = BLEND_FUNC(Base, Blended); if constexpr (CLAMP) { BaseBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { BaseBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } } }); } /** * Apply a blending function to an image with another image as blending layer */ template< uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, // Number of total channels to actually process int32 CHANNELS_TO_BLEND, // Number of total channels in the base image int32 BASE_CHANNEL_STRIDE, // Number of total channels in the blend image int32 BLENDED_CHANNEL_STRIDE, int32 BLENDED_CHANNEL_OFFSET> void BufferLayerFormat(FImage* DestImage, const FImage* BaseImage, const FImage* BlendedImage, bool bOnlyFirstLOD) { check(BaseImage->GetSizeX() == BlendedImage->GetSizeX()); check(BaseImage->GetSizeY() == BlendedImage->GetSizeY()); // Not true anymore, since the BLENDED_CHANNEL_OFFSET has been added. 
// check(BaseImage->GetFormat() == BlendedImage->GetFormat());
		check(bOnlyFirstLOD || BaseImage->GetLODCount() <= BlendedImage->GetLODCount());

		// Channels of the base pixel that are not blended and must be copied through.
		constexpr int32 UnblendedChannels = BASE_CHANNEL_STRIDE - CHANNELS_TO_BLEND;
		// Number of elements handed to each parallel batch.
		constexpr int32 BatchNumElems = 4096*2;

		const int32 BaseBytesPerElem = GetImageFormatData(BaseImage->GetFormat()).BytesPerBlock;
		const int32 BlendBytesPerElem = GetImageFormatData(BlendedImage->GetFormat()).BytesPerBlock;
		const int32 DestBytesPerElem = GetImageFormatData(DestImage->GetFormat()).BytesPerBlock;

		const int32 NumLODs = bOnlyFirstLOD ? 1 : BaseImage->GetLODCount();

		OpImageLayerInternal::FOpLayerBatchArgs BatchArgs;
		BatchArgs.BatchNumElems = BatchNumElems;
		BatchArgs.LODBegin = 0;
		BatchArgs.LODEnd = NumLODs;
		BatchArgs.Result = DestImage;
		BatchArgs.Base = BaseImage;
		BatchArgs.Blend = BlendedImage;
		BatchArgs.ResultBytesPerElem = DestBytesPerElem;
		BatchArgs.BaseBytesPerElem = BaseBytesPerElem;
		BatchArgs.BlendBytesPerElem = BlendBytesPerElem;

		const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs);

		ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches,
		[
			&BatchArgs, UnblendedChannels
		] (int32 BatchId)
		{
			OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs);

			const uint8* BaseBuf = BatchViews.Base.GetData();
			// The blend source starts BLENDED_CHANNEL_OFFSET channels into each blend pixel.
			const uint8* BlendedBuf = BatchViews.Blend.GetData() + BLENDED_CHANNEL_OFFSET;
			uint8* DestBuf = BatchViews.Result.GetData();

			for (int32 I = 0; I < BatchViews.NumElems; ++I)
			{
				for (int32 C = 0; C < CHANNELS_TO_BLEND; ++C)
				{
					uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C];
					uint32 Blended = BlendedBuf[BLENDED_CHANNEL_STRIDE * I + C];
					uint32 Result = BLEND_FUNC(Base, Blended);
					if constexpr (CLAMP)
					{
						DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result);
					}
					else
					{
						DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result;
					}
				}

				// Copy the unblended channels
				// \TODO: unnecessary when doing it in-place?
				if constexpr (UnblendedChannels > 0)
				{
					for (int32 C = 0; C < UnblendedChannels; ++C)
					{
						DestBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C] = BaseBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C];
					}
				}
			}
		});
	}


	/**
	 * Apply a blending function to an image with another image as blending layer.
	 *
	 * Validates that result, base and blend images agree in size (and, unless bOnlyOneMip,
	 * in LOD count) and then dispatches on the image formats to BufferLayerFormat.
	 *
	 * NOTE(review): the explicit template arguments of the BufferLayerFormat calls below are
	 * not visible in this copy of the file, so the per-format channel counts, strides and
	 * offsets cannot be read from here - confirm against the upstream source.
	 * NOTE(review): when bUseBlendSourceFromBlendAlpha is set and the blend format is none of
	 * RGBA/BGRA/L, no branch runs and no check fires - confirm this is intended.
	 */
	template< uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP >
	void BufferLayer(FImage* ResultImage, const FImage* BaseImage, const FImage* BlendedImage, bool bApplyToAlpha, bool bOnlyOneMip, bool bUseBlendSourceFromBlendAlpha)
	{
		check(ResultImage->GetFormat() == BaseImage->GetFormat());
		check(ResultImage->GetSizeX() == BaseImage->GetSizeX());
		check(ResultImage->GetSizeY() == BaseImage->GetSizeY());
		check(bOnlyOneMip || ResultImage->GetLODCount() == BaseImage->GetLODCount());
		check(BaseImage->GetSizeX() == BlendedImage->GetSizeX());
		check(BaseImage->GetSizeY() == BlendedImage->GetSizeY());
		check(bOnlyOneMip || ResultImage->GetLODCount() <= BlendedImage->GetLODCount());

		EImageFormat BaseFormat = BaseImage->GetFormat();
		EImageFormat BlendedFormat = BlendedImage->GetFormat();

		if (bUseBlendSourceFromBlendAlpha)
		{
			if (BlendedFormat == EImageFormat::RGBA_UByte || BlendedFormat == EImageFormat::BGRA_UByte)
			{
				if (BaseFormat == EImageFormat::L_UByte)
				{
					BufferLayerFormat(ResultImage, BaseImage, BlendedImage, bOnlyOneMip);
				}
				else
				{
					checkf(false, TEXT("Unsupported format."));
				}
			}
			else if (BlendedFormat == EImageFormat::L_UByte)
			{
				BufferLayerFormat(ResultImage, BaseImage, BlendedImage, bOnlyOneMip);
			}
		}
		else
		{
			// Without the blend-alpha source, base and blend must share a format.
			check(BaseFormat == BlendedFormat);
			if (BaseFormat == EImageFormat::RGB_UByte)
			{
				check(!bUseBlendSourceFromBlendAlpha);
				BufferLayerFormat(ResultImage, BaseImage, BlendedImage, bOnlyOneMip);
			}
			else if (BaseFormat == EImageFormat::RGBA_UByte || BaseFormat == EImageFormat::BGRA_UByte)
			{
				check(!bUseBlendSourceFromBlendAlpha);
				if (bApplyToAlpha)
				{
					BufferLayerFormat(ResultImage, BaseImage, BlendedImage, bOnlyOneMip);
				}
				else
				{
					BufferLayerFormat(ResultImage, BaseImage, BlendedImage,
bOnlyOneMip);
				}
			}
			else if (BaseFormat == EImageFormat::L_UByte)
			{
				BufferLayerFormat(ResultImage, BaseImage, BlendedImage, bOnlyOneMip);
			}
			else
			{
				checkf(false, TEXT("Unsupported format."));
			}
		}
	}


	/**
	 * Blend CHANNEL_COUNT channels of BlendedImage into BaseImage in place.
	 *
	 * Dispatches on the (base, blend) format pair to BufferLayerFormatInPlace with the
	 * matching channel strides. Supported pairs are RGB/RGB, {RGBA,BGRA}/{RGBA,BGRA},
	 * {RGBA,BGRA}/L and L/L; any other pair triggers checkf.
	 * BaseOffset and BlendedOffset are the first channel to process in each image.
	 *
	 * NOTE(review): the RGB/RGB call has no visible explicit template arguments in this copy
	 * of the file - confirm against the upstream source.
	 */
	template< uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, int32 CHANNEL_COUNT >
	void BufferLayerInPlace(FImage* BaseImage, const FImage* BlendedImage, bool bOnlyOneMip, uint32 BaseOffset, uint32 BlendedOffset)
	{
		check(BaseImage->GetSizeX() == BlendedImage->GetSizeX());
		check(BaseImage->GetSizeY() == BlendedImage->GetSizeY());

		// Not required since we have the CHANNEL_COUNT and offsets.
		// check(BaseImage->GetFormat() == BlendedImage->GetFormat());

		EImageFormat BaseFormat = BaseImage->GetFormat();
		EImageFormat BlendFormat = BlendedImage->GetFormat();

		if (BaseFormat == EImageFormat::RGB_UByte && BlendFormat==EImageFormat::RGB_UByte)
		{
			check(BaseOffset + CHANNEL_COUNT <= 3);
			check(BlendedOffset + CHANNEL_COUNT <= 3);
			BufferLayerFormatInPlace(BaseImage, BlendedImage, BaseOffset, BlendedOffset, bOnlyOneMip);
		}
		else if ((BaseFormat == EImageFormat::RGBA_UByte || BaseFormat == EImageFormat::BGRA_UByte) && (BlendFormat == EImageFormat::RGBA_UByte || BlendFormat == EImageFormat::BGRA_UByte) )
		{
			check(BaseOffset + CHANNEL_COUNT <= 4);
			check(BlendedOffset + CHANNEL_COUNT <= 4);
			BufferLayerFormatInPlace< BLEND_FUNC, CLAMP, CHANNEL_COUNT, 4, 4> (BaseImage, BlendedImage, BaseOffset, BlendedOffset, bOnlyOneMip);
		}
		else if ((BaseFormat == EImageFormat::RGBA_UByte || BaseFormat == EImageFormat::BGRA_UByte) && BlendFormat == EImageFormat::L_UByte )
		{
			check(BaseOffset + CHANNEL_COUNT <= 4);
			check(BlendedOffset + CHANNEL_COUNT <= 1);
			BufferLayerFormatInPlace< BLEND_FUNC, CLAMP, CHANNEL_COUNT, 4, 1> (BaseImage, BlendedImage, BaseOffset, BlendedOffset, bOnlyOneMip);
		}
		else if (BaseFormat == EImageFormat::L_UByte && BlendFormat==EImageFormat::L_UByte)
		{
			check(BaseOffset + CHANNEL_COUNT <= 1);
			BufferLayerFormatInPlace< BLEND_FUNC, CLAMP, CHANNEL_COUNT, 1, 1> (BaseImage, BlendedImage, BaseOffset,
BlendedOffset, bOnlyOneMip); } else { checkf(false, TEXT("Unsupported format.")); } } template< uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32), uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, // Number of total channels to actually process int32 CHANNELS_TO_BLEND, // Number of total channels in the base image int32 BASE_CHANNEL_STRIDE, // Number of total channels in the blend image int32 BLENDED_CHANNEL_STRIDE> void BufferLayerFormat( FImage* DestImage, const FImage* BaseImage, const FImage* MaskImage, const FImage* BlendImage, uint32 DestOffset, uint32 BaseChannelOffset, uint32 BlendedChannelOffset, bool bOnlyFirstLOD) { check(BaseImage->GetSizeX() == MaskImage->GetSizeX() && BaseImage->GetSizeY() == MaskImage->GetSizeY()); check(BaseImage->GetSizeX() == BlendImage->GetSizeX() && BaseImage->GetSizeY() == BlendImage->GetSizeY()); check(bOnlyFirstLOD || BaseImage->GetLODCount() <= MaskImage->GetLODCount()); check(bOnlyFirstLOD || BaseImage->GetLODCount() <= BlendImage->GetLODCount()); check(BaseImage->GetFormat() == BlendImage->GetFormat()); check(MaskImage->GetFormat() == EImageFormat::L_UByte || MaskImage->GetFormat() == EImageFormat::L_UByteRLE); const EImageFormat MaskFormat = MaskImage->GetFormat(); const int32 BaseBytesPerElem = GetImageFormatData(BaseImage->GetFormat()).BytesPerBlock; const int32 BlendBytesPerElem = GetImageFormatData(BlendImage->GetFormat()).BytesPerBlock; const int32 NumLODs = bOnlyFirstLOD ? 
1 : BaseImage->GetLODCount(); const bool bIsMaskUncompressed = MaskFormat == EImageFormat::L_UByte; constexpr int32 UnblendedChannels = BASE_CHANNEL_STRIDE - CHANNELS_TO_BLEND; if (bIsMaskUncompressed) { constexpr int32 BatchNumElems = 4096*2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.LODBegin = 0; BatchArgs.LODEnd = NumLODs; BatchArgs.Result = DestImage; BatchArgs.Base = BaseImage; BatchArgs.Blend = BlendImage; BatchArgs.Mask = MaskImage; BatchArgs.ResultBytesPerElem = BaseBytesPerElem; BatchArgs.BaseBytesPerElem = BaseBytesPerElem; BatchArgs.BlendBytesPerElem = BlendBytesPerElem; BatchArgs.MaskBytesPerElem = 1; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, UnblendedChannels ] (uint32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs); const uint8* BaseBuf = BatchViews.Base.GetData(); const uint8* BlendedBuf = BatchViews.Blend.GetData(); const uint8* MaskBuf = BatchViews.Mask.GetData(); uint8* DestBuf = BatchViews.Result.GetData(); // This could happen in case of missing data files. if (!BaseBuf || !BlendedBuf || !MaskBuf || !DestBuf) { return; } for (int32 I = 0; I < BatchViews.NumElems; ++I) { uint32 Mask = MaskBuf[I]; for (int32 C = 0; C < CHANNELS_TO_BLEND; ++C) { const uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; const uint32 Blended = BlendedBuf[BLENDED_CHANNEL_STRIDE * I + C]; const uint32 Result = BLEND_FUNC_MASKED(Base, Blended, Mask); if constexpr (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } // Copy the unblended channels // \TODO: unnecessary when doing it in-place? 
if constexpr (UnblendedChannels > 0) { for (int32 C = 0; C < UnblendedChannels; ++C) { DestBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C] = BaseBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C]; } } } }); } else if (MaskFormat == EImageFormat::L_UByteRLE) { int32 Rows = BaseImage->GetSizeY(); int32 Width = BaseImage->GetSizeX(); for (int32 LOD = 0; LOD < NumLODs; ++LOD) { const uint8* BaseBuf = BaseImage->GetLODData(LOD); uint8* DestBuf = DestImage->GetLODData(LOD); const uint8* BlendedBuf = BlendImage->GetLODData(LOD); const uint8* MaskBuf = MaskImage->GetLODData(LOD); // This could happen in case of missing data files. if (!BaseBuf || !BlendedBuf || !MaskBuf || !DestBuf) { continue; } // Remove RLE header, mip size and row sizes. MaskBuf += sizeof(uint32) + Rows * sizeof(uint32); for (int32 RowIndex = 0; RowIndex < Rows; ++RowIndex) { const uint8* DestRowEnd = DestBuf + Width * BASE_CHANNEL_STRIDE; while (DestBuf != DestRowEnd) { // Decode header uint16 Equal; FMemory::Memcpy(&Equal, MaskBuf, 2); MaskBuf += 2; uint8 Different = *MaskBuf; ++MaskBuf; uint8 EqualPixel = *MaskBuf; ++MaskBuf; // Equal pixels //check(DestBuf + BASE_CHANNEL_STRIDE * Equal <= BaseImage->GetLODDataSize(0)); if (EqualPixel == 255) { for (int32 I = 0; I < Equal; ++I) { for (int32 C = 0; C < CHANNELS_TO_BLEND; ++C) { uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; uint32 Blended = BlendedBuf[BLENDED_CHANNEL_STRIDE * I + C]; uint32 Result = BLEND_FUNC(Base, Blended); if constexpr (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } // Copy the unblended channels // \TODO: unnecessary when doing it in-place? 
if constexpr (UnblendedChannels > 0) { for (int32 C = 0; C < UnblendedChannels; ++C) { DestBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C] = BaseBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C]; } } } } else if (EqualPixel > 0) { for (int32 I = 0; I < Equal; ++I) { for (int32 C = 0; C < CHANNELS_TO_BLEND; ++C) { uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; uint32 Blended = BlendedBuf[BLENDED_CHANNEL_STRIDE * I + C]; uint32 Result = BLEND_FUNC_MASKED(Base, Blended, EqualPixel); if constexpr (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } // Copy the unblended channels // \TODO: unnecessary when doing it in-place? if constexpr (UnblendedChannels > 0) { for (int32 C = 0; C < UnblendedChannels; ++C) { DestBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C] = BaseBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C]; } } } } else { // It could happen if xxxxxOnBase if (DestBuf != BaseBuf) { FMemory::Memmove(DestBuf, BaseBuf, BASE_CHANNEL_STRIDE * Equal); } } DestBuf += BASE_CHANNEL_STRIDE * Equal; BaseBuf += BASE_CHANNEL_STRIDE * Equal; BlendedBuf += BLENDED_CHANNEL_STRIDE * Equal; // Different pixels //check(DestBuf + BASE_CHANNEL_STRIDE * Different <= BaseImage->GetDataSize(0)); for (int32 I = 0; I < Different; ++I) { for (int32 C = 0; C < CHANNELS_TO_BLEND; ++C) { uint32 Mask = MaskBuf[I]; uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; uint32 Blended = BlendedBuf[BLENDED_CHANNEL_STRIDE * I + C]; uint32 Result = BLEND_FUNC_MASKED(Base, Blended, Mask); if (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } // Copy the unblended channels // \TODO: unnecessary when doing it in-place? 
if constexpr (UnblendedChannels > 0) { for (int32 C = 0; C < UnblendedChannels; ++C) { DestBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C] = BaseBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C]; } } } DestBuf += BASE_CHANNEL_STRIDE * Different; BaseBuf += BASE_CHANNEL_STRIDE * Different; BlendedBuf += BLENDED_CHANNEL_STRIDE * Different; MaskBuf += Different; } } Rows = FMath::DivideAndRoundUp(Rows, 2); Width = FMath::DivideAndRoundUp(Width, 2); } } else { checkf(false, TEXT("Unsupported mask format.")); } } template< uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32), uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, // Number of total channels to actually process int32 CHANNELS_TO_BLEND, // Number of total channels in the base image int32 BASE_CHANNEL_STRIDE, // Number of total channels in the blend image int32 BLENDED_CHANNEL_STRIDE> void BufferLayerFormatEmbeddedMask( FImage* DestImage, const FImage* BaseImage, const FImage* BlendImage, uint32 DestOffset, uint32 BaseChannelOffset, bool bOnlyFirstLOD) { check(BaseImage->GetSizeX() == BlendImage->GetSizeX() && BaseImage->GetSizeY() == BlendImage->GetSizeY()); check(bOnlyFirstLOD || BaseImage->GetLODCount() <= BlendImage->GetLODCount()); constexpr int32 UnblendedChannels = BASE_CHANNEL_STRIDE - CHANNELS_TO_BLEND; const int32 BytesPerElem = GetImageFormatData(BaseImage->GetFormat()).BytesPerBlock; const int32 NumLODs = bOnlyFirstLOD ? 
1 : BaseImage->GetLODCount(); constexpr int32 BatchNumElems = 4096*2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.LODBegin = 0; BatchArgs.LODEnd = NumLODs; BatchArgs.Result = DestImage; BatchArgs.Base = BaseImage; BatchArgs.Blend = BlendImage; BatchArgs.ResultBytesPerElem = BytesPerElem; BatchArgs.BaseBytesPerElem = BytesPerElem; BatchArgs.BlendBytesPerElem = BytesPerElem; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, UnblendedChannels, BlendImageFormat = BlendImage->GetFormat() ] (int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs); uint8* const DestBuf = BatchViews.Result.GetData(); uint8 const * const BlendedBuf = BatchViews.Blend.GetData(); uint8 const * const BaseBuf = BatchViews.Base.GetData(); // This could happen in case of missing data files. if (!BaseBuf || !BlendedBuf) { return; } if (BlendImageFormat == EImageFormat::RGBA_UByte) { const uint8* MaskBuf = BlendedBuf + 3; for (int32 I = 0; I < BatchViews.NumElems; ++I) { uint32 Mask = MaskBuf[BLENDED_CHANNEL_STRIDE * I]; for (int32 C = 0; C < CHANNELS_TO_BLEND; ++C) { uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; uint32 Blended = BlendedBuf[BLENDED_CHANNEL_STRIDE * I + C]; uint32 Result = BLEND_FUNC_MASKED(Base, Blended, Mask); if constexpr (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } // Copy the unblended channels // \TODO: unnecessary when doing it in-place? 
if constexpr (UnblendedChannels > 0) { for (int32 C = 0; C < UnblendedChannels; ++C) { DestBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C] = BaseBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C]; } } } } else { for (int32 I = 0; I < BatchViews.NumElems; ++I) { for (int32 C = 0; C < CHANNELS_TO_BLEND; ++C) { uint32 Base = BaseBuf[BASE_CHANNEL_STRIDE * I + C]; uint32 Blended = BlendedBuf[BLENDED_CHANNEL_STRIDE * I + C]; uint32 Result = BLEND_FUNC(Base, Blended); if constexpr (CLAMP) { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)FMath::Min(255u, Result); } else { DestBuf[BASE_CHANNEL_STRIDE * I + C] = (uint8)Result; } } // Copy the unblended channels // \TODO: unnecessary when doing it in-place? if constexpr (UnblendedChannels > 0) { for (int32 C = 0; C < UnblendedChannels; ++C) { DestBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C] = BaseBuf[BASE_CHANNEL_STRIDE * I + CHANNELS_TO_BLEND + C]; } } } } }); } template< uint32 (*BLEND_FUNC)(uint32, uint32), bool CLAMP, uint32 NC> void BufferLayerFormatStrideNoAlpha( FImage* DestImage, int32 DestOffset, int32 Stride, const FImage* MaskImage, const FImage* BlendImage/*, int32 LODCount*/) { const uint8* MaskBuf = MaskImage->GetLODData(0); const uint8* BlendedBuf = BlendImage->GetLODData(0); uint8* DestBuf = DestImage->GetLODData(0) + DestOffset; // This could happen in case of missing data files. 
if (!BlendedBuf || !MaskBuf || !DestImage->GetLODData(0)) { return; } EImageFormat MaskFormat = MaskImage->GetFormat(); bool bIsUncompressed = (MaskFormat == EImageFormat::L_UByte); if (bIsUncompressed) { int32 RowCount = BlendImage->GetSizeY(); int32 PixelCount = BlendImage->GetSizeX(); for (int32 RowIndex = 0; RowIndex < RowCount; ++RowIndex) { for (int32 PixelIndex = 0; PixelIndex < PixelCount; ++PixelIndex) { uint32 Mask = *MaskBuf; if (Mask) { for (int32 C = 0; C < NC; ++C) { uint32 Base = *DestBuf; uint32 Blended = *BlendedBuf; uint32 Result = BLEND_FUNC(Base, Blended); if constexpr (CLAMP) { *DestBuf = (uint8)FMath::Min(255u, Result); } else { *DestBuf = (uint8)Result; } ++DestBuf; ++BlendedBuf; } } else { DestBuf += NC; BlendedBuf += NC; } ++MaskBuf; } DestBuf += Stride; } } else if (MaskFormat == EImageFormat::L_UBitRLE) { int32 Rows = MaskImage->GetSizeY(); int32 Width = MaskImage->GetSizeX(); //for (int32 lod = 0; lod < LODCount; ++lod) //{ // Remove RLE header. MaskBuf += 4 + Rows*sizeof(uint32); for (int32 RowIndex = 0; RowIndex < Rows; ++RowIndex) { const uint8* DestRowEnd = DestBuf + Width*NC; while (DestBuf != DestRowEnd) { // Decode header uint16 Zeros = *(const uint16*)MaskBuf; MaskBuf += 2; uint16 Ones = *(const uint16*)MaskBuf; MaskBuf += 2; // Skip DestBuf += Zeros*NC; BlendedBuf += Zeros*NC; // Copy FMemory::Memmove(DestBuf, BlendedBuf, Ones*NC); DestBuf += NC*Ones; BlendedBuf += NC*Ones; } DestBuf += Stride; } //Rows = FMath::DivideAndRoundUp(Rows, 2); //Width = FMath::DivideAndRoundUp(Width, 2); //} } else { checkf( false, TEXT("Unsupported mask format.") ); } } template void BufferLayer(FImage* DestImage, const FImage* BaseImage, const FImage* MaskImage, const FImage* BlendImage, bool bApplyToAlpha, bool bOnlyFirstLOD) { if (BaseImage->GetFormat() == EImageFormat::RGB_UByte) { BufferLayerFormat (DestImage, BaseImage, MaskImage, BlendImage, 0, 0, 0, bOnlyFirstLOD); } else if (BaseImage->GetFormat() == EImageFormat::RGBA_UByte || 
BaseImage->GetFormat() == EImageFormat::BGRA_UByte)
		{
			if (bApplyToAlpha)
			{
				BufferLayerFormat (DestImage, BaseImage, MaskImage, BlendImage, 0, 0, 0, bOnlyFirstLOD);
			}
			else
			{
				BufferLayerFormat (DestImage, BaseImage, MaskImage, BlendImage, 0, 0, 0, bOnlyFirstLOD);
			}
		}
		else if (BaseImage->GetFormat() == EImageFormat::L_UByte)
		{
			BufferLayerFormat (DestImage, BaseImage, MaskImage, BlendImage, 0, 0, 0, bOnlyFirstLOD);
		}
		else
		{
			checkf(false, TEXT("Unsupported format."));
		}
	}


	/**
	 * Blend BlendImage over BaseImage into DestImage using a mask embedded in the blend
	 * image, dispatching on the base image format to BufferLayerFormatEmbeddedMask.
	 *
	 * RGB, RGBA and BGRA bases are handled; any other base format triggers checkf.
	 *
	 * NOTE(review): the explicit template arguments of the calls below are not visible in
	 * this copy of the file, so the bApplyToAlpha branches read identically here - confirm
	 * against the upstream source.
	 */
	template<
		uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32),
		uint32 (*BLEND_FUNC)(uint32, uint32),
		bool CLAMP >
	void BufferLayerEmbeddedMask(
		FImage* DestImage,
		const FImage* BaseImage,
		const FImage* BlendImage,
		bool bApplyToAlpha,
		bool bOnlyFirstLOD)
	{
		if (BaseImage->GetFormat() == EImageFormat::RGB_UByte)
		{
			BufferLayerFormatEmbeddedMask (DestImage, BaseImage, BlendImage, 0, 0, bOnlyFirstLOD);
		}
		else if (BaseImage->GetFormat() == EImageFormat::RGBA_UByte || BaseImage->GetFormat() == EImageFormat::BGRA_UByte)
		{
			if (bApplyToAlpha)
			{
				BufferLayerFormatEmbeddedMask (DestImage, BaseImage, BlendImage, 0, 0, bOnlyFirstLOD);
			}
			else
			{
				BufferLayerFormatEmbeddedMask (DestImage, BaseImage, BlendImage, 0, 0, bOnlyFirstLOD);
			}
		}
		else
		{
			checkf(false, TEXT("Unsupported format."));
		}
	}


	/**
	 * Composite an RGBA blend image onto an RGBA base image in place.
	 *
	 * The RGB channels are blended with RGB_FUNC_MASKED using the blend image's fourth
	 * channel as mask; the base alpha is combined with channel BlendAlphaSourceChannel of
	 * the blend image through A_FUNC. When CLAMP is set results are limited to 255.
	 */
	template<
		uint32 (*RGB_FUNC_MASKED)(uint32, uint32, uint32),
		uint32 (*A_FUNC)(uint32, uint32),
		bool CLAMP >
	void BufferLayerComposite(
		FImage* BaseImage,
		const FImage* BlendImage,
		bool bOnlyFirstLOD,
		uint8 BlendAlphaSourceChannel)
	{
		check(BaseImage->GetFormat() == EImageFormat::RGBA_UByte);
		check(BlendImage->GetFormat() == EImageFormat::RGBA_UByte);
		check(BaseImage->GetSizeX() == BlendImage->GetSizeX() && BaseImage->GetSizeY() == BlendImage->GetSizeY());
		check(bOnlyFirstLOD || BaseImage->GetLODCount() <= BlendImage->GetLODCount());

		// A single-LOD base is treated the same as an explicit first-LOD-only request.
		bOnlyFirstLOD = bOnlyFirstLOD || BaseImage->GetLODCount() == 1;

		int32 FirstLODDataOffset = 0;
		int32 NumRelevantElems = -1;
		if (BlendImage->Flags & FImage::IF_HAS_RELEVANCY_MAP && bOnlyFirstLOD)
		{
check(BlendImage->RelevancyMaxY < BaseImage->GetSizeY());
			check(BlendImage->RelevancyMaxY >= BlendImage->RelevancyMinY);

			uint16 SizeX = BaseImage->GetSizeX();
			// NOTE(review): this count is scaled by 4 here, while the explicit specialization
			// of this function computes it without the factor - confirm which unit is intended.
			NumRelevantElems = (BlendImage->RelevancyMaxY - BlendImage->RelevancyMinY + 1) * SizeX * 4;
			// Byte offset of the first relevant row (4 bytes per pixel).
			FirstLODDataOffset = BlendImage->RelevancyMinY * SizeX * 4;
		}

		const int32 BytesPerElem = GetImageFormatData(BaseImage->GetFormat()).BytesPerBlock;
		check(BytesPerElem == GetImageFormatData(BlendImage->GetFormat()).BytesPerBlock);

		const int32 LODBegin = 0;
		const int32 LODEnd = BaseImage->GetLODCount();

		// Number of elements handed to each parallel batch.
		constexpr int32 BatchNumElems = 4096*2;

		OpImageLayerInternal::FOpLayerBatchArgs BatchArgs;
		BatchArgs.BatchNumElems = BatchNumElems;
		BatchArgs.LODBegin = LODBegin;
		BatchArgs.LODEnd = LODEnd;
		// -1 is passed as the offset when more than the first LOD is processed.
		BatchArgs.FirstLODOffset = bOnlyFirstLOD ? FirstLODDataOffset : -1;
		BatchArgs.Result = BaseImage;
		BatchArgs.Blend = BlendImage;
		BatchArgs.ResultBytesPerElem = BytesPerElem;
		BatchArgs.BlendBytesPerElem = BytesPerElem;

		const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRangeOffsetViews(BatchArgs);

		// This will always be an upper bound when bOnlyFirstLOD is set, check if it performs as expected or it needs more fine tuning.
		// NOTE(review): NumRelevantBatches is computed but the dispatch below uses NumBatches,
		// unlike the explicit specialization of this function - confirm whether that is intended.
		const int32 NumRelevantBatches = NumRelevantElems != -1 ? FMath::DivideAndRoundUp(NumRelevantElems, BatchNumElems) : NumBatches;

		ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches,
		[
			&BatchArgs, BlendAlphaSourceChannel
		] (int32 BatchId)
		{
			OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeOffsetViews(BatchId, BatchArgs);

			uint8* BaseBuf = BatchViews.Result.GetData();
			const uint8* BlendBuf = BatchViews.Blend.GetData();

			for (int32 I = 0; I < BatchViews.NumElems; ++I)
			{
				// TODO: Optimize this (SIMD?)
				// The mask is always the fourth channel of the blend pixel.
				uint32 Mask = BlendBuf[4 * I + 3];

				// RGB
				for (int32 C = 0; C < 3; ++C)
				{
					uint32 Base = BaseBuf[4 * I + C];
					uint32 Blended = BlendBuf[4 * I + C];
					uint32 Result = RGB_FUNC_MASKED(Base, Blended, Mask);
					if constexpr (CLAMP)
					{
						BaseBuf[4 * I + C] = (uint8)FMath::Min(255u, Result);
					}
					else
					{
						BaseBuf[4 * I + C] = (uint8)Result;
					}
				}

				// A
				{
					uint32 Base = BaseBuf[4 * I + 3];
					uint32 Blended = BlendBuf[4 * I + BlendAlphaSourceChannel];
					uint32 Result = A_FUNC(Base, Blended);
					if constexpr (CLAMP)
					{
						BaseBuf[4 * I + 3] = (uint8)FMath::Min(255u, Result);
					}
					else
					{
						BaseBuf[4 * I + 3] = (uint8)Result;
					}
				}
			}
		});
	}


	/**
	 * Explicit specialization of BufferLayerComposite that processes each pixel as one
	 * packed 32-bit value and requires BlendAlphaSourceChannel to be 3.
	 *
	 * NOTE(review): the explicit template arguments of this specialization are not visible
	 * in this copy of the file - confirm against the upstream source.
	 */
	template<>
	void BufferLayerComposite (
		FImage* BaseImage,
		const FImage* BlendImage,
		bool bOnlyFirstLOD,
		uint8 BlendAlphaSourceChannel)
	{
		check(BaseImage->GetFormat() == EImageFormat::RGBA_UByte);
		check(BlendImage->GetFormat() == EImageFormat::RGBA_UByte);
		check(BaseImage->GetSizeX() == BlendImage->GetSizeX() && BaseImage->GetSizeY() == BlendImage->GetSizeY());
		check(bOnlyFirstLOD || BaseImage->GetLODCount() <= BlendImage->GetLODCount());
		check(BlendAlphaSourceChannel == 3);

		// A single-LOD base is treated the same as an explicit first-LOD-only request.
		bOnlyFirstLOD = bOnlyFirstLOD || BaseImage->GetLODCount() == 1;

		int32 FirstLODDataOffset = 0;
		int32 NumRelevantElems = -1;
		if (BlendImage->Flags & FImage::IF_HAS_RELEVANCY_MAP && bOnlyFirstLOD)
		{
			check(BlendImage->RelevancyMaxY < BaseImage->GetSizeY());
			check(BlendImage->RelevancyMaxY >= BlendImage->RelevancyMinY);

			uint16 SizeX = BaseImage->GetSizeX();
			// Number of pixels in the relevant rows.
			NumRelevantElems = (BlendImage->RelevancyMaxY - BlendImage->RelevancyMinY + 1) * SizeX;
			// Byte offset of the first relevant row (4 bytes per pixel).
			FirstLODDataOffset = BlendImage->RelevancyMinY * SizeX * 4;
		}

		const int32 NumLODs = BaseImage->GetLODCount();

		// Number of elements handed to each parallel batch.
		constexpr int32 BatchNumElems = 4096 * 2;
		constexpr int32 BytesPerElem = 4;

		OpImageLayerInternal::FOpLayerBatchArgs BatchArgs;
		BatchArgs.BatchNumElems = BatchNumElems;
		BatchArgs.LODBegin = 0;
		BatchArgs.LODEnd = NumLODs;
		BatchArgs.FirstLODOffset = bOnlyFirstLOD ?
FirstLODDataOffset : -1;
		BatchArgs.Result = BaseImage;
		BatchArgs.Blend = BlendImage;
		BatchArgs.ResultBytesPerElem = BytesPerElem;
		BatchArgs.BlendBytesPerElem = BytesPerElem;

		const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRangeOffsetViews(BatchArgs);

		// This will always be an upper bound when bOnlyFirstLOD is set, check if it performs as expected or it needs more fine tuning.
		const int32 NumRelevantBatches = NumRelevantElems != -1 ? FMath::DivideAndRoundUp(NumRelevantElems, BatchNumElems) : NumBatches;

		// Only the batches covering the relevant rows are dispatched.
		ParallelExecutionUtils::InvokeBatchParallelFor(NumRelevantBatches,
		[
			&BatchArgs
		] (uint32 BatchId)
		{
			OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeOffsetViews(BatchId, BatchArgs);

			uint8* BaseBuf = BatchViews.Result.GetData();
			const uint8* BlendBuf = BatchViews.Blend.GetData();

			for (int32 I = 0; I < BatchViews.NumElems; ++I)
			{
				// TODO: Optimize this (SIMD?)
				// Load pixel I of each image as one packed 32-bit value.
				uint32 FullBase;
				FMemory::Memcpy(&FullBase, BaseBuf + I*sizeof(uint32), sizeof(uint32));

				uint32 FullBlended;
				FMemory::Memcpy(&FullBlended, BlendBuf + I*sizeof(uint32), sizeof(uint32));

				// The mask is the top byte of the packed blend pixel.
				uint32 Mask = (FullBlended & 0xff000000) >> 24;

				// The three low bytes use the masked blend; the top byte uses LightenChannel.
				uint32 FullResult = 0;
				FullResult |= BlendChannelMasked((FullBase >> 0) & 0xff, (FullBlended >> 0) & 0xff, Mask) << 0;
				FullResult |= BlendChannelMasked((FullBase >> 8) & 0xff, (FullBlended >> 8) & 0xff, Mask) << 8;
				FullResult |= BlendChannelMasked((FullBase >> 16) & 0xff, (FullBlended >> 16) & 0xff, Mask) << 16;
				FullResult |= LightenChannel ((FullBase >> 24) & 0xff, (FullBlended >> 24) & 0xff) << 24;

				FMemory::Memcpy(BaseBuf + I*sizeof(uint32), &FullResult, sizeof(uint32));
			}
		});
	}


	/**
	 * Composite an RGBA blend image onto an RGBA base image in place, evaluating the colour
	 * blend on a four-lane integer vector register per pixel.
	 *
	 * The alpha of channel BlendAlphaSourceChannel is combined separately through A_FUNC.
	 * When CLAMP is set results are limited to 255.
	 */
	template<
		VectorRegister4Int (*RGB_FUNC_MASKED)(const VectorRegister4Int&, const VectorRegister4Int&, const VectorRegister4Int&),
		int32 (*A_FUNC)(int32, int32),
		bool CLAMP >
	void BufferLayerCompositeVector(
		FImage* BaseImage,
		const FImage* BlendImage,
		bool bOnlyFirstLOD,
		uint8 BlendAlphaSourceChannel)
	{
		check(BaseImage->GetFormat() ==
EImageFormat::RGBA_UByte); check(BlendImage->GetFormat() == EImageFormat::RGBA_UByte); check(BaseImage->GetSizeX() == BlendImage->GetSizeX() && BaseImage->GetSizeY() == BlendImage->GetSizeY()); check(bOnlyFirstLOD || BaseImage->GetLODCount() <= BlendImage->GetLODCount()); const int32 NumLODs = bOnlyFirstLOD ? 1 : BaseImage->GetLODCount(); constexpr int32 BatchNumElems = 4096*2; constexpr int32 BytesPerElem = 4; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.LODBegin = 0; BatchArgs.LODEnd = NumLODs; BatchArgs.Result = BaseImage; BatchArgs.Blend = BlendImage; BatchArgs.ResultBytesPerElem = BytesPerElem; BatchArgs.BlendBytesPerElem = BytesPerElem; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, BlendAlphaSourceChannel ] (uint32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs); uint8* BaseBuf = BatchViews.Result.GetData(); const uint8* BlendBuf = BatchViews.Blend.GetData(); for (int32 I = 0; I < BatchViews.NumElems; ++I) { // TODO: Optimize this (SIMD?) 
const int32 BaseAlpha = BaseBuf[4 * I + BlendAlphaSourceChannel]; const int32 BlendedAlpha = BlendBuf[4 * I + BlendAlphaSourceChannel]; const VectorRegister4Int Mask = VectorIntSet1(BlendedAlpha); uint32 BlendPixel; FMemory::Memcpy(&BlendPixel, BlendBuf, sizeof(uint32)); uint32 BasePixel; FMemory::Memcpy(&BasePixel, BaseBuf, sizeof(uint32)); const VectorRegister4Int Blended = MakeVectorRegisterInt( (BlendPixel >> 0) & 0xFF, (BlendPixel >> 8) & 0xFF, (BlendPixel >> 16) & 0xFF, (BlendPixel >> 24) & 0xFF); const VectorRegister4Int Base = MakeVectorRegisterInt( (BasePixel >> 0) & 0xFF, (BasePixel >> 8) & 0xFF, (BasePixel >> 16) & 0xFF, (BasePixel >> 24) & 0xFF); VectorRegister4Int Result = RGB_FUNC_MASKED(Base, Blended, Mask); if constexpr (CLAMP) { Result = VectorIntMin(MakeVectorRegisterIntConstant(255, 255, 255, 255), Result); } int32 AlphaResult = A_FUNC(BaseAlpha, BlendedAlpha); if constexpr (CLAMP) { AlphaResult = FMath::Min(255, AlphaResult); } alignas(VectorRegister4Int) int32 IndexableRegister[4]; VectorIntStoreAligned(Result, &IndexableRegister); BaseBuf[4 * I + 0] = static_cast(IndexableRegister[0]); BaseBuf[4 * I + 1] = static_cast(IndexableRegister[1]); BaseBuf[4 * I + 2] = static_cast(IndexableRegister[2]); BaseBuf[4 * I + 3] = static_cast(IndexableRegister[3]); BaseBuf[4 * I + BlendAlphaSourceChannel] = static_cast(AlphaResult); } }); } template void BufferLayerStrideNoAlpha(FImage* DestImage, int32 DestOffset, int32 Stride, const FImage* MaskImage, const FImage* BlendImage/*, int32 LODCount*/) { if (BlendImage->GetFormat() == EImageFormat::RGB_UByte) { BufferLayerFormatStrideNoAlpha (DestImage, DestOffset, Stride, MaskImage, BlendImage/*, LODCount*/); } else if (BlendImage->GetFormat() == EImageFormat::RGBA_UByte || BlendImage->GetFormat() == EImageFormat::BGRA_UByte) { BufferLayerFormatStrideNoAlpha (DestImage, DestOffset, Stride, MaskImage, BlendImage/*, LODCount*/); } else if (BlendImage->GetFormat() == EImageFormat::L_UByte) { 
// Overview of the definitions on the following lines (code unchanged):
//  - Remainder of BufferLayerStrideNoAlpha: the L_UByte branch forwards to BufferLayerFormatStrideNoAlpha;
//    any other blend format reaches checkf(false, "Unsupported format.").
//  - ImageLayerOnBaseNoAlpha: validates sizes, formats and LOD counts, derives PixelSize from the base
//    format's BytesPerBlock, computes Start as the byte offset of the rect's first pixel in the base image
//    and Stride as the bytes of a base row that lie outside the rect, then calls BufferLayerStrideNoAlpha.
//  - PackPixel: zero-initialises a uint32 and copies NC bytes from PixelPtr into it.
//  - UnpackPixel: copies the first NC bytes of PixelData to PixelPtr.
//  - BufferLayerCombineColour (no mask): packs Color[0], Color[1], Color[2] scaled by 255 into bits 0, 8
//    and 16 of TopColor (Color[3] is not read) and, per element, stores BLEND_FUNC(Base, TopColor) into the
//    result buffer.
//    NOTE(review): BatchNumElems is 4098*2 here while BufferLayerCompositeVector in this file uses 4096*2;
//    confirm whether 4098 is intentional.
//  - Start of the three-image BufferLayerCombine overload.
BufferLayerFormatStrideNoAlpha (DestImage, DestOffset, Stride, MaskImage, BlendImage/*, LODCount*/); } else { checkf( false, TEXT("Unsupported format.") ); } } /** * Apply a blending function to an image with another image as blending layer, on a subrect of * the base image. * \warning this method applies the blending function to the alpha channel too * \warning this method uses the mask as a binary mask (>0) */ template void ImageLayerOnBaseNoAlpha( FImage* BaseImage, const FImage* MaskImage, const FImage* BlendedImage, const box& Rect) { check(BaseImage->GetSizeX() >= Rect.min[0] + Rect.size[0]); check(BaseImage->GetSizeY() >= Rect.min[1] + Rect.size[1]); check(MaskImage->GetSizeX() == BlendedImage->GetSizeX()); check(MaskImage->GetSizeY() == BlendedImage->GetSizeY()); check(BaseImage->GetFormat() == BlendedImage->GetFormat()); check(MaskImage->GetFormat() == EImageFormat::L_UByte || //UBYTE_RLE does not look to be supported. //MaskImage->GetFormat() == EImageFormat::L_UByteRLE || MaskImage->GetFormat() == EImageFormat::L_UBitRLE); check(BaseImage->GetLODCount() <= MaskImage->GetLODCount()); check(BaseImage->GetLODCount() <= BlendedImage->GetLODCount()); int32 PixelSize = GetImageFormatData(BaseImage->GetFormat()).BytesPerBlock; int32 Start = (BaseImage->GetSizeX() * Rect.min[1] + Rect.min[0]) * PixelSize; int32 Stride = (BaseImage->GetSizeX() - Rect.size[0]) * PixelSize; // Stride is only valid for LOD 0, BufferLayerStride variants cannot operate on multiple lods. // TODO: review if this needs to be supported, and implement using a rect lod reducction at this level. BufferLayerStrideNoAlpha(BaseImage, Start, Stride, MaskImage, BlendedImage/*, BaseImage->GetLODCount()*/); } template FORCEINLINE uint32 PackPixel(const uint8* PixelPtr) { static_assert(NC > 0 && NC <= 4); uint32 PixelPack = 0; // The compiler should be able to optimize this given that NC is a constant expression. 
FMemory::Memcpy(&PixelPack, PixelPtr, NC); return PixelPack; } template FORCEINLINE void UnpackPixel(uint8* PixelPtr, uint32 PixelData) { static_assert(NC > 0 && NC <= 4); // The compiler should be able to optimize this given that NC is a constant expression FMemory::Memcpy(PixelPtr, &PixelData, NC); } template< uint32 (*BLEND_FUNC)(uint32, uint32), uint32 NC> void BufferLayerCombineColour(FImage* ResultImage, const FImage* BaseImage, FVector4f Color, bool bOnlyFirstLOD = false) { static_assert(NC > 0 && NC <= 4); check(BaseImage->GetSizeX() == ResultImage->GetSizeX()); check(BaseImage->GetSizeY() == ResultImage->GetSizeY()); const uint32 TopColor = static_cast(255.0f * Color[0]) << 0 | static_cast(255.0f * Color[1]) << 8 | static_cast(255.0f * Color[2]) << 16; const int32 NumLODs = bOnlyFirstLOD ? 1 : ResultImage->GetLODCount(); constexpr int32 BatchNumElems = 4098*2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.LODBegin = 0; BatchArgs.LODEnd = NumLODs; BatchArgs.Result = ResultImage; BatchArgs.Base = BaseImage; BatchArgs.ResultBytesPerElem = NC; BatchArgs.BaseBytesPerElem = NC; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, TopColor ](int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs); const uint8* BaseBuf = BatchViews.Base.GetData(); uint8* ResultBuf = BatchViews.Result.GetData(); for (int32 I = 0; I < BatchViews.NumElems; ++I) { const uint32 Base = PackPixel(&BaseBuf[NC * I]); const uint32 Result = BLEND_FUNC(Base, TopColor); UnpackPixel(&ResultBuf[NC * I], Result); } }); } template< uint32 (*BLEND_FUNC)(uint32, uint32), uint32 NC> void BufferLayerCombine(FImage* ResultImage, const FImage* BaseImage, const FImage* BlendImage, bool bOnlyFirstLOD) { static_assert(NC > 0 && NC <= 4); 
check(BaseImage->GetSizeX() == BlendImage->GetSizeX()); check(BaseImage->GetSizeY() == BlendImage->GetSizeY()); check(BaseImage->GetSizeX() == ResultImage->GetSizeX()); check(BaseImage->GetSizeY() == ResultImage->GetSizeY()); check(BaseImage->GetFormat() == BlendImage->GetFormat()); check(BaseImage->GetFormat() == ResultImage->GetFormat()); const int32 NumLODs = bOnlyFirstLOD ? 1 : ResultImage->GetLODCount(); constexpr int32 BatchNumElems = 4098*2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.LODBegin = 0; BatchArgs.LODEnd = NumLODs; BatchArgs.Result = ResultImage; BatchArgs.Base = BaseImage; BatchArgs.ResultBytesPerElem = NC; BatchArgs.BaseBytesPerElem = NC; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs ](int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs); const uint8* BaseBuf = BatchViews.Base.GetData(); const uint8* BlendBuf = BatchViews.Blend.GetData(); uint8* ResultBuf = BatchViews.Result.GetData(); for (int32 I = 0; I < BatchViews.NumElems; ++I) { const uint32 Base = PackPixel(&BaseBuf[NC * I]); const uint32 Blend = PackPixel(&BlendBuf[NC * I]); const uint32 Result = BLEND_FUNC(Base, Blend); UnpackPixel(&ResultBuf[NC * I], Result); } }); } template< uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32), uint32 NC> void BufferLayerCombine(FImage* ResultImage, const FImage* BaseImage, const FImage* MaskImage, const FImage* BlendImage, bool bOnlyFirstLOD) { static_assert(NC > 0 && NC <= 4); check(BaseImage->GetSizeX() == BlendImage->GetSizeX()); check(BaseImage->GetSizeY() == BlendImage->GetSizeY()); check(BaseImage->GetSizeX() == MaskImage->GetSizeX()); check(BaseImage->GetSizeY() == MaskImage->GetSizeY()); check(BaseImage->GetFormat() == BlendImage->GetFormat()); const int32 NumLODs = bOnlyFirstLOD ? 
// Overview of the definitions on the following lines (code unchanged):
//  - Remainder of the masked BufferLayerCombine: registers result, base, blend and mask images (mask at
//    1 byte per element), returns early from a batch when any source buffer pointer is null, and stores
//    BLEND_FUNC_MASKED(Base, Blend, Mask) per element.
//  - Masked BufferLayerCombineColour: same loop shape with a constant TopColor (Color[0..2] scaled by 255 and
//    packed into bits 0, 8, 16) in place of a blend image. It uses GetOpLayerNumBatches /
//    GetOpLayerBatchViews, i.e. the variants that take no LOD range, and does not set LODBegin / LODEnd.
//  - ImageLayerCombine (no mask): validates the images and dispatches on the base format to
//    BufferLayerCombine; RGBA_UByte and BGRA_UByte share one branch; other formats hit checkf.
//  - ImageLayerCombine (masked): converts the mask to L_UByte through FImageOperator::ImagePixelFormat when
//    it has another format, then dispatches on the base format. For any other base format it logs, converts
//    base and blended images to RGBA_UByte and calls BufferLayerCombine with ResultImage as destination.
//    NOTE(review): ResultImage's format was checked equal to BaseImage's original format, not RGBA_UByte;
//    confirm the destination layout matches what that fallback writes.
//  - Start of ImageLayerCombineColour (no mask): validates and dispatches on the base format.
1 : ResultImage->GetLODCount(); constexpr int32 BatchNumElems = 4098*2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.LODBegin = 0; BatchArgs.LODEnd = NumLODs; BatchArgs.Result = ResultImage; BatchArgs.Base = BaseImage; BatchArgs.Blend = BlendImage; BatchArgs.Mask = MaskImage; BatchArgs.ResultBytesPerElem = NC; BatchArgs.BaseBytesPerElem = NC; BatchArgs.BlendBytesPerElem = NC; BatchArgs.MaskBytesPerElem = 1; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatchesLODRange(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs ](int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchLODRangeViews(BatchId, BatchArgs); const uint8* BaseBuf = BatchViews.Base.GetData(); const uint8* BlendBuf = BatchViews.Blend.GetData(); const uint8* MaskBuf = BatchViews.Mask.GetData(); uint8* ResultBuf = BatchViews.Result.GetData(); // This could happen in case of missing data files. 
if (!BaseBuf || !BlendBuf || !MaskBuf) { return; } for (int32 I = 0; I < BatchViews.NumElems; ++I) { const uint32 Base = PackPixel(&BaseBuf[NC * I]); const uint32 Blend = PackPixel(&BlendBuf[NC * I]); const uint32 Mask = PackPixel<1>(&MaskBuf[1 * I]); const uint32 Result = BLEND_FUNC_MASKED(Base, Blend, Mask); UnpackPixel(&ResultBuf[NC * I], Result); } }); } template< uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32), uint32 NC> void BufferLayerCombineColour(FImage* ResultImage, const FImage* BaseImage, const FImage* MaskImage, FVector4f Color) { static_assert(NC > 0 && NC <= 4); check(BaseImage->GetSizeX() == MaskImage->GetSizeX()); check(BaseImage->GetSizeY() == MaskImage->GetSizeY()); const uint32 TopColor = static_cast(255.0f * Color[0]) << 0 | static_cast(255.0f * Color[1]) << 8 | static_cast(255.0f * Color[2]) << 16; constexpr int32 BatchNumElems = 4098*2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.Result = ResultImage; BatchArgs.Base = BaseImage; BatchArgs.Mask = MaskImage; BatchArgs.ResultBytesPerElem = NC; BatchArgs.BaseBytesPerElem = NC; BatchArgs.MaskBytesPerElem = 1; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatches(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, TopColor ](int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchViews(BatchId, BatchArgs); const uint8* BaseBuf = BatchViews.Base.GetData(); const uint8* MaskBuf = BatchViews.Mask.GetData(); uint8* ResultBuf = BatchViews.Result.GetData(); // This could happen in case of missing data files. 
if (!BaseBuf || !MaskBuf) { return; } for (int32 I = 0; I < BatchViews.NumElems; ++I) { const uint32 Base = PackPixel(&BaseBuf[NC * I]); const uint32 Mask = PackPixel<1>(&MaskBuf[1 * I]); const uint32 Result = BLEND_FUNC_MASKED(Base, TopColor, Mask); UnpackPixel(&ResultBuf[NC * I], Result); } }); } template void ImageLayerCombine(FImage* ResultImage, const FImage* BaseImage, const FImage* BlendedImage, bool bOnlyFirstLOD) { check(ResultImage->GetFormat() == BaseImage->GetFormat()); check(ResultImage->GetSizeX() == BaseImage->GetSizeX()); check(ResultImage->GetSizeY() == BaseImage->GetSizeY()); check(bOnlyFirstLOD || ResultImage->GetLODCount() == BaseImage->GetLODCount()); check(BaseImage->GetSizeX() == BlendedImage->GetSizeX()); check(BaseImage->GetSizeY() == BlendedImage->GetSizeY()); check(BaseImage->GetFormat() == BlendedImage->GetFormat()); check(bOnlyFirstLOD || ResultImage->GetLODCount() <= BlendedImage->GetLODCount()); const EImageFormat BaseFormat = BaseImage->GetFormat(); if (BaseFormat == EImageFormat::L_UByte) { BufferLayerCombine(ResultImage, BaseImage, BlendedImage, bOnlyFirstLOD); } else if (BaseFormat == EImageFormat::RGB_UByte) { BufferLayerCombine(ResultImage, BaseImage, BlendedImage, bOnlyFirstLOD); } else if (BaseFormat == EImageFormat::RGBA_UByte || BaseFormat == EImageFormat::BGRA_UByte) { // \todo: pass swizzle template argument if BGRA_UBYTE, not yet supported. 
BufferLayerCombine(ResultImage, BaseImage, BlendedImage, bOnlyFirstLOD); } else { checkf(false, TEXT("Unsupported format.")); } } template< uint32 (*BLEND_FUNC)(uint32, uint32), uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32)> void ImageLayerCombine(FImage* ResultImage, const FImage* BaseImage, const FImage* MaskImage, const FImage* BlendedImage, bool bOnlyFirstLOD) { check(ResultImage->GetFormat() == BaseImage->GetFormat()); check(ResultImage->GetSizeX() == BaseImage->GetSizeX()); check(ResultImage->GetSizeY() == BaseImage->GetSizeY()); check(bOnlyFirstLOD || ResultImage->GetLODCount() == BaseImage->GetLODCount()); check(BaseImage->GetSizeX() == BlendedImage->GetSizeX()); check(BaseImage->GetSizeY() == BlendedImage->GetSizeY()); check(BaseImage->GetFormat() == BlendedImage->GetFormat()); check(bOnlyFirstLOD || ResultImage->GetLODCount() <= BlendedImage->GetLODCount()); const EImageFormat BaseFormat = BaseImage->GetFormat(); TSharedPtr TempMaskImage; if (MaskImage->GetFormat() != EImageFormat::L_UByte) { UE_LOG(LogMutableCore, Log, TEXT("Image layer format not supported. A generic one will be used. ")); FImageOperator ImOp = FImageOperator::GetDefault(nullptr); constexpr int32 Quality = 4; TempMaskImage = ImOp.ImagePixelFormat( Quality, MaskImage, EImageFormat::L_UByte ); MaskImage = TempMaskImage.Get(); } if (BaseFormat == EImageFormat::L_UByte) { BufferLayerCombine(ResultImage, BaseImage, MaskImage, BlendedImage, bOnlyFirstLOD); } else if (BaseFormat == EImageFormat::RGB_UByte) { BufferLayerCombine(ResultImage, BaseImage, MaskImage, BlendedImage, bOnlyFirstLOD); } else if (BaseFormat == EImageFormat::RGBA_UByte || BaseFormat == EImageFormat::BGRA_UByte) { // \todo: pass swizzle template argument if BGRA_UBYTE, not yet supported. BufferLayerCombine(ResultImage, BaseImage, MaskImage, BlendedImage, bOnlyFirstLOD); } else { UE_LOG(LogMutableCore, Log, TEXT("Image layer format not supported. A generic one will be used. 
")); FImageOperator ImOp = FImageOperator::GetDefault(nullptr); constexpr int32 Quality = 4; TSharedPtr TempBaseImage = ImOp.ImagePixelFormat(Quality, BaseImage, EImageFormat::RGBA_UByte); TSharedPtr TempBlededImage = ImOp.ImagePixelFormat(Quality, BlendedImage, EImageFormat::RGBA_UByte); BufferLayerCombine(ResultImage, TempBaseImage.Get(), MaskImage, TempBlededImage.Get(), bOnlyFirstLOD); } } template void ImageLayerCombineColour(FImage* ResultImage, const FImage* BaseImage, FVector4f Color) { check(ResultImage->GetFormat() == BaseImage->GetFormat()); check(ResultImage->GetSizeX() == BaseImage->GetSizeX()); check(ResultImage->GetSizeY() == BaseImage->GetSizeY()); check(ResultImage->GetLODCount() == BaseImage->GetLODCount()); const EImageFormat BaseFormat = BaseImage->GetFormat(); if (BaseFormat == EImageFormat::L_UByte) { BufferLayerCombineColour(ResultImage, BaseImage, Color); } else if (BaseFormat == EImageFormat::RGB_UByte) { BufferLayerCombineColour(ResultImage, BaseImage, Color); } else if (BaseFormat == EImageFormat::RGBA_UByte || BaseFormat == EImageFormat::BGRA_UByte) { // \todo: pass swizzle template argument if BGRA_UBYTE, not yet supported. 
BufferLayerCombineColour(ResultImage, BaseImage, Color); } else { checkf(false, TEXT("Unsupported format.")); } } template< uint32 (*BLEND_FUNC)(uint32, uint32), uint32 (*BLEND_FUNC_MASKED)(uint32, uint32, uint32)> void ImageLayerCombineColour(FImage* ResultImage, const FImage* BaseImage, const FImage* MaskImage, FVector4f Color) { check(ResultImage->GetFormat() == BaseImage->GetFormat()); check(ResultImage->GetSizeX() == BaseImage->GetSizeX()); check(ResultImage->GetSizeY() == BaseImage->GetSizeY()); check(ResultImage->GetLODCount() == BaseImage->GetLODCount()); const EImageFormat BaseFormat = BaseImage->GetFormat(); if (MaskImage->GetFormat() != EImageFormat::L_UByte) { checkf(false, TEXT("Unsupported mask format.")); BufferLayerCombineColour(ResultImage, BaseImage, Color); } if (BaseFormat == EImageFormat::L_UByte) { BufferLayerCombineColour(ResultImage, BaseImage, MaskImage, Color); } else if (BaseFormat == EImageFormat::RGB_UByte) { BufferLayerCombineColour(ResultImage, BaseImage, MaskImage, Color); } else if (BaseFormat == EImageFormat::RGBA_UByte || BaseFormat == EImageFormat::BGRA_UByte) { // \todo: pass swizzle template argument if BGRA_UBYTE, not yet supported. 
// Overview of the definitions on the following line (code unchanged):
//  - Remainder of the masked ImageLayerCombineColour: the RGBA/BGRA branch call and the unsupported-format
//    checkf.
//  - BufferLayerCombineFunctor: combines BaseImage and BlendImage into DestImage using a caller-supplied
//    callable. Base and destination elements are NCBase bytes wide and blend elements NCBlend bytes wide.
//    The callable is captured by copy in the batch lambda and invoked per element as
//    ImageCombine(Base, Blend) on the packed values; NCBase bytes of the result are stored. It uses
//    GetOpLayerNumBatches / GetOpLayerBatchViews, i.e. the variants that take no LOD range.
BufferLayerCombineColour(ResultImage, BaseImage, MaskImage, Color); } else { checkf(false, TEXT("Unsupported format.")); } } template void BufferLayerCombineFunctor(FImage* DestImage, const FImage* BaseImage, const FImage* BlendImage, ImageCombineFn&& ImageCombine) { static_assert(NCBase > 0 && NCBase <= 4); static_assert(NCBlend > 0 && NCBlend <= 4); check(BaseImage->GetFormat() == DestImage->GetFormat()); check(BaseImage->GetSizeX() == BlendImage->GetSizeX()); check(BaseImage->GetSizeY() == BlendImage->GetSizeY()); check(BaseImage->GetSizeX() == DestImage->GetSizeX()); check(BaseImage->GetSizeY() == DestImage->GetSizeY()); check(BaseImage->GetLODCount() <= BlendImage->GetLODCount()); check(BaseImage->GetLODCount() <= DestImage->GetLODCount()); constexpr int32 BatchNumElems = 4098*2; OpImageLayerInternal::FOpLayerBatchArgs BatchArgs; BatchArgs.BatchNumElems = BatchNumElems; BatchArgs.Result = DestImage; BatchArgs.Base = BaseImage; BatchArgs.Blend = BlendImage; BatchArgs.ResultBytesPerElem = NCBase; BatchArgs.BaseBytesPerElem = NCBase; BatchArgs.BlendBytesPerElem = NCBlend; const int32 NumBatches = OpImageLayerInternal::GetOpLayerNumBatches(BatchArgs); ParallelExecutionUtils::InvokeBatchParallelFor(NumBatches, [ &BatchArgs, ImageCombine ](int32 BatchId) { OpImageLayerInternal::FOpLayerBatchViews BatchViews = OpImageLayerInternal::GetOpLayerBatchViews(BatchId, BatchArgs); const uint8* BaseBuf = BatchViews.Base.GetData(); const uint8* BlendBuf = BatchViews.Blend.GetData(); uint8* DestBuf = BatchViews.Result.GetData(); for (int32 I = 0; I < BatchViews.NumElems; ++I) { const uint32 Base = PackPixel(&BaseBuf[NCBase * I]); const uint32 Blend = PackPixel(&BlendBuf[NCBlend * I]); const uint32 Result = ImageCombine(Base, Blend); UnpackPixel(&DestBuf[NCBase* I], Result); } }); } // Same functionality as above, in this case we use a functor which allows to pass user data. 
// Overview of the definitions on the following lines (code unchanged):
//  - ImageLayerCombineFunctor: validates that ResultImage matches BaseImage in format, size and LOD count
//    and that BlendedImage matches in size with at least as many LODs, then selects the
//    BufferLayerCombineFunctor<BaseBytes, BlendBytes> instantiation from the two formats
//    (L_UByte -> 1, RGB_UByte -> 3, RGBA_UByte or BGRA_UByte -> 4). Any other base or blend format reaches
//    checkf(false, "Unsupported format.").
//  - ImageBlendOnBaseNoAlpha: forwards its arguments unchanged to ImageLayerOnBaseNoAlpha.
//  - Explicit template instantiation declarations for the BufferLayer*, ImageLayerCombine* families,
//    followed by the closing brace of the enclosing namespace.
//    NOTE(review): the instantiation declarations appear here without template argument lists, so several
//    are textually identical; verify them against the original header before relying on this list.
template void ImageLayerCombineFunctor(FImage* ResultImage, const FImage* BaseImage, const FImage* BlendedImage, ImageCombineFn&& ImageCombine) { check(ResultImage->GetFormat() == BaseImage->GetFormat()); check(ResultImage->GetSizeX() == BaseImage->GetSizeX()); check(ResultImage->GetSizeY() == BaseImage->GetSizeY()); check(ResultImage->GetLODCount() == BaseImage->GetLODCount()); check(BaseImage->GetSizeX() == BlendedImage->GetSizeX()); check(BaseImage->GetSizeY() == BlendedImage->GetSizeY()); check(ResultImage->GetLODCount() <= BlendedImage->GetLODCount()); const EImageFormat BaseFormat = BaseImage->GetFormat(); const EImageFormat BlendFormat = BlendedImage->GetFormat(); if (BaseFormat == EImageFormat::L_UByte ) { if (BlendFormat == EImageFormat::L_UByte ) { BufferLayerCombineFunctor<1, 1>(ResultImage, BaseImage, BlendedImage, Forward(ImageCombine)); } else if (BlendFormat == EImageFormat::RGB_UByte ) { BufferLayerCombineFunctor<1, 3>(ResultImage, BaseImage, BlendedImage, Forward(ImageCombine)); } else if (BlendFormat == EImageFormat::RGBA_UByte || BlendFormat == EImageFormat::BGRA_UByte) { BufferLayerCombineFunctor<1, 4>(ResultImage, BaseImage, BlendedImage, Forward(ImageCombine)); } else { checkf(false, TEXT("Unsupported format.")); } } else if (BaseFormat == EImageFormat::RGB_UByte) { if (BlendFormat == EImageFormat::L_UByte ) { BufferLayerCombineFunctor<3, 1>(ResultImage, BaseImage, BlendedImage, Forward(ImageCombine)); } else if (BlendFormat == EImageFormat::RGB_UByte ) { BufferLayerCombineFunctor<3, 3>(ResultImage, BaseImage, BlendedImage, Forward(ImageCombine)); } else if (BlendFormat == EImageFormat::RGBA_UByte || BlendFormat == EImageFormat::BGRA_UByte ) { BufferLayerCombineFunctor<3, 4>(ResultImage, BaseImage, BlendedImage, Forward(ImageCombine)); } else { checkf(false, TEXT("Unsupported format.")); } } else if (BaseFormat == EImageFormat::RGBA_UByte || BaseFormat == EImageFormat::BGRA_UByte) { if (BlendFormat == EImageFormat::L_UByte ) { 
BufferLayerCombineFunctor<4, 1>(ResultImage, BaseImage, BlendedImage, Forward(ImageCombine)); } else if (BlendFormat == EImageFormat::RGB_UByte ) { BufferLayerCombineFunctor<4, 3>(ResultImage, BaseImage, BlendedImage, Forward(ImageCombine)); } else if (BlendFormat == EImageFormat::RGBA_UByte || BlendFormat == EImageFormat::BGRA_UByte ) { BufferLayerCombineFunctor<4, 4>(ResultImage, BaseImage, BlendedImage, Forward(ImageCombine)); } else { checkf(false, TEXT("Unsupported format.")); } } else { checkf(false, TEXT("Unsupported format.")); } } //! Blend a subimage on the base using a mask. void ImageBlendOnBaseNoAlpha(FImage* BaseImage, const FImage* MaskImage, const FImage* BlendedImage, const box& Rect) { ImageLayerOnBaseNoAlpha(BaseImage, MaskImage, BlendedImage, Rect); } template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template 
void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayerEmbeddedMask(FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayerEmbeddedMask(FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayerEmbeddedMask(FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayerEmbeddedMask(FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayerEmbeddedMask(FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayerEmbeddedMask(FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayerEmbeddedMask(FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayerEmbeddedMask(FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayerEmbeddedMask(FImage*, const FImage*, const FImage*, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const 
FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayer(FImage*, const FImage*, const FImage*, bool, bool, bool); template void BufferLayerInPlace(FImage*, const FImage*, bool, uint32, uint32); template void BufferLayerInPlace(FImage*, const FImage*, bool, uint32, uint32); template void BufferLayerInPlace(FImage*, const FImage*, bool, uint32, uint32); template void BufferLayerInPlace(FImage*, const FImage*, bool, uint32, uint32); template void BufferLayerInPlace(FImage*, const FImage*, bool, uint32, uint32); template void BufferLayerInPlace(FImage*, const FImage*, bool, uint32, uint32); template void BufferLayerInPlace(FImage*, const FImage*, bool, uint32, uint32); template void BufferLayerInPlace(FImage*, const FImage*, bool, uint32, uint32); template void BufferLayerInPlace(FImage*, const FImage*, bool, uint32, uint32); template void BufferLayerColour(FImage*, const FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, const FImage*, 
FVector4f); template void BufferLayerColour(FImage*, const FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, const FImage*, FVector4f); template void BufferLayerColourFromAlpha(FImage*, const FImage*, FVector4f); template void BufferLayerColourFromAlpha(FImage*, const FImage*, FVector4f); template void BufferLayerColourFromAlpha(FImage*, const FImage*, FVector4f); template void BufferLayerColourFromAlpha(FImage*, const FImage*, FVector4f); template void BufferLayerColourFromAlpha(FImage*, const FImage*, FVector4f); template void BufferLayerColourFromAlpha(FImage*, const FImage*, FVector4f); template void BufferLayerColourFromAlpha(FImage*, const FImage*, FVector4f); template void BufferLayerColourFromAlpha(FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, FVector4f); template void BufferLayerColour(FImage*, const FImage*, FVector4f); template void BufferLayerColourInPlace(FImage*, const FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, const FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, const FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, const FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, const FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, const FImage*, FVector4f, bool, uint32, uint8); 
template void BufferLayerColourInPlace(FImage*, const FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, const FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, const FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, FVector4f, bool, uint32, uint8); template void BufferLayerColourInPlace(FImage*, FVector4f, bool, uint32, uint8); // BufferLayerComposite* uses are specializations, no need to explicitly instanciate. //template void BufferLayerComposite(FImage*, const FImage*, bool, uint8); //template void BufferLayerCompositeVector(FImage*, const FImage*, bool, uint8); template void ImageLayerCombine(FImage*, const FImage*, const FImage*, bool); template void ImageLayerCombine(FImage*, const FImage*, const FImage*, const FImage*, bool); template void ImageLayerCombineColour(FImage*, const FImage*, FVector4f); template void ImageLayerCombineColour(FImage*, const FImage*, const FImage*, FVector4f); template void ImageLayerCombineFunctor(FImage*, const FImage*, const FImage*, FNormalCompositeFunctor&&); template void ImageLayerCombineFunctor(FImage*, const FImage*, const FImage*, FNormalCompositeIdentityFunctor&&); }