// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#ifndef OIT_ENABLED
#define OIT_ENABLED 0
#endif

#ifndef OIT_IS_BASEPASS
#define OIT_IS_BASEPASS 0
#endif

#if OIT_ENABLED

//////////////////////////////////////////////////////////////////////////////////////////
// Sample storage format

#define OIT_FORMAT_10BITS  1
#define OIT_FORMAT_16BITS  2
#define OIT_FORMAT_COMPACT 3

// Selected storage format. Each format defines FOITPackedSample, OIT_DATA_COUNT (number of 32-bit
// words / texture slices per sample) and the PackOITSample/UnpackOITSample pair.
#define OIT_FORMAT OIT_FORMAT_10BITS

//////////////////////////////////////////////////////////////////////////////////////////

// Unpacked translucent sample.
struct FOITSample
{
	float3 Color;	// Radiance/luminance reflected by the surface (pre-multiplied alpha)
	float  Depth;	// Scene depth (not DeviceZ)
	float3 Trans;	// Transmittance of the surface (i.e., 1-Opacity)
	float  Pad;
};

//////////////////////////////////////////////////////////////////////////////////////////

// Packs a float into 10 bits by keeping bits [14:5] of its half representation
// (5 exponent bits + 5 upper mantissa bits, sign dropped).
uint OITPack10F(float Value)
{
	return (f32tof16(Value) >> 5) & 0x000003FF;
}

// Inverse of OITPack10F: expects the packed value in the low 10 bits.
float OITUnpack10F(uint Value)
{
	return f16tof32((Value << 5) & 0x7FE0);
}

// Packs a float into 6 bits by keeping bits [14:9] of its half representation
// (5 exponent bits + 1 upper mantissa bit, sign dropped).
// The result must fit in 6 bits: the compact format stores these values at 6-bit strides.
uint OITPack6F(float Value)
{
	return (f32tof16(Value) >> 9) & 0x0000003F;
}

// Inverse of OITPack6F: expects the packed value in the low 6 bits.
float OITUnpack6F(uint Value)
{
	return f16tof32((Value << 9) & 0x7E00);
}

// Exchanges two samples.
void OITSwap(inout FOITSample A, inout FOITSample B)
{
	FOITSample T = A;
	A = B;
	B = T;
}

// Packs an RGB triplet into 32 bits: R in the top 11 bits, G in the next 11 bits, B in the low 10 bits.
// Each channel is a truncated, sign-less half.
uint OITPackR11G11B10F(float3 rgb)
{
	uint r = (f32tof16(rgb.r) << 17) & 0xFFE00000;
	uint g = (f32tof16(rgb.g) << 6)  & 0x001FFC00;
	uint b = (f32tof16(rgb.b) >> 5)  & 0x000003FF;
	return r | g | b;
}

// Inverse of OITPackR11G11B10F.
float3 OITUnpackR11G11B10F(uint rgb)
{
	float r = f16tof32((rgb >> 17) & 0x7FF0);
	float g = f16tof32((rgb >> 6)  & 0x7FF0);
	float b = f16tof32((rgb << 5)  & 0x7FE0);
	return float3(r, g, b);
}

//////////////////////////////////////////////////////////////////////////////////////////
// Per-attribute compaction 12bytes/96bits
#if OIT_FORMAT == OIT_FORMAT_10BITS
#define FOITPackedSample uint3
#define OIT_DATA_COUNT 3

// x: raw depth bits, y: color as R11G11B10F, z: saturated transmittance as R11G11B10F.
FOITPackedSample PackOITSample(FOITSample In)
{
	FOITPackedSample Out = (FOITPackedSample)0;
	Out.x = asuint(In.Depth);
	Out.y = OITPackR11G11B10F(In.Color);
	Out.z = OITPackR11G11B10F(saturate(In.Trans));
	return Out;
}

// Inverse of PackOITSample. Pad is left at 0.
FOITSample UnpackOITSample(FOITPackedSample In)
{
	FOITSample Out = (FOITSample)0;
	Out.Depth = asfloat(In.x);
	Out.Color = OITUnpackR11G11B10F(In.y);
	Out.Trans = saturate(OITUnpackR11G11B10F(In.z));
	return Out;
}
#endif

//////////////////////////////////////////////////////////////////////////////////////////
// Fat compaction 16bytes/128bits
#if OIT_FORMAT == OIT_FORMAT_16BITS
#define FOITPackedSample uint4
#define OIT_DATA_COUNT 4

// x: Color.r | Color.g, y: Color.b | Trans.r, z: Trans.g | Trans.b (all as halves), w: raw depth bits.
FOITPackedSample PackOITSample(float3 InColor, float3 InTrans, float InDepth)
{
	uint4 Out = 0;
	Out.x = f32tof16(InColor.r) | (f32tof16(InColor.g) << 16);
	Out.y = f32tof16(InColor.b) | (f32tof16(InTrans.r) << 16);
	Out.z = f32tof16(InTrans.g) | (f32tof16(InTrans.b) << 16);
	// Store the bit pattern (like the 10-bit format): a numeric float->uint conversion would drop the fractional part.
	Out.w = asuint(InDepth);
	return Out;
}

// Inverse of the PackOITSample overload above.
void UnpackOITSample(FOITPackedSample In, inout float3 OutColor, inout float3 OutTrans, inout float OutDepth)
{
	OutColor.r = f16tof32(0xFFFF & (In.x));
	OutColor.g = f16tof32(0xFFFF & (In.x >> 16));
	OutColor.b = f16tof32(0xFFFF & (In.y));
	OutTrans.r = f16tof32(0xFFFF & (In.y >> 16));
	OutTrans.g = f16tof32(0xFFFF & (In.z));
	OutTrans.b = f16tof32(0xFFFF & (In.z >> 16));
	OutDepth   = asfloat(In.w);
}
#endif // OIT_FORMAT_16BITS

//////////////////////////////////////////////////////////////////////////////////////////
// Compact sample into 8bytes/64bits (untested|add dithering if banding)
#if OIT_FORMAT == OIT_FORMAT_COMPACT
#define FOITPackedSample uint2
#define OIT_DATA_COUNT 2

// Bit layout:
//  x: [ 0: 9] Color.r  [10:19] Color.g  [20:29] Color.b  [30:31] Trans.b (high 2 bits)
//  y: [ 0:15] Depth    [16:21] Trans.r  [22:27] Trans.g  [28:31] Trans.b (low 4 bits)
FOITPackedSample PackOITSample(float3 InColor, float3 InTrans, float InDepth)
{
	uint ColorR10 = OITPack10F(InColor.r);
	uint ColorG10 = OITPack10F(InColor.g);
	uint ColorB10 = OITPack10F(InColor.b);
	uint TransR6  = OITPack6F(InTrans.r);
	uint TransG6  = OITPack6F(InTrans.g);
	uint TransB6  = OITPack6F(InTrans.b);
	uint Depth16  = f32tof16(InDepth) & 0xFFFF;

	FOITPackedSample Out = 0;
	Out.x = ColorR10 | (ColorG10 << 10) | (ColorB10 << 20) | (((TransB6 >> 4) & 0x3) << 30);
	Out.y = Depth16  | (TransR6 << 16)  | (TransG6 << 22)  | ((TransB6 & 0xF) << 28);
	return Out;
}

// Inverse of the PackOITSample overload above.
void UnpackOITSample(FOITPackedSample In, inout float3 OutColor, inout float3 OutTrans, inout float OutDepth)
{
	OutColor.r = OITUnpack10F((In.x)       & 0x3FF);
	OutColor.g = OITUnpack10F((In.x >> 10) & 0x3FF);
	OutColor.b = OITUnpack10F((In.x >> 20) & 0x3FF);
	OutTrans.r = OITUnpack6F((In.y >> 16) & 0x3F);
	OutTrans.g = OITUnpack6F((In.y >> 22) & 0x3F);
	// Trans.b is split across both words: low 4 bits in y, high 2 bits in x.
	OutTrans.b = OITUnpack6F(((In.y >> 28) & 0xF) | (((In.x >> 30) & 0x3) << 4));
	OutDepth   = f16tof32(In.y & 0xFFFF);
}
#endif // OIT_FORMAT_COMPACT

//////////////////////////////////////////////////////////////////////////////////////////
// FOITSample overloads for the formats which only expose per-attribute pack/unpack functions,
// so that the load/store functions below work with every format.
#if OIT_FORMAT == OIT_FORMAT_16BITS || OIT_FORMAT == OIT_FORMAT_COMPACT
FOITPackedSample PackOITSample(FOITSample In)
{
	return PackOITSample(In.Color, In.Trans, In.Depth);
}

FOITSample UnpackOITSample(FOITPackedSample In)
{
	FOITSample Out = (FOITSample)0;
	UnpackOITSample(In, Out.Color, Out.Trans, Out.Depth);
	return Out;
}
#endif

//////////////////////////////////////////////////////////////////////////////////////////

// Loads and unpacks the sample stored at InPixelCoord. Word N of the packed sample is read from array slice N.
// NOTE(review): the packed words are consumed as raw uint bits; confirm the element type InTexture is declared with at the call sites.
FOITSample OITLoadSample(Texture2DArray InTexture, uint2 InPixelCoord)
{
	FOITPackedSample Packed = (FOITPackedSample)0;
	Packed.x = InTexture[uint3(InPixelCoord, 0)];
#if OIT_DATA_COUNT > 1
	Packed.y = InTexture[uint3(InPixelCoord, 1)];
#endif
#if OIT_DATA_COUNT > 2
	Packed.z = InTexture[uint3(InPixelCoord, 2)];
#endif
#if OIT_DATA_COUNT > 3
	Packed.w = InTexture[uint3(InPixelCoord, 3)];
#endif
	return UnpackOITSample(Packed);
}

#if OIT_IS_BASEPASS

//////////////////////////////////////////////////////////////////////////////////////////
// Special Add/Load/Store sample function for base-pass

// Loads and unpacks the sample stored at InPixelCoord in TranslucentBasePass.OIT.OutOITSampleData.
FOITSample OITLoadSample(uint2 InPixelCoord)
{
	FOITPackedSample Packed = (FOITPackedSample)0;
	Packed.x = TranslucentBasePass.OIT.OutOITSampleData[uint3(InPixelCoord, 0)];
#if OIT_DATA_COUNT > 1
	Packed.y = TranslucentBasePass.OIT.OutOITSampleData[uint3(InPixelCoord, 1)];
#endif
#if OIT_DATA_COUNT > 2
	Packed.z = TranslucentBasePass.OIT.OutOITSampleData[uint3(InPixelCoord, 2)];
#endif
#if OIT_DATA_COUNT > 3
	Packed.w = TranslucentBasePass.OIT.OutOITSampleData[uint3(InPixelCoord, 3)];
#endif
	return UnpackOITSample(Packed);
}

// Packs In and writes it at InPixelCoord in TranslucentBasePass.OIT.OutOITSampleData.
// Word N of the packed sample goes to array slice N, mirroring OITLoadSample.
void OITStoreSample(uint2 InPixelCoord, FOITSample In)
{
	FOITPackedSample Packed = PackOITSample(In);
	TranslucentBasePass.OIT.OutOITSampleData[uint3(InPixelCoord, 0)] = Packed.x;
#if OIT_DATA_COUNT > 1
	TranslucentBasePass.OIT.OutOITSampleData[uint3(InPixelCoord, 1)] = Packed.y;
#endif
#if OIT_DATA_COUNT > 2
	TranslucentBasePass.OIT.OutOITSampleData[uint3(InPixelCoord, 2)] = Packed.z;
#endif
#if OIT_DATA_COUNT > 3
	// Slice 3 (not 4): must match the slice OITLoadSample reads the fourth word from.
	TranslucentBasePass.OIT.OutOITSampleData[uint3(InPixelCoord, 3)] = Packed.w;
#endif
}

// Top-left coordinate of the MaxSideSamplePerPixel x MaxSideSamplePerPixel tile holding the samples of a pixel.
uint2 GetStoringBaseCoord(uint2 InPixelCoord)
{
	return InPixelCoord * TranslucentBasePass.OIT.MaxSideSamplePerPixel;
}

// Coordinate of the InSampleIt-th sample within the tile starting at InBaseCoord (row-major order).
uint2 GetStoringCoord(uint2 InBaseCoord, uint InSampleIt)
{
	const uint2 SampleOffset = uint2(InSampleIt % TranslucentBasePass.OIT.MaxSideSamplePerPixel, InSampleIt / TranslucentBasePass.OIT.MaxSideSamplePerPixel);
	return InBaseCoord + SampleOffset;
}

// Simple k-buffer insertion: samples are appended in arrival order, samples beyond MaxSamplePerPixel are dropped.
void AddOITSample_KB(uint2 InPixelCoord, float3 InColor, float3 InTrans, float InDepth)
{
	// The counter is incremented even when the sample is dropped, so it can exceed MaxSamplePerPixel.
	uint SampleOffset = 0;
	InterlockedAdd(TranslucentBasePass.OIT.OutOITSampleCount[InPixelCoord], 1u, SampleOffset);
	if (SampleOffset < TranslucentBasePass.OIT.MaxSamplePerPixel)
	{
		const uint2 SampleCoord = GetStoringCoord(GetStoringBaseCoord(InPixelCoord), SampleOffset);

		FOITSample Sample = (FOITSample)0;
		Sample.Color = InColor;
		Sample.Trans = InTrans;
		Sample.Depth = InDepth;
		OITStoreSample(SampleCoord, Sample);
	}
}

// Multi-Layer Alpha Blending (MLAB) - Samples are inserted in a sorted manner and overflow samples are merged with alpha blending
void AddOITSample_MLAB(uint2 InPixelCoord, float3 InColor, float3 InTrans, float InDepth)
{
	RASTER_ORDERED_VIEW_LOCK();

	// ROV read for pixel synchronization
	uint SampleCount = min(TranslucentBasePass.OIT.OutOITSampleCount[InPixelCoord], TranslucentBasePass.OIT.MaxSamplePerPixel);

	FOITSample Current = (FOITSample)0;
	Current.Depth = InDepth;
	Current.Color = InColor;
	Current.Trans = InTrans;

	const uint2 SampleBaseCoord = GetStoringBaseCoord(InPixelCoord);

	// Check if the sample falls into the merged/combined bucket, and if it needs to be discarded due to the transmission threshold
	bool bNeedInsertSort = true; // Never cleared below, i.e. the sort always runs
	if (SampleCount > 0)
	{
		const uint2 LastSampleCoord = GetStoringCoord(SampleBaseCoord, SampleCount-1);
		const FOITSample LastSample = OITLoadSample(LastSampleCoord);
		if (Current.Depth > LastSample.Depth) // Scene depth (not inv-Z)
		{
			if (all(LastSample.Trans < TranslucentBasePass.OIT.TransmittanceThreshold))
			{
				// The return is important here as it avoids a GPU hang. Discard is not enough here, probably due to ROV/UAV output
				// NOTE(review): this path returns without reaching RASTER_ORDERED_VIEW_UNLOCK(); confirm this is fine on platforms where the lock is explicit.
				discard;
				return;
			}
		}
	}

	// Bubble sort and insert: every stored sample farther than Current is replaced by Current,
	// and the displaced sample becomes the new Current.
	if (bNeedInsertSort)
	{
		for (uint SampleIt = 0; SampleIt < SampleCount; ++SampleIt)
		{
			const uint2 SampleCoord = GetStoringCoord(SampleBaseCoord, SampleIt);
			const FOITSample Sample = OITLoadSample(SampleCoord);
			if (Current.Depth < Sample.Depth) // Scene depth (not inv-Z)
			{
				OITStoreSample(SampleCoord, Current);
				Current = Sample;
			}
		}
	}

	// Merge last fragment
	if (SampleCount >= TranslucentBasePass.OIT.MaxSamplePerPixel)
	{
		const uint2 SampleCoord = GetStoringCoord(SampleBaseCoord, TranslucentBasePass.OIT.MaxSamplePerPixel - 1);
		FOITSample Last = OITLoadSample(SampleCoord);
		// Composite Current behind Last
		Last.Color = Last.Color + Last.Trans * Current.Color;
		Last.Trans = saturate(Last.Trans * Current.Trans);
		Last.Depth = min(Last.Depth, Current.Depth); // Normally Last.Depth should already be the min. depth
		OITStoreSample(SampleCoord, Last);
	}
	else
	{
		// If there is still room to store the sample (i.e., not over budget), insert the sample at the end (note: SampleCount has not been incremented yet)
		const uint2 SampleCoord = GetStoringCoord(SampleBaseCoord, SampleCount);
		OITStoreSample(SampleCoord, Current);
	}

	// Write to ROV to unlock pixel
	SampleCount = min(SampleCount + 1, TranslucentBasePass.OIT.MaxSamplePerPixel);
	TranslucentBasePass.OIT.OutOITSampleCount[InPixelCoord] = SampleCount;

	RASTER_ORDERED_VIEW_UNLOCK();
}

// Add a translucent sample to the list of existing translucent samples
// InColor : Radiance/Luminance reflected by the surface (pre-multiplied alpha)
// InTrans : Transmittance of the surface (i.e., 1-Opacity)
// InDepth : Scene depth (i.e. not DeviceZ)
void AddOITSample(uint2 InPixelCoord, float3 InColor, float3 InTrans, float InDepth)
{
	AddOITSample_MLAB(InPixelCoord, InColor, InTrans, InDepth);
}

//////////////////////////////////////////////////////////////////////////////////////////

#endif // OIT_IS_BASEPASS

#endif // OIT_ENABLED