// Copyright Epic Games, Inc. All Rights Reserved.

// Promote vector truncation warnings to errors.
#pragma warning(error: 3206)


//------------------------------------------------------- ENUM VALUES

#define CASTING_PASS_STANDALONE 0
#define CASTING_PASS_PROBE_OCCLUSION 1

#define DIFFUSE_TERM 0
#define SPECULAR_TERM 1


//------------------------------------------------------- CONFIGS

#define DEBUG_SSRT 0
#define DEBUG_RAY_COUNT 0

// When 1, the normal cached in LDS is the view space normal instead of the world space one.
#define CONFIG_LDS_STORE_VIEW_N 1

// QUALITY selects the number of steps per ray and the number of rays per pixel.
#if QUALITY == 1
	#define CONFIG_RAY_STEPS 8
	#define CONFIG_RAY_COUNT 4

#elif QUALITY == 2
	#define CONFIG_RAY_STEPS 8
	#define CONFIG_RAY_COUNT 8

#elif QUALITY == 3
	#define CONFIG_RAY_STEPS 8
	#define CONFIG_RAY_COUNT 16

#elif QUALITY == 4
	#define CONFIG_RAY_STEPS 12
	#define CONFIG_RAY_COUNT 32

#else
	#error Unknown Quality.

#endif

// The tile shrinks as the ray count grows so that
// TILE_PIXEL_SIZE_X * TILE_PIXEL_SIZE_Y * CONFIG_RAY_COUNT is 256 for ray counts of 4, 8 and 32
// (and 64 for a ray count of 8 with a 8x4 tile... see values below).
#if CONFIG_RAY_COUNT == 4
	#define TILE_PIXEL_SIZE_X 8
	#define TILE_PIXEL_SIZE_X_LOG 3

	#define TILE_PIXEL_SIZE_Y 8
	#define TILE_PIXEL_SIZE_Y_LOG 3

#elif CONFIG_RAY_COUNT == 8
	#define TILE_PIXEL_SIZE_X 8
	#define TILE_PIXEL_SIZE_X_LOG 3

	#define TILE_PIXEL_SIZE_Y 4
	#define TILE_PIXEL_SIZE_Y_LOG 2

#elif CONFIG_RAY_COUNT == 16
	#define TILE_PIXEL_SIZE_X 4
	#define TILE_PIXEL_SIZE_X_LOG 2

	#define TILE_PIXEL_SIZE_Y 4
	#define TILE_PIXEL_SIZE_Y_LOG 2

#elif CONFIG_RAY_COUNT == 32
	#define TILE_PIXEL_SIZE_X 4
	#define TILE_PIXEL_SIZE_X_LOG 2

	#define TILE_PIXEL_SIZE_Y 2
	#define TILE_PIXEL_SIZE_Y_LOG 1

#else
	#error Unknown Quality.

#endif

// Karis weighting (1 / (1 + luminance)) is only applied when several rays are averaged.
#if CONFIG_RAY_COUNT > 1
	#define CONFIG_KARIS_WEIGHTING 1
#else
	#define CONFIG_KARIS_WEIGHTING 0
#endif


//------------------------------------------------------- CONFIG DISABLED DEFAULTS

#ifndef CONFIG_LDS_STORE_VIEW_N
	#define CONFIG_LDS_STORE_VIEW_N 0
#endif

// Number of pixels covered by one thread group.
#define TILE_PIXEL_COUNT (TILE_PIXEL_SIZE_X * TILE_PIXEL_SIZE_Y)

// Number of threads in one thread group: one per (pixel, ray) pair.
#define LANE_PER_GROUPS (TILE_PIXEL_COUNT * CONFIG_RAY_COUNT)


//------------------------------------------------------- INCLUDES

// Must be defined before the includes below.
#if DIM_LIGHTING_TERM == DIFFUSE_TERM
	#define IS_SSGI_SHADER 1
#endif

#include "SSRTRayCast.ush"
#include "SSRTTileClassificationBuffer.ush"
#include "../DeferredShadingCommon.ush"
#include "../Random.ush"
#include "../BRDF.ush"
#include "../MonteCarlo.ush"
#include "../SceneTextureParameters.ush"
#include "../Substrate/Substrate.ush"
#include "../HZB.ush"


//------------------------------------------------------- PARAMETERS

uint bRejectUncertainRays;

Texture2D ProbeOcclusionDistanceTexture;

Texture2D ColorTexture;

#if SUPPORTS_INDEPENDENT_SAMPLERS
#define ColorTextureSampler GlobalPointClampedSampler
#else
SamplerState ColorTextureSampler;
#endif

float4 ColorBufferScaleBias;
float2 ReducedColorUVMax;
float PixelPositionToFullResPixel;
float2 FullResPixelOffset;

// UAV element types match what MainCS() stores: a float4 color and a scalar occlusion term.
RWTexture2D<float4> IndirectDiffuseOutput;
RWTexture2D<float> AmbientOcclusionOutput;
RWTexture2D<float4> DebugOutput;


//------------------------------------------------------- LDS

// SharedMemory0 is used twice by MainCS():
//  1) as a per pixel cache: [0, TILE_PIXEL_COUNT) compressed normal, [TILE_PIXEL_COUNT, 2 * TILE_PIXEL_COUNT) device Z,
//  2) as a per ray result store: [0, LANE_PER_GROUPS) first packed row, [LANE_PER_GROUPS, 2 * LANE_PER_GROUPS) second packed row.
groupshared uint SharedMemory0[LANE_PER_GROUPS * 2];

// Per pixel roughness (as raw float bits), only written for the specular term.
groupshared uint SharedMemory1[LANE_PER_GROUPS];


//------------------------------------------------------- FUNCTIONS

// Disabled ("&& 0") experiment that would turn the group barrier into a no-op for 64 lane groups.
#if LANE_PER_GROUPS == 64 && 0
	#undef GroupMemoryBarrierWithGroupSync
	#define GroupMemoryBarrierWithGroupSync()
#endif

/** Packs a normal into 24 bits: each component is remapped with N * 0.5 + 0.5, saturated and quantized to 8 bits (x in bits 0..7, y in 8..15, z in 16..23). */
uint CompressN(float3 N)
{
	// matches 8bits GBuffer A to be lossless.
	uint3 K = uint3(saturate(N * 0.5 + 0.5) * 255.0);
	return uint(K.x << 0 | K.y << 8 | K.z << 16);
}

/** Inverse of CompressN(): unpacks the three 8 bit components and remaps them from [0, 255] to [-1, 1]. */
float3 DecompressN(uint EncodedN)
{
	uint3 K;
	K.x = (EncodedN >> 0) & 0xFF;
	K.y = (EncodedN >> 8) & 0xFF;
	K.z = (EncodedN >> 16) & 0xFF;

	return float3(K) * (2.0 / 255.0) - 1.0;
}

/** Converts a linear pixel index within the tile into its 2D offset within the tile (x is the fastest varying coordinate). */
uint2 DecodeGroupPixelOffset(uint GroupPixelId)
{
	return uint2(
		GroupPixelId % TILE_PIXEL_SIZE_X,
		(GroupPixelId >> TILE_PIXEL_SIZE_X_LOG) % TILE_PIXEL_SIZE_Y);
}

/** Inverse of DecodeGroupPixelOffset(): converts a 2D offset within the tile into its linear pixel index. */
uint EncodeGroupPixelOffset(uint2 GroupPixelOffset)
{
	return GroupPixelOffset.x | (GroupPixelOffset.y << TILE_PIXEL_SIZE_X_LOG);
}

/** Returns the pixel position of a lane from the group id and the lane's 2D offset within the tile. */
uint2 ComputePixelPosition(uint2 GroupId, uint2 GroupPixelOffset)
{
	return GroupId * uint2(TILE_PIXEL_SIZE_X, TILE_PIXEL_SIZE_Y) + GroupPixelOffset;
}

/**
 * Derives the 2D coordinates of a lane from its pixel position.
 *
 * @param PixelPosition         Pixel position in the tracing resolution.
 * @param BufferUV              Overwritten with FullResPixelPosition scaled by View.BufferSizeAndInvSize.zw.
 * @param ScreenPos             Overwritten with the screen position matching the viewport UV of FullResPixelPosition.
 * @param FullResPixelPosition  Overwritten with PixelPosition * PixelPositionToFullResPixel + FullResPixelOffset.
 *
 * The three last parameters are declared inout but every one of them is fully overwritten.
 */
void UpdateLane2DCoordinateInformations(
	uint2 PixelPosition,
	inout float2 BufferUV,
	inout float2 ScreenPos,
	inout float2 FullResPixelPosition)
{
	FullResPixelPosition = PixelPosition * PixelPositionToFullResPixel + FullResPixelOffset;

	// TODO: split screen
	BufferUV = FullResPixelPosition * View.BufferSizeAndInvSize.zw;
	ScreenPos = ViewportUVToScreenPos(FullResPixelPosition * View.ViewSizeAndInvSize.zw);
}

/** Reconstructs a translated world position from a screen position and a scene depth. */
float3 ComputeTranslatedWorldPositions(float2 ScreenPos, float SceneDepth)
{
	return mul(float4(GetScreenPositionForProjectionType(ScreenPos, SceneDepth), SceneDepth, 1), View.ScreenToTranslatedWorld).xyz;
}

/** Projects a translated world position to clip space and returns the viewport pixel it lands on (truncated to uint). */
uint2 ComputePixelPosition(float3 TranslatedWorldPosition)
{
	float4 ClipPosition = mul(float4(TranslatedWorldPosition, 1), View.TranslatedWorldToClip);
	float2 ScreenPos = ClipPosition.xy * rcp(ClipPosition.w);
	float2 ViewportUV = ScreenPosToViewportUV(ScreenPos);
	return uint2(ViewportUV * View.ViewSizeAndInvSize.xy);
}

/** Returns the view space Z of a translated world position. */
float ComputeSceneDepth(float3 TranslatedWorldPosition)
{
	// TODO: do everything in view space instead of world space?
	return mul(float4(TranslatedWorldPosition, 1.0), View.TranslatedWorldToView).z;
}

/** Returns a per pixel random seed that also depends on View.StateFrameIndexMod8. */
uint2 ComputeRandomSeed(uint2 PixelPosition)
{
	return Rand3DPCG16(int3(PixelPosition, View.StateFrameIndexMod8)).xy;
}

/** Returns a cosine distributed direction around N, generated from the random sample E. */
float3 ComputeL(float3 N, float2 E)
{
	float3x3 TangentBasis = GetTangentBasis(N);

	#if 1
		float3 TangentL = CosineSampleHemisphereConcentric(E).xyz;
	#else
		float3 TangentL = CosineSampleHemisphere(E).xyz;
	#endif

	return mul(TangentL, TangentBasis);
}


//------------------------------------------------------- ENTRY POINT

/**
 * Traces CONFIG_RAY_COUNT screen space rays per pixel and writes the averaged result.
 *
 * Each group covers a TILE_PIXEL_SIZE_X x TILE_PIXEL_SIZE_Y tile of pixels with one lane per (pixel, ray) pair:
 *   GroupThreadIndex % TILE_PIXEL_COUNT selects the pixel, GroupThreadIndex / TILE_PIXEL_COUNT selects the ray.
 *
 * Stages, separated by group barriers:
 *   1) the lanes of ray 0 fetch the per pixel inputs and cache them in LDS,
 *   2) every lane reads the cached inputs of its pixel, casts its ray and stores the packed result in LDS,
 *   3) the first TILE_PIXEL_COUNT lanes average the rays of their pixel and write the outputs.
 */
[numthreads(TILE_PIXEL_SIZE_X, TILE_PIXEL_SIZE_Y, CONFIG_RAY_COUNT)]
void MainCS(
	uint2 GroupId : SV_GroupID,
	uint GroupThreadIndex : SV_GroupIndex)
{
	// Id of the wave in the group.
	uint GroupWaveIndex = GroupThreadIndex / 64;

	FSSRTTileInfos TileInfos;
	{
		const uint BinsAddress = TILE_PIXEL_COUNT * 2;

		uint GroupPixelId = GroupThreadIndex % TILE_PIXEL_COUNT;
		uint RaySequenceId = GroupThreadIndex / TILE_PIXEL_COUNT;

		// Compute TileCoord from GroupId to ensure the compiler understand it is group invariant to use scalar load.
		uint2 TileCoord = GroupId / uint2(SSRT_TILE_RES_DIVISOR / TILE_PIXEL_SIZE_X, SSRT_TILE_RES_DIVISOR / TILE_PIXEL_SIZE_Y);
		TileInfos = LoadTileInfos(TileCoord);

		// Store GBuffer into LDS
		BRANCH
		if (RaySequenceId == 0)
		{
			uint2 GroupPixelOffset = DecodeGroupPixelOffset(GroupPixelId);
			uint2 PixelPosition = ComputePixelPosition(GroupId, GroupPixelOffset);

			float2 BufferUV = 0;
			float2 ScreenPos = 0;
			float2 FullResPixelPosition = 0;
			UpdateLane2DCoordinateInformations(PixelPosition, /* out */ BufferUV, /* out */ ScreenPos, /* out */ FullResPixelPosition);

		#if SUBTRATE_GBUFFER_FORMAT==1
			const FSubstrateTopLayerData TopLayerData = SubstrateUnpackTopLayerData(Substrate.TopLayerTexture.Load(uint3(FullResPixelPosition, 0)));
			const float Roughness = TopLayerData.Roughness;
			const float3 WorldNormal = TopLayerData.WorldNormal;
			const bool bIsValid = IsSubstrateMaterial(TopLayerData);
		#else // SUBTRATE_GBUFFER_FORMAT==1
			FGBufferData GBuffer = GetGBufferDataFromSceneTextures(BufferUV);
			const float Roughness = GBuffer.Roughness;
			const float3 WorldNormal = GBuffer.WorldNormal;
			const bool bIsValid = GBuffer.ShadingModelID != SHADINGMODELID_UNLIT;
		#endif // SUBTRATE_GBUFFER_FORMAT==1

			float DeviceZ = SceneDepthTexture.SampleLevel(SceneDepthTextureSampler, BufferUV, 0).r;
			float ProbeOcclusionDistance = ProbeOcclusionDistanceTexture.Load(int3(PixelPosition, 0)).x;

			bool bTraceRay = bIsValid;

			#if CONFIG_LDS_STORE_VIEW_N
				SharedMemory0[TILE_PIXEL_COUNT * 0 | GroupPixelId] = CompressN(mul(float4(WorldNormal, 0), View.TranslatedWorldToView).xyz);
			#else
				SharedMemory0[TILE_PIXEL_COUNT * 0 | GroupPixelId] = CompressN(WorldNormal);
			#endif

			// A negative device Z marks a pixel that must not be traced; it is decoded as bTraceRay below.
			SharedMemory0[TILE_PIXEL_COUNT * 1 | GroupPixelId] = asuint(bTraceRay ? DeviceZ : -1.0);

			#if DIM_LIGHTING_TERM == DIFFUSE_TERM
			{
				// NOP
			}
			#elif DIM_LIGHTING_TERM == SPECULAR_TERM
			{
				SharedMemory1[GroupPixelId] = asuint(Roughness);
			}
			#else
				#error Unnimplemented
			#endif
		}
		else if (GroupWaveIndex == 1) // TODO.
		{
			// Clears the bins
			SharedMemory0[BinsAddress | GroupPixelId] = 0;
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Shoot ray
	{
		uint GroupPixelId;
		uint RaySequenceId;
		uint CompressedN;
		float DeviceZ;
		float Roughness;
		float ProbeOcclusionDistance;
		bool bTraceRay;
		{
			GroupPixelId = GroupThreadIndex % TILE_PIXEL_COUNT;
			RaySequenceId = GroupThreadIndex / TILE_PIXEL_COUNT;

			uint Raw0 = SharedMemory0[TILE_PIXEL_COUNT * 0 | GroupPixelId];
			uint Raw1 = SharedMemory0[TILE_PIXEL_COUNT * 1 | GroupPixelId];
			uint Raw2 = SharedMemory1[TILE_PIXEL_COUNT * 0 | GroupPixelId];

			CompressedN = Raw0;
			DeviceZ = asfloat(Raw1);
			bTraceRay = asfloat(Raw1) > 0;

			#if DIM_LIGHTING_TERM == DIFFUSE_TERM
			{
				Roughness = 0;
				ProbeOcclusionDistance = 0;
			}
			#elif DIM_LIGHTING_TERM == SPECULAR_TERM
			{
				Roughness = asfloat(Raw2);
				ProbeOcclusionDistance = 0;
			}
			#else
				#error Unnimplemented
			#endif
		}

		// All lanes must have read their cached inputs before SharedMemory0 is reused for the ray results below.
		GroupMemoryBarrierWithGroupSync();

		#if DEBUG_RAY_COUNT
			float DebugRayCount = 0.0;
		#endif
		uint2 CompressedColor;

		// Disabled ("&& 0") debug output.
		if (RaySequenceId == 0 && 0)
		{
			uint2 GroupPixelOffset = DecodeGroupPixelOffset(GroupPixelId);
			uint2 PixelPosition = ComputePixelPosition(GroupId, GroupPixelOffset);

			//DebugOutput[PixelPosition] = float4(DeviceZ, bTraceRay ? 1 : 0, 0, 0);
			//DebugOutput[PixelPosition] = float4(N * 0.5 + 0.5, 0);
		}

		BRANCH
		if (bTraceRay)
		{
			float3 N;
			float3 ViewN;
			#if CONFIG_LDS_STORE_VIEW_N
			{
				ViewN = DecompressN(CompressedN);
				N = mul(float4(ViewN, 0), View.ViewToTranslatedWorld).xyz;
			}
			#else
			{
				N = DecompressN(CompressedN);
				ViewN = mul(float4(N, 0), View.TranslatedWorldToView).xyz;
			}
			#endif

			float a = Roughness * Roughness;
			float a2 = a * a;

			float SceneDepth = ConvertFromDeviceZ(DeviceZ);

			uint2 GroupPixelOffset = DecodeGroupPixelOffset(GroupPixelId);
			uint2 PixelPosition = ComputePixelPosition(GroupId, GroupPixelOffset);

			float2 BufferUV = 0;
			float2 ScreenPos = 0;
			float2 FullResPixelPosition = 0;
			UpdateLane2DCoordinateInformations(PixelPosition, /* out */ BufferUV, /* out */ ScreenPos, /* out */ FullResPixelPosition);

			float3 PositionTranslatedWorld = mul(float4(GetScreenPositionForProjectionType(ScreenPos, SceneDepth), SceneDepth, 1), View.ScreenToTranslatedWorld).xyz;
			float3 V = -GetCameraVectorFromTranslatedWorldPosition(PositionTranslatedWorld);

			float StepOffset = InterleavedGradientNoise(PixelPosition + 0.5, View.StateFrameIndexMod8);

			#if !SSGI_TRACE_CONE
				StepOffset -= 0.9;
			#endif

			// Only the pixel at the center of the view is flagged for debug printing.
			bool bDebugPrint = all(PixelPosition == uint2(View.ViewSizeAndInvSize.xy) / 2);

			// Initialize the ray
			FSSRTRay Ray;
			float RayRoughness;
			float3 L;
			bool bRayWasClipped;
			#if DIM_LIGHTING_TERM == DIFFUSE_TERM
			{
				uint2 RandomSeed = ComputeRandomSeed(PixelPosition);

				float2 E = Hammersley16(RaySequenceId, CONFIG_RAY_COUNT, RandomSeed);

				float3 ViewL = ComputeL(ViewN, E);

				Ray = InitScreenSpaceRay(ScreenPos, DeviceZ, ViewL);
				// NOTE(review): this forces bUncertain to true for every diffuse ray below. bUncertain is not
				// read afterwards in this file, so there is no visible effect here -- confirm the intended value.
				bRayWasClipped = true; // TODO(Guillaume)
				RayRoughness = 1.0;
				L = 0.0;
			}
			#elif DIM_LIGHTING_TERM == SPECULAR_TERM
			{
				uint2 RandomSeed = ComputeRandomSeed(PixelPosition);

				float2 E = Hammersley16(RaySequenceId, CONFIG_RAY_COUNT, RandomSeed);

				float3x3 TangentBasis = GetTangentBasis(N);
				float3 TangentV = mul(TangentBasis, V);

				float3 H = mul(ImportanceSampleVisibleGGX(E, a, TangentV).xyz, TangentBasis);

				// Reflect V around the sampled half vector.
				L = 2 * dot( V, H ) * H - V;

				Ray = InitScreenSpaceRayFromWorldSpace(
					PositionTranslatedWorld, L,
					/* WorldTMax = */ SceneDepth,
					/* SceneDepth = */ SceneDepth,
					/* SlopeCompareToleranceScale */ 2.0f,
					/* bExtendRayToScreenBorder = */ true,
					/* out */ bRayWasClipped);

				RayRoughness = Roughness * 0.25;
			}
			#else
				#error Unnimplemented
			#endif

			// Cast the ray
			float Level;
			float3 HitUVz;
			bool bHit;
			bool bUncertain;
			{
				// Local that shadows the DebugOutput UAV; only receives the debug value returned by the ray caster.
				float3 DebugOutput;
				CastScreenSpaceRay(
					FurthestHZBTexture, FurthestHZBTextureSampler,
					/* StartMipLevel = */ 1.0,
					CreateDefaultCastSettings(),
					Ray, RayRoughness, CONFIG_RAY_STEPS, StepOffset,
					HZBUvFactorAndInvFactor, bDebugPrint,
					/* out */ DebugOutput,
					/* out */ HitUVz,
					/* out */ Level,
					/* out */ bHit,
					/* out */ bUncertain);

				#if DEBUG_RAY_COUNT
					DebugRayCount += 1.0;
				#endif
			}

			// Ray is also uncertain if it has been clipped.
			bUncertain = bUncertain || bRayWasClipped;

			#if 0 // Backface check
			if (bHit)
			{
				float3 SampleNormal = GetGBufferDataFromSceneTextures(HitUVz.xy).WorldNormal;
				bHit = dot(SampleNormal, L) < 0;
			}
			#endif

			// if there was a hit
			BRANCH
			if (bHit)
			{
				float2 ReducedColorUV = HitUVz.xy * ColorBufferScaleBias.xy + ColorBufferScaleBias.zw;
				ReducedColorUV = min(ReducedColorUV, ReducedColorUVMax);

				float4 SampleColor = ColorTexture.SampleLevel(ColorTextureSampler, ReducedColorUV, Level);
				//SampleColor = float4(ReducedColorUV.xy, 0, 1);

				float SampleColorWeight = 1.0;

				// Undone after averaging with rcp(1 - Luminance) in the resolve stage.
				#if CONFIG_KARIS_WEIGHTING
					SampleColorWeight *= rcp( 1 + Luminance(SampleColor.rgb) );
				#endif

				float3 DiffuseColor = SampleColor.rgb * SampleColorWeight;
				float AmbientOcclusion = 1.0;

				#if CONFIG_COLOR_TILE_CLASSIFICATION
				{
					float Lumi = Luminance(DiffuseColor.rgb);
					AmbientOcclusion *= saturate(Lumi / 0.25);
				}
				#endif

				// Pack the result as four half floats: x = (r << 16) | g, y = (b << 16) | ao.
				CompressedColor.x = asuint(f32tof16(DiffuseColor.r) << 16 | f32tof16(DiffuseColor.g));
				CompressedColor.y = asuint(f32tof16(DiffuseColor.b) << 16 | f32tof16(AmbientOcclusion));
			}
			else
			{
				CompressedColor = uint2(0, 0);
			}
		}
		else // if (!bTraceRay)
		{
			CompressedColor = uint2(0, 0);
		}

		// Output debugging info instead of actual intersection.
		#if DEBUG_RAY_COUNT
		{
			CompressedColor.x = asuint(f32tof16(DebugRayCount) << 16);
			CompressedColor.y = 0;
		}
		#endif

		uint DestPos = GroupPixelId + RaySequenceId * TILE_PIXEL_COUNT;

		SharedMemory0[LANE_PER_GROUPS * 0 | DestPos] = CompressedColor.x;
		SharedMemory0[LANE_PER_GROUPS * 1 | DestPos] = CompressedColor.y;
	}

	GroupMemoryBarrierWithGroupSync();

	// Store ray to UAV
	BRANCH
	if (GroupThreadIndex < TILE_PIXEL_COUNT)
	{
		const uint GroupPixelId = GroupThreadIndex;

		float3 DiffuseColor = 0;
		float AmbientOcclusion = 0;

		// Accumulate the packed results of every ray of this pixel.
		UNROLL_N(CONFIG_RAY_COUNT)
		for (uint RaySequenceId = 0; RaySequenceId < CONFIG_RAY_COUNT; RaySequenceId++)
		{
			uint SrcPos = GroupPixelId + RaySequenceId * TILE_PIXEL_COUNT;

			uint Row0 = SharedMemory0[LANE_PER_GROUPS * 0 | SrcPos];
			uint Row1 = SharedMemory0[LANE_PER_GROUPS * 1 | SrcPos];

			DiffuseColor.r += f16tof32(Row0 >> 16);
			DiffuseColor.g += f16tof32(Row0 >> 0);
			DiffuseColor.b += f16tof32(Row1 >> 16);
			AmbientOcclusion += f16tof32(Row1 >> 0);
		}

		#if CONFIG_RAY_COUNT > 1
		{
			DiffuseColor *= rcp(float(CONFIG_RAY_COUNT));
			AmbientOcclusion *= rcp(float(CONFIG_RAY_COUNT));
		}
		#endif

		// Inverse of the per sample weighting x / (1 + Luminance(x)).
		#if CONFIG_KARIS_WEIGHTING
		{
			DiffuseColor *= rcp( 1 - Luminance(DiffuseColor) );
		}
		#endif

		// TODO Guillaume: need to have engine wide consistent solution for IndirectLightingColorScale.
		//DiffuseColor *= View.IndirectLightingColorScale;

		// The accumulated value is the occluded fraction; output its complement.
		AmbientOcclusion = 1 - AmbientOcclusion;

		uint2 GroupPixelOffset = DecodeGroupPixelOffset(GroupPixelId);
		uint2 OutputPixelCoordinate = ComputePixelPosition(GroupId, GroupPixelOffset);

		// Output.
		IndirectDiffuseOutput[OutputPixelCoordinate] = float4(DiffuseColor, 1.0);
		AmbientOcclusionOutput[OutputPixelCoordinate] = AmbientOcclusion;
	} // if (GroupThreadIndex < TILE_PIXEL_COUNT)
} // MainCS()