// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	PostProcessSubsurface.usf: Screenspace subsurface scattering shaders.
=============================================================================*/

#include "Common.ush"
#include "Substrate/Substrate.ush"
#include "PostProcessCommon.ush"
#include "DeferredShadingCommon.ush"
#include "PostProcessSubsurfaceCommon.ush"

// for VisualizeSSS
#include "MiniFontCommon.ush"

#include "ScreenPass.ush"

// Input textures of the subsurface passes (which pass binds what is decided by the caller).
Texture2D SubsurfaceInput0_Texture;
Texture2D SubsurfaceInput1_Texture;
Texture2D SubsurfaceInput2_Texture;
Texture2D SubsurfaceInput3_Texture;

SamplerState SubsurfaceSampler0;
SamplerState SubsurfaceSampler1;
SamplerState SubsurfaceSampler2;
SamplerState SubsurfaceSampler3;

// When samplers are independent from textures, every input reuses sampler 0;
// otherwise each input keeps its own sampler.
#if SUPPORTS_INDEPENDENT_SAMPLERS
	#define SharedSubsurfaceSampler0 SubsurfaceSampler0
	#define SharedSubsurfaceSampler1 SubsurfaceSampler0
	#define SharedSubsurfaceSampler2 SubsurfaceSampler0
	#define SharedSubsurfaceSampler3 SubsurfaceSampler0
#else
	#define SharedSubsurfaceSampler0 SubsurfaceSampler0
	#define SharedSubsurfaceSampler1 SubsurfaceSampler1
	#define SharedSubsurfaceSampler2 SubsurfaceSampler2
	#define SharedSubsurfaceSampler3 SubsurfaceSampler3
#endif

SCREEN_PASS_TEXTURE_VIEWPORT(SubsurfaceInput0)
SCREEN_PASS_TEXTURE_VIEWPORT(SubsurfaceInput1)
SCREEN_PASS_TEXTURE_VIEWPORT(Output)

// NOTE(review): no element type is spelled out for this UAV; it is read into and written from
// uint values elsewhere in this file, so presumably RWTexture2D<uint> was intended - confirm.
RWTexture2D ProfileIdTexture;

//SUBSURFACE_SETUP_PASS
#define SUBSURFACE_SETUP_PASS_MAIN 0
#define SUBSURFACE_SETUP_PASS_PASSTHROUGH 1

#define SUBSURFACE_PASS_ONE 0
#define SUBSURFACE_PASS_TWO 1

#define SUBSURFACE_DIRECTION_HORIZONTAL SUBSURFACE_PASS_ONE
#define SUBSURFACE_DIRECTION_VERTICAL SUBSURFACE_PASS_TWO

// Controls the quality (number of samples) of the blur kernel.
#define SUBSURFACE_QUALITY_LOW 0
#define SUBSURFACE_QUALITY_MEDIUM 1
#define SUBSURFACE_QUALITY_HIGH 2

// Full resolution recombine.
#define SUBSURFACE_RECOMBINE_MODE_FULLRES 0

// Half resolution recombine.
#define SUBSURFACE_RECOMBINE_MODE_HALFRES 1

// Just reconstruct the lighting (needed for scalability).
#define SUBSURFACE_RECOMBINE_MODE_NO_SCATTERING 2

// Controls the quality of lighting reconstruction.
#define SUBSURFACE_RECOMBINE_QUALITY_LOW 0
#define SUBSURFACE_RECOMBINE_QUALITY_HIGH 1

#ifndef SUBSURFACE_RECOMBINE_QUALITY
	#define SUBSURFACE_RECOMBINE_QUALITY SUBSURFACE_RECOMBINE_QUALITY_LOW
#endif

// NOTE(review): SUBSURFACE_RECOMBINE_QUALITY_HALFRES is not defined in the visible part of this
// file. If no include defines it, the preprocessor evaluates it as 0, which equals
// SUBSURFACE_RECOMBINE_QUALITY_LOW - confirm whether SUBSURFACE_RECOMBINE_MODE_HALFRES was meant.
#if SUBSURFACE_RECOMBINE_QUALITY == SUBSURFACE_RECOMBINE_QUALITY_HALFRES
	#define SUBSURFACE_HALFRES 1
#endif

// Validate if the checkerboard neighbor pixel is subsurface in recombine pass
#ifndef SUBSURFACE_RECOMBINE
	#define SUBSURFACE_RECOMBINE 0
#endif

#ifndef SUBSURFACE_SETUP_PASS
	#define SUBSURFACE_SETUP_PASS 0
#endif

// Unless overridden, neighbor validation follows SUBSURFACE_RECOMBINE, and the runtime
// toggle is only declared when the validation code is compiled in.
#ifndef CHECKERBOARD_NEIGHBOR_SSS_VALIDATION
	#define CHECKERBOARD_NEIGHBOR_SSS_VALIDATION SUBSURFACE_RECOMBINE
	#if CHECKERBOARD_NEIGHBOR_SSS_VALIDATION
		uint CheckerboardNeighborSSSValidation;
	#endif
#endif

//=============================================================================
// setup for "SeparableSSS.ush"
//=============================================================================

// Select which kernel (size and offset) is used for the chosen blur quality.
#if SUBSURFACE_QUALITY == SUBSURFACE_QUALITY_LOW
	#define SSSS_N_KERNELWEIGHTCOUNT SSSS_KERNEL2_SIZE
	#define SSSS_N_KERNELWEIGHTOFFSET SSSS_KERNEL2_OFFSET
#elif SUBSURFACE_QUALITY == SUBSURFACE_QUALITY_MEDIUM
	#define SSSS_N_KERNELWEIGHTCOUNT SSSS_KERNEL1_SIZE
	#define SSSS_N_KERNELWEIGHTOFFSET SSSS_KERNEL1_OFFSET
#else // SUBSURFACE_QUALITY == SUBSURFACE_QUALITY_HIGH
	#define SSSS_N_KERNELWEIGHTCOUNT SSSS_KERNEL0_SIZE
	#define SSSS_N_KERNELWEIGHTOFFSET SSSS_KERNEL0_OFFSET
#endif

// 0: faster
// 1: no color bleeding in z direction
#define SSSS_FOLLOW_SURFACE 1

// Converts the value stored in alpha into a mask.
// @return 1 when Alpha is greater than zero, 0 otherwise (the comparison result converted to float)
float GetMaskFromDepthInAlpha(float Alpha)
{
	return Alpha > 0;
}

#if SUBTRATE_GBUFFER_FORMAT==1
// Loads the Substrate subsurface data for the pixel addressed by UV.
// UV is scaled by View.BufferSizeAndInvSize.xy to obtain the pixel position.
FSubstrateSubsurfaceData LoadSubstrateSSSData(float2 UV)
{
	const float2 PixelPos = UV.xy * View.BufferSizeAndInvSize.xy;
	return SubstrateLoadSubsurfaceData(Substrate.MaterialTextureArray,
Substrate.FirstSliceStoringSubstrateSSSData, PixelPos);
}

// Loads only the Substrate subsurface header for the pixel addressed by UV.
// UV is scaled by View.BufferSizeAndInvSize.xy to obtain the pixel position.
FSubstrateSubsurfaceHeader LoadSubstrateSSSHeader(float2 UV)
{
	const float2 PixelPos = UV.xy * View.BufferSizeAndInvSize.xy;
	return SubstrateLoadSubsurfaceHeader(Substrate.MaterialTextureArray, Substrate.FirstSliceStoringSubstrateSSSData, PixelPos);
}
#endif

// can be optimized
// @return the per-pixel radius scale, or 0 when the pixel does not use subsurface diffusion / a subsurface profile
float GetSubsurfaceStrength(float2 UV)
{
#if SUBTRATE_GBUFFER_FORMAT==1
	const FSubstrateSubsurfaceHeader SSSHeader = LoadSubstrateSSSHeader(UV);
	const float Mask = SubstrateSubSurfaceHeaderGetUseDiffusion(SSSHeader) ? 1.0f : 0.0f;
	const float RadiusScale = SubstrateSubSurfaceHeaderGetProfileRadiusScale(SSSHeader);
#else
	FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(UV);
	const float Mask = UseSubsurfaceProfile(ScreenSpaceData.GBuffer.ShadingModelID) ? 1.0f : 0.0f;
	const float RadiusScale = ScreenSpaceData.GBuffer.CustomData.a;
#endif
	return Mask * RadiusScale;
}

// Fetches one entry of a subsurface profile from View.SSProfilesTexture.
// The texel at (SampleIndex, SubsurfaceProfileInt) is loaded and its alpha is scaled by SUBSURFACE_KERNEL_SIZE.
// With the Substrate GBuffer format, the per-pixel profile id short-circuits to (1,1,1,1).
// @return .rgb is the weight for color channel, .a is the sample location
float4 GetSubsurfaceProfileKernel(uint SampleIndex, uint SubsurfaceProfileInt)
{
#if SUBTRATE_GBUFFER_FORMAT==1
	if (SubsurfaceProfileInt == SSS_PROFILE_ID_PERPIXEL) return float4(1, 1, 1, 1);
#endif
	const float4 TableMax = float4(1, 1, 1, SUBSURFACE_KERNEL_SIZE);

	return View.SSProfilesTexture.Load(int3(SampleIndex, SubsurfaceProfileInt, 0)) * TableMax;
}

// @return the GBuffer world normal at BufferUV, after clamping BufferUV to the SubsurfaceInput0 bilinear viewport bounds
float3 GetSubsurfaceProfileNormal(float2 BufferUV)
{
	BufferUV = clamp(BufferUV, SubsurfaceInput0_UVViewportBilinearMin, SubsurfaceInput0_UVViewportBilinearMax);

	FGBufferData LocalGBufferData = GetGBufferData(BufferUV);

	return LocalGBufferData.WorldNormal;
}

// @return the subsurface profile id at BufferUV, after clamping BufferUV to the SubsurfaceInput0 bilinear viewport bounds
uint GetSubsurfaceProfileId(float2 BufferUV)
{
	BufferUV = clamp(BufferUV, SubsurfaceInput0_UVViewportBilinearMin, SubsurfaceInput0_UVViewportBilinearMax);

#if SUBTRATE_GBUFFER_FORMAT==1
	const FSubstrateSubsurfaceHeader SSSHeader = LoadSubstrateSSSHeader(BufferUV);
	return SubstrateSubSurfaceHeaderGetProfileId(SSSHeader);
#else
	FGBufferData LocalGBufferData = GetGBufferData(BufferUV);
	return
ExtractSubsurfaceProfileInt(LocalGBufferData);
#endif
}

// @return the boundary color bleed stored in the profile table (entry SSSS_BOUNDARY_COLOR_BLEED_OFFSET);
//         with the Substrate GBuffer format, the per-pixel profile id returns (1,1,1) instead
float3 GetSubsurfaceProfileBoundaryColorBleed(uint SubsurfaceProfileInt)
{
#if SUBTRATE_GBUFFER_FORMAT==1
	if (SubsurfaceProfileInt == SSS_PROFILE_ID_PERPIXEL) return float3(1.f, 1.f, 1.f);
#endif
	return GetSubsurfaceProfileKernel(SSSS_BOUNDARY_COLOR_BLEED_OFFSET, SubsurfaceProfileInt).rgb;
}

// Samples mip 0 of SubsurfaceInput0 at BufferUV, clamped to the bilinear viewport bounds.
float4 GetSceneColor(float2 BufferUV)
{
	BufferUV = clamp(BufferUV, SubsurfaceInput0_UVViewportBilinearMin, SubsurfaceInput0_UVViewportBilinearMax);

	return Texture2DSampleLevel(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, BufferUV, 0);
}

#if SUBTRATE_GBUFFER_FORMAT==1
// Calculate the Subsurface profile kernel on the fly using integration instead of LUT.
// @param SampleIndex       index of the kernel sample; 0 is the center sample
// @param D                 per-channel parameter forwarded to GetCDF3D
// @param ScatterRadiusInMM scatter radius in millimeters (clamped to at least 1)
// @param WorldUnitScale    scale applied to the sample location
// @return .rgb is the weight for color channel, .a is the sample location
half4 GetSubsurfaceProfileKernel(int SampleIndex, float3 D, float ScatterRadiusInMM, float WorldUnitScale)
{
	// See notes in SubsurfaceProfile.cpp. Scatter radius is in mm instead of cm here.
	ScatterRadiusInMM = max(ScatterRadiusInMM,1.0f);

	const float Range = ((2 * SSSS_N_KERNELWEIGHTCOUNT - 1) > 20) ? 3.0f : 2.0f;

	// r is the sample position, r0/r1 are the positions half a sample before/after it.
	const float r = Range * float(SampleIndex) / (SSSS_N_KERNELWEIGHTCOUNT - 1);
	const float r0 = Range * (float(SampleIndex) - 0.5f) / (SSSS_N_KERNELWEIGHTCOUNT - 1);
	const float r1 = Range * (float(SampleIndex) + 0.5f) / (SSSS_N_KERNELWEIGHTCOUNT - 1);

	// Squared positions, divided by Range.
	const float x = r * r / (Range );
	const float x0 = r0 * r0 / (Range );
	const float x1 = r1 * r1 / (Range );

	// Normalization term: CDF evaluated half a sample past the last sample position.
	const float3 MaxCDF = GetCDF3D(D, ScatterRadiusInMM * Range * (1.0f + 0.5f / (SSSS_N_KERNELWEIGHTCOUNT - 1)));

	half4 Kernel = (half4)0.0f;
	BRANCH
	if (SampleIndex == 0)
	{
		Kernel.rgb = GetCDF3D(D, ScatterRadiusInMM * x1) / MaxCDF;
		Kernel.a = 0.0f;
	}
	else
	{
		// Need to multiply by 0.5f, and modify MaxCDF to match as close as offline LUT. Here favors integration
		// instead of discrete approximation.
Kernel.rgb = (GetCDF3D(D, ScatterRadiusInMM * x1) - GetCDF3D(D, ScatterRadiusInMM * x0)) / MaxCDF;
		Kernel.a = 2.0 * x; // 2.0f is consistent to the scale in ComputeMirroredBSSSKernel of BurleyNormalizedSSS.cpp
	}

	// Scale to match offline encoding
	// const float TableMaxA = SUBSURFACE_KERNEL_SIZE;
	// const FLinearColor TableColorScale = FLinearColor(
	// 1.0f / TableMaxRGB,
	// 1.0f / TableMaxRGB,
	// 1.0f / TableMaxRGB,
	// 1.0f / TableMaxA);
	// C *= TableColorScale
	// C.W *= Data.ScatterRadius / SUBSURFACE_RADIUS_SCALE;

	// SUBSURFACE_KERNEL_SIZE and TableMaxA cancels out
	Kernel.a *= (ScatterRadiusInMM * BURLEY_MM_2_CM) * WorldUnitScale /** SUBSURFACE_KERNEL_SIZE*/ /(/*TableMaxA **/ SUBSURFACE_RADIUS_SCALE);

	return Kernel;
}
#endif

// BufferUV back to grid position of the input
// The UV is clamped to [0,1], scaled by SubsurfaceInput0_Extent and offset by -0.5 before the conversion to uint2.
uint2 ConvertUVPos2Input0GridPos(float2 BufferUV)
{
	return clamp(BufferUV, 0, 1) * SubsurfaceInput0_Extent - 0.5f;
}

// Reads the profile id from ProfileIdTexture at the texel addressed by UV.
// @return SSS_PROFILE_ID_INVALID when Depth is not greater than zero, the stored id otherwise
uint ExtractSubsurfaceProfileIntWithInvalid(float2 UV,float Depth)
{
	uint ProfileID = SSS_PROFILE_ID_INVALID;

	// the invalid status be determined by the scene depth.
if (Depth > 0)
	{
		uint2 PosOfSampledUV = ConvertUVPos2Input0GridPos(UV);
		ProfileID = ProfileIdTexture[PosOfSampledUV];
	}

	return ProfileID;
}

// from https://github.com/iryoku/separable-sss/tree/master/Demo
// Jorge Jimenez http://www.iryoku.com/
// http://www.iryoku.com/translucency/downloads/Real-Time-Realistic-Skin-Translucency.pdf
#include "SeparableSSS.ush"

//=============================================================================

// @return true when UV lies inside the half-open unit square [0,1) x [0,1)
bool InUnitBox(float2 UV)
{
	// Both axes need an upper bound; the x upper bound used to be missing because y was tested twice.
	return UV.x >= 0 && UV.y >= 0 && UV.x < 1 && UV.y < 1;
}

// Computes how much of the scattered result should be blended in, based on the kernel radius measured in pixels.
// @param ProfileRadiusScale       radius scale taken from the subsurface profile
// @param PixelRadiusScale         per-pixel radius scale
// @param UVSceneColor             UV used to look up the scene depth
// @param FullResInputSizeInverse  inverse size of the input; only .x is used
// @return 0=don't blend in, 1:fully blend in
float ComputeFullResLerp(float ProfileRadiusScale, float PixelRadiusScale, float2 UVSceneColor, float2 FullResInputSizeInverse)
{
	float SSSScaleX = SubsurfaceParams.x;

	float scale = SSSScaleX / CalcSceneDepth(UVSceneColor);

	float HorizontalScaler = SUBSURFACE_RADIUS_SCALE;

	// Calculate the final step to fetch the surrounding pixels:
	float finalStep = scale * HorizontalScaler;

	finalStep *= ProfileRadiusScale;

	float PixelSizeRadius = finalStep / (FullResInputSizeInverse.x * 0.5f);

	// tweaked for skin, a more flat kernel might need a smaller value, around 2 seems reasonable because we do half res
	const float PixelSize = 4.0f;

	float Ret = saturate(PixelSizeRadius - PixelSize);

	// opacity allows to scale the radius - at some point we should fade in the full resolution, we don't have a masking other than that.
Ret *= saturate(PixelRadiusScale * 10);

	// todo: Subsurface has some non scatter contribution - all that should come from the Full res
	return Ret;
}

// Maps a profile id to a mini font character for the visualization:
// 0..9 map to digits, the following ids map to letters up to _Z_, anything larger maps to a question mark.
uint MiniFontCharFromSubsurfaceProfile(uint SubsurfaceProfileInt)
{
	if (SubsurfaceProfileInt <= 9)
	{
		return _0_ + SubsurfaceProfileInt;
	}

	// Ids from 10 upwards index into the letters.
	SubsurfaceProfileInt -= 10;

	if (SubsurfaceProfileInt <= _Z_ - _A_)
	{
		return _A_ + SubsurfaceProfileInt;
	}

	return _QUESTIONMARK_;
}

// visualization (doesn't have to be fast)
// Outputs the sampled input color; inside the area where IDAreaLocalUV is in the unit box it instead
// outputs a per-profile debug color with the profile character printed on top.
void VisualizePS(in noperspective float4 UVAndScreenPos : TEXCOORD0, float4 SvPosition : SV_POSITION, out float4 OutColor : SV_Target0)
{
	float2 UV = UVAndScreenPos.xy;

	OutColor = Texture2DSample(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, UV);

	int2 PixelPos = (int2)SvPosition.xy;

	float2 ViewLocalUV = (PixelPos - SubsurfaceInput0_ViewportMin) * SubsurfaceInput0_ViewportSizeInverse;

	float2 IDAreaLocalUV = ViewLocalUV * 2 - 1.0f;

	if (InUnitBox(IDAreaLocalUV))
	{
		// Note: this UV shadows the outer UV and addresses the full input through the ID area coordinates.
		float2 UV = SubsurfaceInput0_ViewportMin * SubsurfaceInput0_ExtentInverse + IDAreaLocalUV * (SubsurfaceInput0_ViewportSize * SubsurfaceInput0_ExtentInverse);

#if SUBTRATE_GBUFFER_FORMAT==1
		const FSubstrateSubsurfaceHeader SSSHeader = LoadSubstrateSSSHeader(UV);
		const uint SubsurfaceProfileInt = SubstrateSubSurfaceHeaderGetProfileId(SSSHeader);
		const bool bIsProfile = SubstrateSubSurfaceHeaderGetIsProfile(SSSHeader);
		const float PixelRadiusScale = SubstrateSubSurfaceHeaderGetProfileRadiusScale(SSSHeader);
#else
		const FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(UV);
		const uint SubsurfaceProfileInt = ExtractSubsurfaceProfileInt(ScreenSpaceData.GBuffer);
		const bool bIsProfile = UseSubsurfaceProfile(ScreenSpaceData.GBuffer.ShadingModelID);
		const float PixelRadiusScale = ScreenSpaceData.GBuffer.CustomData.a;
#endif
		// The .a of the last kernel entry is used as the profile radius scale.
		const float ProfileRadiusScale = GetSubsurfaceProfileKernel(SSSS_N_KERNELWEIGHTOFFSET + SSSS_N_KERNELWEIGHTCOUNT - 1, SubsurfaceProfileInt).a;

		// Grey for pixels without a profile.
		OutColor = float4(0.5f, 0.5f, 0.5f, 0);

		BRANCH if (bIsProfile)
		{
			if (SubsurfaceProfileInt ==
0)
			{
				// default (no Profile)
				OutColor = float4(0.8f, 0.7f, 0.6f, 0);
			}
			// Fixed debug colors for a handful of profile ids; other ids keep the grey set above.
			if (SubsurfaceProfileInt == 1)
			{
				OutColor = float4(1, 0, 0, 0) * 0.5f;
			}
			if (SubsurfaceProfileInt == 2)
			{
				OutColor = float4(0, 1, 0, 0) * 0.5f;
			}
			if (SubsurfaceProfileInt == 3)
			{
				OutColor = float4(0, 0, 1, 0) * 0.5f;
			}
			if (SubsurfaceProfileInt == 4)
			{
				OutColor = float4(1, 0, 1, 0) * 0.5f;
			}
			if (SubsurfaceProfileInt == 5)
			{
				OutColor = float4(0, 1, 1, 0) * 0.5f;
			}
			if (SubsurfaceProfileInt == 6)
			{
				OutColor = float4(1, 1, 0, 0) * 0.5f;
			}
			if (SubsurfaceProfileInt == 100)
			{
				OutColor = float4(0, 0.2f, 0, 0);
			}
			if (SubsurfaceProfileInt == 255)
			{
				OutColor = float4(1, 1, 1, 0);
			}

			// Print the profile character into the 8x8 pixel cell that contains this pixel.
			int2 LeftTop = (PixelPos / 8) * 8;
			PrintCharacter(PixelPos, OutColor.rgb, float3(1, 1, 1), LeftTop, MiniFontCharFromSubsurfaceProfile(SubsurfaceProfileInt));

			OutColor.rgb *= ComputeFullResLerp(ProfileRadiusScale, PixelRadiusScale, UV, SubsurfaceInput0_ExtentInverse);
		}
	}
}

// Lighting split into its diffuse and specular parts.
struct SDiffuseAndSpecular
{
	float3 Diffuse;
	float3 Specular;
};

#if CHECKERBOARD_NEIGHBOR_SSS_VALIDATION
// @param SceneUV     UV of the center pixel
// @param PixelOffset offset to the neighbor in pixels (scaled by View.BufferSizeAndInvSize.zw)
// @return true when the neighbor pixel is flagged as subsurface (valid Substrate SSS header, or a subsurface profile shading model)
bool IsCheckerBoardNeighborSubsurface(float2 SceneUV, int2 PixelOffset)
{
	const float2 UV = SceneUV + PixelOffset * View.BufferSizeAndInvSize.zw;
#if SUBTRATE_GBUFFER_FORMAT==1
	const FSubstrateSubsurfaceData SSSData = LoadSubstrateSSSData(UV);
	const bool bHasSSSProfile = SubstrateSubSurfaceHeaderGetIsValid(SSSData.Header);
#else
	FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(UV);
	const bool bHasSSSProfile = UseSubsurfaceProfile(ScreenSpaceData.GBuffer.ShadingModelID);
#endif

	return bHasSSSProfile;
}
#endif

// can be moved/shared
// Samples mip 0 of SubsurfaceInput0 at SceneUV with an integer texel offset.
half3 LookupSceneColor(float2 SceneUV, int2 PixelOffset)
{
	// faster
	float3 SceneColor = SubsurfaceInput0_Texture.SampleLevel(SharedSubsurfaceSampler0, SceneUV, 0, PixelOffset).rgb;

#if CHECKERBOARD_NEIGHBOR_SSS_VALIDATION
	// Fix border background color leaking into subsurface diffuse/specular.
BRANCH
	if (CheckerboardNeighborSSSValidation != 0)
	{
		// Neighbors that are not subsurface contribute black instead of their color.
		bool bIsSubsurface = IsCheckerBoardNeighborSubsurface(SceneUV, PixelOffset);
		SceneColor = lerp(0, SceneColor, bIsSubsurface);
	}
#endif

	return SceneColor;
}

// Splits the lighting stored in SubsurfaceInput0 into diffuse and specular.
// Three compile-time paths: isolated diffuse only, checkerboard reconstruction, or luminance based decode.
// @param UVSceneColor for the full res rendertarget (BufferSize) e.g. SceneColor or GBuffers
// @param ReconstructMethod 0/1/2/3 (should be a literal constant to allow compiler optimizations)
SDiffuseAndSpecular ReconstructLighting(float2 UVSceneColor, uint ReconstructMethod)
{
	SDiffuseAndSpecular Ret;

#if SUBSTRATE_OPAQUE_ROUGH_REFRACTION_ENABLED
	{
		// In this mode, we only read isolated diffuse lighting.
		float3 CenterSample = SubsurfaceInput0_Texture.SampleLevel(SharedSubsurfaceSampler0, UVSceneColor, 0).rgb;
		Ret.Diffuse = CenterSample;
		Ret.Specular = 0.0f;
	}
// If SUBSURFACE_CHANNEL_MODE is 0, checkerboard is forced on
#elif SUBSURFACE_PROFILE_CHECKERBOARD || SUBSURFACE_CHANNEL_MODE == 0
	{
		bool bChecker = CheckerFromSceneColorUV(UVSceneColor);

		// todo: We could alternate the diagonal with TemporalAA or even only only 1 sample for low spec or 4 for high spec

		// Quant0 is the value stored at this pixel, Quant1 is reconstructed from the neighbors.
		float3 Quant0 = SubsurfaceInput0_Texture.SampleLevel(SharedSubsurfaceSampler0, UVSceneColor, 0).rgb;

		// todo: expose as scalability setting (can be evaluate best without TemporalAA)
		// 0:fast but pattern can appear, 1:better, 2: even better, 3: best but expensive
		float3 Quant1;

		if(ReconstructMethod == 0)
		{
			// cheap, crappy
			Quant1 = LookupSceneColor(UVSceneColor, int2(1, 0));
		}
		else if(ReconstructMethod == 1)
		{
			// acceptable but not perfect
			Quant1 = 0.5f * (
				LookupSceneColor(UVSceneColor, int2( 1, 0)) +
				LookupSceneColor(UVSceneColor, int2(-1, 0)));
		}
		else if(ReconstructMethod == 2)
		{
			// almost same as 1?
Quant1 = 0.25f * (
				LookupSceneColor(UVSceneColor, int2( 1, 0)) +
				LookupSceneColor(UVSceneColor, int2( 0, 1)) +
				LookupSceneColor(UVSceneColor, int2(-1, 0)) +
				LookupSceneColor(UVSceneColor, int2( 0, -1)));
		}
		else if(ReconstructMethod == 3)
		{
			// very good
			float3 A = LookupSceneColor(UVSceneColor, int2( 1, 0));
			float3 B = LookupSceneColor(UVSceneColor, int2(-1, 0));
			float3 C = LookupSceneColor(UVSceneColor, int2( 0, 1));
			float3 D = LookupSceneColor(UVSceneColor, int2( 0, -1));

			// Luminance could be green channel only
			float a = Luminance(A);
			float b = Luminance(B);
			float c = Luminance(C);
			float d = Luminance(D);

			float ab = abs(a - b);
			float cd = abs(c - d);

			// take the average in the direction that avoids dither pattern
			Quant1 = 0.5f * lerp(A + B, C + D, ab > cd);
		}
		// NOTE(review): Quant1 is not assigned for ReconstructMethod values other than 0..3 - callers in this file pass 1 or 3.

		// Which of the two values is diffuse and which is specular depends on the checker state of this pixel.
		Ret.Diffuse = lerp(Quant1, Quant0, bChecker);
		Ret.Specular = lerp(Quant0, Quant1, bChecker);
	}
#else // SUBSURFACE_PROFILE_CHECKERBOARD
	{
		// If we're not doing checkerboard encoding, we just need to read a single pixel and decode (combined diffuse/spec in RGB)
		float4 CenterSample = SubsurfaceInput0_Texture.SampleLevel(SharedSubsurfaceSampler0, UVSceneColor, 0);
		float3 CombinedColor = CenterSample.rgb;
		float DiffuseLuminance = CenterSample.a;

		float CombinedLuminance = Luminance(CombinedColor);
		// NOTE(review): CombinedLuminance can be 0 for a black pixel; the result then relies on how saturate() treats the division result - confirm.
		float DiffuseFactor = saturate(DiffuseLuminance / CombinedLuminance);
		float SpecularFactor = 1.0f - DiffuseFactor;

		Ret.Diffuse = CombinedColor * DiffuseFactor;
		Ret.Specular = CombinedColor * SpecularFactor;
	}
#endif // !SUBSURFACE_PROFILE_CHECKERBOARD

	return Ret;
}

// @param UVSceneColor for the full res rendertarget (BufferSize) e.g.
// SceneColor or GBuffers
// @return .RGB Color that should be scattered, .A:1 for subsurface scattering material, 0 for not
float4 SetupSubsurfaceForOnePixel(float2 UVSceneColor)
{
#if SUBTRATE_GBUFFER_FORMAT==1
	const FSubstrateSubsurfaceHeader SSSHeader = LoadSubstrateSSSHeader(UVSceneColor);
	const bool bHasSSSDiffusion = SubstrateSubSurfaceHeaderGetUseDiffusion(SSSHeader);
#else
	FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(UVSceneColor);
	const bool bHasSSSDiffusion = UseSubsurfaceProfile(ScreenSpaceData.GBuffer.ShadingModelID);
#endif

	// Pixels without subsurface diffusion return all zeros.
	float4 Ret = 0;

	BRANCH if (bHasSSSDiffusion)
	{
		// '1' is lower quality but that is acceptable here in regular rendering
		// When rendering resolution is very low, '3' is required to reduce checkerboard pattern.
		uint CheckerboardReconstructMethod = (SUBSURFACE_RECOMBINE_QUALITY == SUBSURFACE_RECOMBINE_QUALITY_HIGH) ? 3 : 1;
		SDiffuseAndSpecular DiffuseAndSpecular = ReconstructLighting(UVSceneColor, CheckerboardReconstructMethod);

		Ret.rgb = DiffuseAndSpecular.Diffuse;

		// it's a valid sample
		Ret.a = 1;
	}

	return Ret;
}

// NOTE(review): no element type is spelled out for this buffer; its values are read into uint below - confirm the intended declaration.
Buffer TileTypeCountBuffer;

// @return true only for the SUBSURFACE_TILE_TYPE_AFIS tile type, when its tile count is at least SubsurfaceParams.w
bool ShouldGenerateMipmaps(uint TileType)
{
	bool bShouldGenerateMipmaps = false;

	if (SUBSURFACE_TILE_TYPE_AFIS == TileType)
	{
		uint TileCount = TileTypeCountBuffer[TileType];
		bShouldGenerateMipmaps = (TileCount >= SubsurfaceParams.w);
	}

	return bShouldGenerateMipmaps;
}

// Ref: https://graphics.pixar.com/library/ApproxBSSRDF/paper.pdf
#include "SubsurfaceBurleyNormalized.ush"

#if SUBSURFACE_BURLEY_COMPUTE

//-----------------------------------------------------------------------------------
// Indirect pass

// Indirect dispatch parameters
// NOTE(review): the buffers below are declared without element types in this file - confirm the intended declarations.
RWBuffer RWSeparableGroupBuffer;
RWBuffer RWBurleyGroupBuffer;
RWBuffer RWIndirectDispatchArgsBuffer;
RWBuffer RWIndirectDrawArgsBuffer;
RWBuffer RWTileTypeCountBuffer;
Buffer GroupBuffer;

[numthreads(1, 1, 1)]
void InitValueBufferCS()
{
	// Clear the buffer counter for all types of tile.
UNROLL
	for (uint TileTypeIndex = 0; TileTypeIndex < SUBSURFACE_TILE_TYPE_COUNT; ++TileTypeIndex)
	{
		RWTileTypeCountBuffer[TileTypeIndex] = 0;
	}
}

uint MinGenerateMipsTileCount;
uint Offset;
RWBuffer RWMipsConditionBuffer;

// Writes 1 into RWMipsConditionBuffer[Offset] when the tile count stored at Offset reaches
// MinGenerateMipsTileCount, 0 otherwise.
[numthreads(1, 1, 1)]
void FillMipsConditionBufferCS()
{
	uint NumTiles = TileTypeCountBuffer[Offset];

	if (NumTiles >= MinGenerateMipsTileCount)
	{
		RWMipsConditionBuffer[Offset] = 1;
	}
	else
	{
		RWMipsConditionBuffer[Offset] = 0;
	}
}

// Fills the indirect draw and indirect dispatch arguments, one thread per tile type.
// The passthrough / all-non-passthrough / all tile types are skipped.
[numthreads(8, 1, 1)]
void BuildIndirectDispatchArgsCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	// Threads that do not map to a tile type with its own arguments have nothing to write.
	if (DispatchThreadId.y != 0 || DispatchThreadId.x >= SUBSURFACE_TILE_TYPE_COUNT
		|| DispatchThreadId.x == SUBSURFACE_TILE_TYPE_PASSTHROUGH
		|| DispatchThreadId.x == SUBSURFACE_TILE_TYPE_ALLNONPASSTHROUGH
		|| DispatchThreadId.x == SUBSURFACE_TILE_TYPE_ALL)
	{
		return;
	}

	uint TileType = DispatchThreadId.x;
	const uint NumTiles = TileTypeCountBuffer[TileType];

	// Indirect draw: one instance per tile, drawn as a rect primitive (4 vertices) or two triangles (6 vertices).
	const uint DrawArgsBase = TileType * DRAW_INDIRECT_UINT_COUNT;
	RWIndirectDrawArgsBuffer[DrawArgsBase + 0] = SubsurfaceUniformParameters.bRectPrimitive > 0 ? 4 : 6; // VertexCountPerInstance
	RWIndirectDrawArgsBuffer[DrawArgsBase + 1] = NumTiles; // InstanceCount
	RWIndirectDrawArgsBuffer[DrawArgsBase + 2] = 0; // StartVertexLocation
	RWIndirectDrawArgsBuffer[DrawArgsBase + 3] = 0; // StartInstanceLocation

	// Indirect dispatch: one group per tile.
	// (previously considered: (TileCount + (THREAD_SIZE_1D*THREAD_SIZE_1D) - 1) / (THREAD_SIZE_1D*THREAD_SIZE_1D))
	const uint NumGroupsX = NumTiles;
	WriteDispatchIndirectArgs(RWIndirectDispatchArgsBuffer, TileType, NumGroupsX, 1, 1);
}

// @return TYPE_BURLEY when the profile uses Burley, TYPE_SEPARABLE otherwise
uint GetSubsurfaceType(uint ProfileId)
{
	if (GetSubsurfaceProfileUseBurley(ProfileId))
	{
		return TYPE_BURLEY;
	}

	return TYPE_SEPARABLE;
}

// Appends a tile (group coordinate) to the separable tile list.
void AddSeparableGroup(uint2 GroupXY)
{
	//buffer counter is stored in the .Count in the first element.
uint SlotIndex = 0;
	InterlockedAdd(RWTileTypeCountBuffer[SUBSURFACE_TILE_TYPE_SEPARABLE], 1, SlotIndex);

	// Only store the tile while the slot is inside the group buffer capacity.
	if (SlotIndex < SubsurfaceUniformParameters.MaxGroupCount)
	{
		RWSeparableGroupBuffer[SlotIndex] = PackTileCoord12bits(GroupXY);
	}
}

// Appends a tile (group coordinate) to the Burley tile list.
void AddBurleyGroup(uint2 GroupXY)
{
	// The atomic add on the tile type counter returns the slot this tile is written to.
	uint SlotIndex = 0;
	InterlockedAdd(RWTileTypeCountBuffer[SUBSURFACE_TILE_TYPE_AFIS], 1, SlotIndex);

	// Only store the tile while the slot is inside the group buffer capacity.
	if (SlotIndex < SubsurfaceUniformParameters.MaxGroupCount)
	{
		RWBurleyGroupBuffer[SlotIndex] = PackTileCoord12bits(GroupXY);
	}
}

// Merges the per-thread type flags into SubsurfaceTypeFlag, then lets thread 0 of the group
// register the tile in the separable and/or Burley list when the flags say so.
// @param GI      group index of the calling thread
// @param Type    TYPE_* flags of the calling thread
// @param GroupXY tile coordinate that is stored in the lists
void AddSubsurfaceComputeGroup(uint GI, uint Type, uint2 GroupXY)
{
#if COMPILER_SUPPORTS_WAVE_VOTE && COMPILER_SUPPORTS_WAVE_BIT_ORAND
	// Combine the flags across the wave first so that only one lane issues the atomic.
	const uint WaveTypeFlags = WaveActiveBitOr(Type);
	const bool bWaveHasVisibleSSS = (WaveTypeFlags & TYPE_IN_VIEWPORT) && ((WaveTypeFlags & TYPE_SEPARABLE)|| (WaveTypeFlags & TYPE_BURLEY));

	if (WaveIsFirstLane() && bWaveHasVisibleSSS)
	{
		InterlockedOr(SubsurfaceTypeFlag, WaveTypeFlags);
	}
#else
	InterlockedOr(SubsurfaceTypeFlag, Type);
#endif

	GroupMemoryBarrierWithGroupSync();

	// Only the first thread of the group appends the tile.
	if (GI == 0)
	{
		if ((SubsurfaceTypeFlag & TYPE_SEPARABLE) && (SubsurfaceTypeFlag & TYPE_IN_VIEWPORT))
		{
			AddSeparableGroup(GroupXY);
		}

		if ((SubsurfaceTypeFlag & TYPE_BURLEY) && (SubsurfaceTypeFlag & TYPE_IN_VIEWPORT))
		{
			AddBurleyGroup(GroupXY);
		}
	}
}

// Get random index that has Mask[index] = 1 (s.t.
// Sum(Mask)>=1)
// @param Pos  pixel position, used together with View.FrameNumber as the random seed
// @param Mask four 0/1 entries; the sum is used as a modulo divisor
// @return an index in 0..3 (the result is clamped to 3)
uint GetRandomIndex(uint2 Pos, uint4 Mask)
{
	uint4 PrefixSum = uint4(
		Mask[0],
		Mask[0]+Mask[1],
		Mask[0]+Mask[1]+Mask[2],
		Mask[0]+Mask[1]+Mask[2]+Mask[3]);

	// Random value in 1..PrefixSum[3].
	uint RandomSelectedBit = (Rand3DPCG16(int3(Pos, View.FrameNumber)).x % PrefixSum[3]) + 1u;

	// An entry matches when its prefix sum equals the random value and its mask entry is set.
	uint m0 = (RandomSelectedBit == PrefixSum[0]) & Mask[0];
	uint m1 = (RandomSelectedBit == PrefixSum[1]) & Mask[1];
	uint m2 = (RandomSelectedBit == PrefixSum[2]) & Mask[2];
	uint m3 = (RandomSelectedBit == PrefixSum[3]) & Mask[3];

	uint SelectedIndexWithValidMask = firstbitlow((m0 << 0u)|(m1 << 1u)|(m2 << 2u)|(m3 << 3u));
	return min(SelectedIndexWithValidMask,3u);
}

// NOTE(review): both UAVs are declared without element types in this file; SetupTexture is written with a float4 below - confirm the intended declarations.
RWTexture2D SetupTexture;
RWTexture2D ProfileEdgeMask;

// Setup pass, one group per tile listed in GroupBuffer: writes the color to scatter (and depth in alpha)
// into SetupTexture and classifies the tile for the later passes.
[numthreads(THREAD_SIZE_X*THREAD_SIZE_Y, 1, 1)]
void SetupIndirectCS( uint3 IndirectG_ID : SV_GroupID, uint GI : SV_GroupIndex)
{
	// initialize the group shared variable
	if (GI == 0)
	{
		SubsurfaceTypeFlag = 0;
	}
	GroupMemoryBarrierWithGroupSync();

	// Tile coordinate from the tile list, then the thread's pixel inside that tile.
	uint2 G_ID = UnpackTileCoord12bits(GroupBuffer[IndirectG_ID.x]);
	uint2 DT_ID = G_ID * float2(THREAD_SIZE_X, THREAD_SIZE_Y) + float2(GI % THREAD_SIZE_X, GI / THREAD_SIZE_X);

	uint2 Pos = DT_ID.xy + Output_ViewportMin;
	float2 BufferUV = ConvertGridPos2UV(Pos);
	uint Type = 0;

	float4 OutColor = 0;
	float4 OutColor2 = 0;

#if SUBSURFACE_HALF_RES
	// order aligned with Gather() hardware implementation
	// RGB: color*A, A:weight 0 if no subsurface scattering
	float4 A = SetupSubsurfaceForOnePixel(min(BufferUV + float2(-0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax));
	float4 B = SetupSubsurfaceForOnePixel(min(BufferUV + float2(0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax));
	float4 C = SetupSubsurfaceForOnePixel(min(BufferUV + float2(0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax));
	float4 D = SetupSubsurfaceForOnePixel(min(BufferUV + float2(-0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax));

	float4 Sum = (A + B) + (C + D);
// Each alpha value of A/B/C/D should be either 1.0f or 0.0, so a check of 0.5 should be safe
	bool bHasSubsurface = (Sum.a >= .50f);

	// Sum.a should either be one of {0,1,2,3,4}, add a max of 1 to avoid floating point specials. If Sum.a is 0, Sum.rgb should be 0 as well so it
	// shouldn't matter what Div is.
	float Div = 1.0f / max(Sum.a, 1.0f);
	OutColor.rgb = Sum.rgb * Div;

	float4 FourDepth = GatherSceneDepth(BufferUV, SubsurfaceInput0_ExtentInverse);

	// average all valid depth values to a single one
	float SingleDepth = dot(FourDepth, float4(A.a, B.a, C.a, D.a)) * Div;

	OutColor.a = SingleDepth;

	if (OutColor.a > 0)
	{
		bHasSubsurface = true;
	}
	//BufferUV += min(float2(-0.25, -0.25f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
#else // SUBSURFACE_HALF_RES
	bool bHasSubsurface = false;

	OutColor = SetupSubsurfaceForOnePixel(BufferUV);

	if (OutColor.a > 0)
	{
		bHasSubsurface = true;

		// Alpha is replaced by the scene depth plus a small depth-proportional noise term.
		float SourceDepth = CalcSceneDepth(BufferUV);
		float Noise = InterleavedGradientNoise(float2(Pos.x,Pos.y), View.StateFrameIndexMod8);

		// Divide by 1024 because the mantissa is 10 bits.
		OutColor.a = SourceDepth + Noise * (SourceDepth / 1024.0f);

		//@TODO: sparkling
		//suppress the edge color lighting information at edge.
//with firefly elimination
	}
#endif // SUBSURFACE_HALF_RES

#if ENABLE_PROFILE_ID_CACHE
	uint ProfileId = GetSubsurfaceProfileId(BufferUV);
#endif

	// setup the Subsurface type
	uint SelectedProfile = 0;
	if (bHasSubsurface)
	{
#if SUBSURFACE_HALF_RES
	#if !SUBSURFACE_FORCE_SEPARABLE
		#if 1
		{
			// Move BufferUV onto one of the four full res pixels that is subsurface;
			// the lerps are applied in order, so a later subsurface pixel overrides an earlier one.
			float2 UVA = min(BufferUV + float2(-0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
			float2 UVB = min(BufferUV + float2(0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
			float2 UVC = min(BufferUV + float2(0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
			float2 UVD = min(BufferUV + float2(-0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);

			BufferUV = lerp(BufferUV, UVA, A.a);
			BufferUV = lerp(BufferUV, UVB, B.a);
			BufferUV = lerp(BufferUV, UVC, C.a);
			BufferUV = lerp(BufferUV, UVD, D.a);
		}
		#else
		{
			// Gather available and randomize.
			float4x2 UVs;
			UVs[0] = min(BufferUV + float2(-0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
			UVs[1] = min(BufferUV + float2(0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
			UVs[2] = min(BufferUV + float2(0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
			UVs[3] = min(BufferUV + float2(-0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);

			uint UVIndex = GetRandomIndex(Pos, uint4(A.a, B.a, C.a, D.a));
			BufferUV = UVs[UVIndex];
		}
		#endif

		SelectedProfile = GetSubsurfaceProfileId(BufferUV);
		Type = GetSubsurfaceType(SelectedProfile);
		#if ENABLE_PROFILE_ID_CACHE
		ProfileId = SelectedProfile;
		#endif
	#else
		Type = TYPE_SEPARABLE;
	#endif
#else
	#if ENABLE_PROFILE_ID_CACHE
		SelectedProfile = ProfileId;
	#else
		SelectedProfile = GetSubsurfaceProfileId(BufferUV);
	#endif
		Type = GetSubsurfaceType(SelectedProfile);
#endif
	}

	// Only threads inside the output viewport write results and flag the tile as visible.
	if (all(Pos < Output_ViewportMax))
	{
		Type |= TYPE_IN_VIEWPORT;
#if (SUBSURFACE_SETUP_PASS == SUBSURFACE_SETUP_PASS_PASSTHROUGH)
		//write to the final buffer, need mask to recombine.
		OutColor.a = GetMaskFromDepthInAlpha(OutColor.a);
#endif
		SetupTexture[Pos] = OutColor;

#if ENABLE_PROFILE_ID_CACHE
		ProfileIdTexture[Pos] = ProfileId;
#endif
	}

#if !(SUBSURFACE_FORCE_SEPARABLE || (SUBSURFACE_SETUP_PASS == SUBSURFACE_SETUP_PASS_PASSTHROUGH))
	AddSubsurfaceComputeGroup(GI, Type, G_ID.xy);
#endif
}

// Main scattering pass, one group per tile listed in GroupBuffer.
// Depending on SUBSURFACE_TYPE it either runs the Burley path or one separable blur pass.
[numthreads(THREAD_SIZE_X*THREAD_SIZE_Y, 1, 1)]
void MainIndirectDispatchCS( uint3 IndirectG_ID : SV_GroupID, uint GI : SV_GroupIndex )
{
	uint2 G_ID = UnpackTileCoord12bits(GroupBuffer[IndirectG_ID.x]);
	uint2 DT_ID = G_ID * float2(THREAD_SIZE_X, THREAD_SIZE_Y) + float2(GI % THREAD_SIZE_X, GI / THREAD_SIZE_X);

	//1. Call function based on which function is defined and in which phase
#if SUBSURFACE_TYPE == SSS_TYPE_BURLEY && !SUBSURFACE_FORCE_SEPARABLE

	BurleyComputeMain(DT_ID, G_ID, GI);

#elif SUBSURFACE_TYPE == SSS_TYPE_SSSS

	uint2 Pos = DT_ID.xy*SUBSURFACE_GROUP_SIZE / THREAD_SIZE_1D + Output_ViewportMin;
	float2 BufferUV = ConvertGridPos2UV(Pos);
	uint2 BufferPos = Pos;

	#if SUBTRATE_GBUFFER_FORMAT==1
	const FSubstrateSubsurfaceHeader SSSHeader = LoadSubstrateSSSHeader(BufferUV);
	const uint bUseBurley = SubstrateSubSurfaceHeaderGetIsProfile(SSSHeader) ? GetSubsurfaceProfileUseBurley(SubstrateSubSurfaceHeaderGetProfileId(SSSHeader)) : SubstrateSubSurfaceHeaderGetUseDiffusion(SSSHeader);
	#else
	const FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(BufferUV);
	const uint SubsurfaceProfileInt = ExtractSubsurfaceProfileInt(ScreenSpaceData.GBuffer);
	const bool bUseBurley = GetSubsurfaceProfileUseBurley(SubsurfaceProfileInt);
	#endif

	// We need to check Burley here because the previous pass might write burley samplings here unless we have SUBSURFACE_FORCE_SEPARABLE to overwrite the setting.
#if !SUBSURFACE_FORCE_SEPARABLE
	BRANCH if (bUseBurley)
	{
		return;
	}
	#endif

	// pass one and pass two
	#if SUBSURFACE_PASS == SUBSURFACE_PASS_ONE
	// horizontal
	float2 ViewportDirectionUV = float2(1, 0) * SUBSURFACE_RADIUS_SCALE;
	#elif SUBSURFACE_PASS == SUBSURFACE_PASS_TWO
	// vertical
	float2 ViewportDirectionUV = float2(0, 1) * SUBSURFACE_RADIUS_SCALE * (SubsurfaceInput0_Extent.x * SubsurfaceInput0_ExtentInverse.y);
	#endif

	// Scale by the fraction of the texture width that the viewport covers.
	ViewportDirectionUV *= (SubsurfaceInput0_ViewportSize.x * SubsurfaceInput0_ExtentInverse.x);

	float4 OutColor = SSSSBlurPS(BufferPos, BufferUV, ViewportDirectionUV, false, SubsurfaceInput0_Extent);

	#if SUBSURFACE_PASS == SUBSURFACE_PASS_TWO
	// second pass prepares the setup from the recombine pass which doesn't need depth but wants to reconstruct the color
	OutColor.a = GetMaskFromDepthInAlpha(OutColor.a);
	#endif

	// Only pixels inside the output viewport are written.
	if (all(Pos < Output_ViewportMax))
	{
		SSSColorUAV[Pos] = OutColor;
	}
#endif
}

#endif

// Recombines the half res Subsurface filtered lighting contribution (upsampled and renormalized with the alpha)
// with the SceneColor.
// @param Input    screen pass vertex output; Input.UV.xy is used as the buffer UV
// @param OutColor recombined color; pixels without subsurface diffusion get the unmodified SubsurfaceInput0 sample
void SubsurfaceRecombinePS(in FScreenVertexOutput Input, out float4 OutColor : SV_Target0)
{
	const float2 BufferUV = Input.UV.xy;

#if SUBTRATE_GBUFFER_FORMAT==1
	FSubstrateSubsurfaceData SSSData = LoadSubstrateSSSData(BufferUV);
	const bool bUseSSSDiffusion = SubstrateSubSurfaceHeaderGetUseDiffusion(SSSData.Header);
#else
	FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(BufferUV);
	const bool bUseSSSDiffusion = UseSubsurfaceProfile(ScreenSpaceData.GBuffer.ShadingModelID);
#endif

	// Early out: pass the input color through for pixels that are not subsurface.
	if (!bUseSSSDiffusion)
	{
		OutColor = Texture2DSample(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, BufferUV);
		return;
	}

#if SUBTRATE_GBUFFER_FORMAT==1
	const uint ProfileId = SubstrateSubSurfaceHeaderGetProfileId(SSSData.Header);
	// we multiply the base color later in to get more crisp human skin textures (scanned data always has Subsurface included)
	const float3 StoredBaseColor = SubstrateSubsurfaceExtrasGetBaseColor(SSSData.Extras);
	const float PixelRadiusScale = SubstrateSubSurfaceHeaderGetProfileRadiusScale(SSSData.Header);
	// asset specific color
	float3 Tint = 0.0f;
	float ProfileRadiusScale = 1.0f;
	if (SubstrateSubSurfaceHeaderGetIsProfile(SSSData.Header))
	{
		Tint = GetSubsurfaceProfileKernel(SSSS_TINT_SCALE_OFFSET, ProfileId).rgb;
		ProfileRadiusScale = GetSubsurfaceProfileKernel(SSSS_N_KERNELWEIGHTOFFSET + SSSS_N_KERNELWEIGHTCOUNT - 1, ProfileId).a;
	}
	else if (ProfileId == SSS_PROFILE_ID_PERPIXEL)
	{
		// This color is used to lerping 'incident light' into 'diffused light', i.e, controlling the sharpness of the lighting.
		// In case of direct DMPF control, this is not needed.
Tint = float3(1, 1, 1);
	}
#else
	const uint ProfileId = ExtractSubsurfaceProfileInt(ScreenSpaceData.GBuffer);
	// we multiply the base color later in to get more crisp human skin textures (scanned data always has Subsurface included)
	const float3 StoredBaseColor = ScreenSpaceData.GBuffer.StoredBaseColor;
	const float PixelRadiusScale = ScreenSpaceData.GBuffer.CustomData.a;
	// asset specific color
	const float3 Tint = GetSubsurfaceProfileKernel(SSSS_TINT_SCALE_OFFSET, ProfileId).rgb;
	// The .a of the last kernel entry is used as the profile radius scale.
	const float ProfileRadiusScale = GetSubsurfaceProfileKernel(SSSS_N_KERNELWEIGHTOFFSET + SSSS_N_KERNELWEIGHTCOUNT - 1, ProfileId).a;
#endif

	float3 SSSColor = float3(0, 0, 0);
	float LerpFactor = 1;

#if SUBSURFACE_RECOMBINE_MODE != SUBSURFACE_RECOMBINE_MODE_NO_SCATTERING
#if SUBSURFACE_HALF_RES
	// fade out subsurface scattering if radius is too small to be more crisp (not blend with half resolution)
	// minor quality improvement (faces are more detailed in distance)
	LerpFactor = ComputeFullResLerp(ProfileRadiusScale, PixelRadiusScale, BufferUV, SubsurfaceInput1_ExtentInverse);
#endif // SUBSURFACE_HALF_RES

	{
		// subsurface scattering buffer has garbage data outside of viewport bilinear min/max.
float2 ClampedBufferUV = clamp(BufferUV, SubsurfaceInput1_UVViewportBilinearMin, SubsurfaceInput1_UVViewportBilinearMax);
		float4 SSSColorWithAlpha = Texture2DSampleLevel(SubsurfaceInput1_Texture, SharedSubsurfaceSampler1, ClampedBufferUV, 0);

		// renormalize to dilate RGB to fix half res upsampling artifacts
		SSSColor = SSSColorWithAlpha.rgb / max(SSSColorWithAlpha.a, 0.00001f);
	}
#else // SUBSURFACE_RECOMBINE_MODE == SUBSURFACE_RECOMBINE_MODE_NO_SCATTERING
	// Scalability requests no Scatter, but we still need to reconstruct a color
	LerpFactor = 0;
#endif // SUBSURFACE_RECOMBINE_MODE

	float3 DirectColor = Texture2DSample(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, BufferUV).rgb;

	// NOTE(review): bIsHalfResUpsample is hard-coded to false, so the Burley center sample blend below never runs as written - confirm this is intended.
	bool bIsHalfResUpsample = false;
	if (bIsHalfResUpsample && GetSubsurfaceProfileUseBurley(ProfileId))
	{
#if SUBTRATE_GBUFFER_FORMAT==1
		FBurleyParameter BurleyParameter = GetBurleyParameters(SSSData);
#else
		FBurleyParameter BurleyParameter = GetBurleyParameters(ProfileId, ScreenSpaceData.GBuffer);
#endif
		float Depth = CalcSceneDepth(BufferUV);
		float3 CenterSampleWeight = CalculateCenterSampleWeight(Depth, BurleyParameter);
		SSSColor = lerp(SSSColor,DirectColor,CenterSampleWeight);
	}

	// Same quality selection as in the setup pass: method 3 for high quality, method 1 otherwise.
	uint ReconstructMethod = (SUBSURFACE_RECOMBINE_QUALITY == SUBSURFACE_RECOMBINE_QUALITY_HIGH) ?
3 : 1;

	const SDiffuseAndSpecular Lighting = ReconstructLighting(BufferUV, ReconstructMethod);

	// The profile tint, faded by the lerp factor, selects per channel between the
	// reconstructed diffuse lighting and the scattered color (potentially half res).
	const float3 ScatterWeight = Tint * LerpFactor;
	const float3 DiffuseLighting = lerp(Lighting.Diffuse, SSSColor, ScatterWeight);

	// Base color is applied to the diffuse part only; the specular part is added back unchanged.
	OutColor = float4(DiffuseLighting * StoredBaseColor + Lighting.Specular, 0.0f);
}

// Copies SubsurfaceInput0 (mip 0) to the output at the UV of the screen pass vertex output.
void SubsurfaceRecombineCopyPS(in FScreenVertexOutput Input, out float4 OutColor : SV_Target0)
{
	OutColor = Texture2DSampleLevel(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, Input.UV.xy, 0);
}

// Copies SubsurfaceInput0 to the output at the interpolated InUV.xy.
void SubsurfaceViewportCopyPS(noperspective float4 InUV : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	const float2 SampleUV = InUV.xy;
	OutColor = Texture2DSample(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, SampleUV);
}