// Source-listing metadata (not shader code; kept as a comment so this file remains valid HLSL):
// File: UnrealEngine/Engine/Shaders/Private/Common.ush
// Snapshot: 2025-05-18 13:04:45 +08:00
// 2473 lines, 78 KiB, HLSL
// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PlatformCommon.usf: Common shader code
=============================================================================*/
#pragma once
#include "/Engine/Public/Platform.ush"
#include "PackUnpack.ush"
// These types are used for material translator generated code, or any functions the translated code can call
// Pixel shaders may evaluate material code at half precision for speed, unless the
// project forces full precision via FORCE_MATERIAL_FLOAT_FULL_PRECISION.
#if PIXELSHADER && !FORCE_MATERIAL_FLOAT_FULL_PRECISION
#define MaterialFloat half
#define MaterialFloat2 half2
#define MaterialFloat3 half3
#define MaterialFloat4 half4
#define MaterialFloat3x3 half3x3
#define MaterialFloat4x4 half4x4
#define MaterialFloat4x3 half4x3
#else
// Material translated vertex shader code always uses floats,
// Because it's used for things like world position and UVs
#define MaterialFloat float
#define MaterialFloat2 float2
#define MaterialFloat3 float3
#define MaterialFloat4 float4
#define MaterialFloat3x3 float3x3
#define MaterialFloat4x4 float4x4
#define MaterialFloat4x3 float4x3
#endif
// COMPUTE_SHADED defaults to off unless defined by the including translation unit.
#ifndef COMPUTE_SHADED
#define COMPUTE_SHADED 0
#endif
// A scalar value bundled with its screen-space partial derivatives (d/dx, d/dy).
// Used to propagate analytic derivatives through material code (e.g. for SampleGrad).
struct FloatDeriv
{
float Value;
float Ddx;
float Ddy;
};
// float2 value with per-component screen-space derivatives.
struct FloatDeriv2
{
float2 Value;
float2 Ddx;
float2 Ddy;
};
// float3 value with per-component screen-space derivatives.
struct FloatDeriv3
{
float3 Value;
float3 Ddx;
float3 Ddy;
};
// float4 value with per-component screen-space derivatives.
struct FloatDeriv4
{
float4 Value;
float4 Ddx;
float4 Ddy;
};
// Builds a FloatDeriv from a value and its screen-space ddx/ddy derivatives.
FloatDeriv ConstructFloatDeriv(float InValue, float InDdx, float InDdy)
{
FloatDeriv Result;
Result.Value = InValue;
Result.Ddx = InDdx;
Result.Ddy = InDdy;
return Result;
}
// Builds a FloatDeriv2 from a float2 value and its screen-space derivatives.
FloatDeriv2 ConstructFloatDeriv2(float2 InValue, float2 InDdx, float2 InDdy)
{
FloatDeriv2 Result;
Result.Value = InValue;
Result.Ddx = InDdx;
Result.Ddy = InDdy;
return Result;
}
// Builds a FloatDeriv3 from a float3 value and its screen-space derivatives.
FloatDeriv3 ConstructFloatDeriv3(float3 InValue, float3 InDdx, float3 InDdy)
{
FloatDeriv3 Result;
Result.Value = InValue;
Result.Ddx = InDdx;
Result.Ddy = InDdy;
return Result;
}
// Builds a FloatDeriv4 from a float4 value and its screen-space derivatives.
FloatDeriv4 ConstructFloatDeriv4(float4 InValue, float4 InDdx, float4 InDdy)
{
FloatDeriv4 Result;
Result.Value = InValue;
Result.Ddx = InDdx;
Result.Ddy = InDdy;
return Result;
}
#define FLOAT_MAX (asfloat(0x7F7FFFFF)) // Largest representable finite float (~3.4028235e38)
#define POSITIVE_INFINITY (asfloat(0x7F800000))
#define NEGATIVE_INFINITY (asfloat(0xFF800000))
// Unit conversion constants (engine world units are centimeters).
#define METER_TO_CENTIMETER 100.0f
#define CENTIMETER_TO_METER (1.0f / METER_TO_CENTIMETER)
#define KILOMETER_TO_METER 1000.0f
#define METER_TO_KILOMETER (1.0f / KILOMETER_TO_METER)
#define KILOMETER_TO_CENTIMETER (KILOMETER_TO_METER * METER_TO_CENTIMETER)
#define CENTIMETER_TO_KILOMETER (1.0f / KILOMETER_TO_CENTIMETER)
// Device-depth conventions: with an inverted Z buffer, 1 is the near plane and 0 is far.
#define NearDepthValue (HAS_INVERTED_Z_BUFFER ? 1.0f : 0.0f)
#define FarDepthValue (HAS_INVERTED_Z_BUFFER ? 0.0f : 1.0f)
// Returns the device depth closest to the camera among up to three depths.
// With an inverted Z buffer "nearest" is the largest value, otherwise the smallest.
// DepthC defaults to FarDepthValue so two-argument calls are unaffected by it.
float NearestDeviceDepth(float DepthA,
float DepthB,
float DepthC = FarDepthValue
)
{
#if HAS_INVERTED_Z_BUFFER
return max3(DepthA, DepthB, DepthC);
#else
return min3(DepthA, DepthB, DepthC);
#endif
}
// Returns the device depth farthest from the camera among up to three depths.
// DepthC defaults to NearDepthValue so two-argument calls are unaffected by it.
float FarthestDeviceDepth(float DepthA,
float DepthB,
float DepthC = NearDepthValue
)
{
#if HAS_INVERTED_Z_BUFFER
return min3(DepthA, DepthB, DepthC);
#else
return max3(DepthA, DepthB, DepthC);
#endif
}
const static MaterialFloat PI = 3.1415926535897932f;
// Largest finite values representable in fp16 and in the 11/10-bit packed float formats
// (used e.g. to clamp values before writing to R11G11B10 render targets).
const static float MaxHalfFloat = 65504.0f;
const static float Max11BitsFloat = 65024.0f;
const static float Max10BitsFloat = 64512.0f;
const static float3 Max111110BitsFloat3 = float3(Max11BitsFloat, Max11BitsFloat, Max10BitsFloat);
// TextureExternal (e.g. Android external OES textures) is only a distinct type on GLES;
// elsewhere it aliases Texture2D so the same material code compiles everywhere.
#define SUPPORTS_TEXTURE_EXTERNAL (COMPILER_GLSL_ES3_1)
#if !SUPPORTS_TEXTURE_EXTERNAL
#define TextureExternal Texture2D
#endif
// REGISTER(x) expands to an explicit register binding, except on HLSLCC cross-compiled targets.
#ifndef REGISTER
#if COMPILER_HLSLCC
#define REGISTER(x)
#else
#define REGISTER(x) : register(x)
#endif
#endif
#ifndef SUPPORTS_TEXTURECUBE_ARRAY
#define SUPPORTS_TEXTURECUBE_ARRAY 1
#endif
#if SUPPORTS_TEXTURECUBE_ARRAY == 0
// Define TextureCubeArray to something which will compile so we can use it in uniform buffers
#define TextureCubeArray TextureCube
#endif
// Control MIP level used for material texture fetches. By default only raytracing
// shaders (i.e., !PIXELSHADER) use manual MIP level selection. A material shader
// can opt. in to force a specific MIP level.
//
// * USE_FORCE_TEXTURE_MIP : enable/disable manual MIP level selection
// * FORCED_TEXTURE_MIP : force a specific MIP level
//
// Compute passes opt out first; then any non-pixel stage defaults to forced-MIP sampling.
#if COMPUTE_SHADED && !defined(USE_FORCE_TEXTURE_MIP)
#define USE_FORCE_TEXTURE_MIP 0
#endif
#if !PIXELSHADER && !defined(USE_FORCE_TEXTURE_MIP)
#define USE_FORCE_TEXTURE_MIP 1
#endif
#ifndef USE_FORCE_TEXTURE_MIP
#define USE_FORCE_TEXTURE_MIP 0
#endif
#ifndef FORCED_TEXTURE_MIP
#define FORCED_TEXTURE_MIP 0.0f
#endif
// Add definition of types used by generated uniform buffers
#include "GeneratedUniformBufferTypes.ush"
// Generated file that contains uniform buffer declarations needed by the shader being compiled
#include "/Engine/Generated/GeneratedUniformBuffers.ush"
// uniform buffers specifics
#include "CommonViewUniformBuffer.ush"
// In HLSL, fmod is implemented as 'Lhs - trunc(Lhs / Rhs) * Rhs'.
// This variant uses floor instead of trunc, so the result always carries the
// sign of Rhs (useful for wrapping coordinates into [0, Rhs)).
float FmodFloor(float Lhs, float Rhs)
{
float FlooredQuotient = floor(Lhs / Rhs);
return Lhs - FlooredQuotient * Rhs;
}
// Component-wise floored modulo for float2 (result has the sign of Rhs).
float2 FmodFloor(float2 Lhs, float2 Rhs)
{
float2 FlooredQuotient = floor(Lhs / Rhs);
return Lhs - FlooredQuotient * Rhs;
}
// Component-wise floored modulo for float3 (result has the sign of Rhs).
float3 FmodFloor(float3 Lhs, float3 Rhs)
{
float3 FlooredQuotient = floor(Lhs / Rhs);
return Lhs - FlooredQuotient * Rhs;
}
// Component-wise floored modulo for float4 (result has the sign of Rhs).
float4 FmodFloor(float4 Lhs, float4 Rhs)
{
float4 FlooredQuotient = floor(Lhs / Rhs);
return Lhs - FlooredQuotient * Rhs;
}
// Sum of all components of a vector (scalar overload is the identity).
float VectorSum(float V) { return V; }
float VectorSum(float2 V) { return V.x + V.y; }
float VectorSum(float3 V) { return V.x + V.y + V.z; }
float VectorSum(float4 V) { return V.x + V.y + V.z + V.w; }
#include "LargeWorldCoordinates.ush"
#include "InstancedStereo.ush"
#include "Definitions.usf"
#include "AssertionMacros.ush"
// VELOCITY_ENCODE_DEPTH: when 1, device depth is packed alongside screen-space velocity.
#ifndef VELOCITY_ENCODE_DEPTH
#define VELOCITY_ENCODE_DEPTH 1
#endif
// Gamma-encode velocity on SM5+ targets (better precision distribution near zero).
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5
#define VELOCITY_ENCODE_GAMMA 1
#else
#define VELOCITY_ENCODE_GAMMA 0
#endif
// OpenGL stores encoded velocity in integer targets; other RHIs use float targets.
#if OPENGL_PROFILE
#define ENCODED_VELOCITY_TYPE uint4
#else
#define ENCODED_VELOCITY_TYPE float4
#endif
//Tie Editor features to platform support and the COMPILE_SHADERS_FOR_DEVELOPMENT which is set via CVAR.
#define USE_EDITOR_SHADERS (PLATFORM_SUPPORTS_EDITOR_SHADERS && USE_DEVELOPMENT_SHADERS)
// Using SV_ClipDistance has overhead (15% slower base pass in triangle bound test scene on PS4) so projects have to opt-in
#define USE_GLOBAL_CLIP_PLANE (PROJECT_ALLOW_GLOBAL_CLIP_PLANE && !MATERIAL_DOMAIN_POSTPROCESS && !MATERIAL_DOMAIN_UI)
#ifndef RAYTRACINGSHADER
#define RAYTRACINGSHADER (RAYHITGROUPSHADER || RAYMISSSHADER || RAYCALLABLESHADER)
#endif
#if RAYTRACINGSHADER
// These built-ins are not available in ray tracing
// Define dummy versions so that ray-tracing materials will at least compile
#define clip(x)
#define ddx(x) 0
#define ddy(x) 0
#define fwidth(x) 0
#endif
// Ray-cone texture LOD selection is only meaningful in ray tracing hit-group shaders.
#ifndef USE_RAYTRACED_TEXTURE_RAYCONE_LOD
#define USE_RAYTRACED_TEXTURE_RAYCONE_LOD (RAYHITGROUPSHADER)
#endif // USE_RAYTRACED_TEXTURE_RAYCONE_LOD
// Shader-global state: an extra MIP bias applied by the forced-MIP sampling helpers,
// and the ray-cone footprint term consumed by ComputeRayConeLod (set by RT shaders).
static float GlobalTextureMipBias = 0;
static float GlobalRayCone_TexArea = 0;
// Computes a texture LOD for ray traced shading from the global ray-cone footprint:
// 0.5 * log2(footprint * texelCount) gives the MIP whose texel density matches the cone.
// Outside ray tracing this falls back to the compile-time FORCED_TEXTURE_MIP.
float ComputeRayConeLod(Texture2D Tex)
{
#if USE_RAYTRACED_TEXTURE_RAYCONE_LOD
uint2 Dimensions;
Tex.GetDimensions(Dimensions.x, Dimensions.y);
int TexArea = Dimensions.x * Dimensions.y;
return 0.5f * log2(GlobalRayCone_TexArea * TexArea);
#else
return FORCED_TEXTURE_MIP;
#endif
}
// Clamps each component to [0, MaxHalfFloat] so the value survives storage in an fp16 target.
float ClampToHalfFloatRange(float X) { return min(max(X, float(0)), MaxHalfFloat); }
float2 ClampToHalfFloatRange(float2 X) { return min(max(X, float(0).xx), MaxHalfFloat.xx); }
float3 ClampToHalfFloatRange(float3 X) { return min(max(X, float(0).xxx), MaxHalfFloat.xxx); }
float4 ClampToHalfFloatRange(float4 X) { return min(max(X, float(0).xxxx), MaxHalfFloat.xxxx); }
// This would need to be a #define in GLSL to ignore the SamplerState, however, it is currently a function call in HLSL
// for type checking of the parameters - ironically the type checking is really only needed in GLSL!
// Samples a 1D texture; under forced-MIP mode (e.g. ray tracing) it always reads MIP 0
// because no ray-cone LOD is computed for 1D textures.
MaterialFloat4 Texture1DSample(Texture1D Tex, SamplerState Sampler, float UV)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, 0);
#else
return Tex.Sample(Sampler, UV);
#endif
}
// Samples a 2D texture. In forced-MIP mode the LOD comes from the ray-cone estimate
// plus the global MIP bias; otherwise hardware derivative-based sampling is used.
MaterialFloat4 Texture2DSample(Texture2D Tex, SamplerState Sampler, float2 UV)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, ComputeRayConeLod(Tex) + GlobalTextureMipBias);
#else
return Tex.Sample(Sampler, UV);
#endif
}
// Overload taking analytic derivatives: uses SampleGrad with the provided ddx/ddy
// when hardware derivatives are available.
MaterialFloat4 Texture2DSample(Texture2D Tex, SamplerState Sampler, FloatDeriv2 UV)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV.Value, ComputeRayConeLod(Tex) + GlobalTextureMipBias);
#else
return Tex.SampleGrad(Sampler, UV.Value, UV.Ddx, UV.Ddy);
#endif
}
// Samples a single-channel (alpha/grayscale) texture; A8_SAMPLE_MASK is a
// platform-defined swizzle suffix selecting the channel that holds the data.
MaterialFloat Texture2DSample_A8(Texture2D Tex, SamplerState Sampler, float2 UV)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, ComputeRayConeLod(Tex) + GlobalTextureMipBias) A8_SAMPLE_MASK;
#else
return Tex.Sample(Sampler, UV) A8_SAMPLE_MASK;
#endif
}
// Samples a 3D texture; forced-MIP mode reads MIP 0 (ray-cone LOD is 2D-only).
MaterialFloat4 Texture3DSample(Texture3D Tex, SamplerState Sampler, float3 UV)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, 0);
#else
return Tex.Sample(Sampler, UV);
#endif
}
// Samples a cube map by direction vector; forced-MIP mode reads MIP 0.
MaterialFloat4 TextureCubeSample(TextureCube Tex, SamplerState Sampler, float3 UV)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, 0);
#else
return Tex.Sample(Sampler, UV);
#endif
}
// Samples a 2D texture array (UV.z is the array slice); forced-MIP mode reads MIP 0.
MaterialFloat4 Texture2DArraySample(Texture2DArray Tex, SamplerState Sampler, float3 UV)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, 0);
#else
return Tex.Sample(Sampler, UV);
#endif
}
// Explicit-LOD sampling wrappers: valid in any shader stage (no derivatives needed).
MaterialFloat4 Texture1DSampleLevel(Texture1D Tex, SamplerState Sampler, float UV, MaterialFloat Mip)
{
return Tex.SampleLevel(Sampler, UV, Mip);
}
MaterialFloat4 Texture2DSampleLevel(Texture2D Tex, SamplerState Sampler, float2 UV, MaterialFloat Mip)
{
return Tex.SampleLevel(Sampler, UV, Mip);
}
// Biased sampling: in forced-MIP mode the bias is added to the ray-cone LOD instead
// of relying on the hardware SampleBias path.
MaterialFloat4 Texture2DSampleBias(Texture2D Tex, SamplerState Sampler, float2 UV, MaterialFloat MipBias)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, ComputeRayConeLod(Tex) + MipBias + GlobalTextureMipBias);
#else
return Tex.SampleBias(Sampler, UV, MipBias);
#endif
}
// Explicit-gradient sampling wrapper (caller supplies UV derivatives).
MaterialFloat4 Texture2DSampleGrad(Texture2D Tex, SamplerState Sampler, float2 UV, MaterialFloat2 DDX, MaterialFloat2 DDY)
{
return Tex.SampleGrad(Sampler, UV, DDX, DDY);
}
// Explicit-LOD 3D sampling wrapper.
MaterialFloat4 Texture3DSampleLevel(Texture3D Tex, SamplerState Sampler, float3 UV, MaterialFloat Mip)
{
return Tex.SampleLevel(Sampler, UV, Mip);
}
// Biased 3D sampling; in forced-MIP mode the bias is ignored and MIP 0 is read
// (consistent with Texture3DSample, since no ray-cone LOD exists for 3D textures).
MaterialFloat4 Texture3DSampleBias(Texture3D Tex, SamplerState Sampler, float3 UV, MaterialFloat MipBias)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, 0);
#else
return Tex.SampleBias(Sampler, UV, MipBias);
#endif
}
// Explicit-gradient 3D sampling wrapper.
MaterialFloat4 Texture3DSampleGrad(Texture3D Tex, SamplerState Sampler, float3 UV, MaterialFloat3 DDX, MaterialFloat3 DDY)
{
return Tex.SampleGrad(Sampler, UV, DDX, DDY);
}
// Explicit-LOD sampling for 2D texture arrays (UV.z is the array slice).
MaterialFloat4 Texture2DArraySampleLevel(Texture2DArray Tex, SamplerState Sampler, float3 UV, MaterialFloat Mip)
{
return Tex.SampleLevel(Sampler, UV, Mip);
}
// Explicit-LOD cube map sampling by direction vector.
MaterialFloat4 TextureCubeSampleLevel(TextureCube Tex, SamplerState Sampler, float3 UV, MaterialFloat Mip)
{
return Tex.SampleLevel(Sampler, UV, Mip);
}
// Samples only the depth (red) channel of a cube depth texture at an explicit LOD.
MaterialFloat TextureCubeSampleDepthLevel(TextureCube TexDepth, SamplerState Sampler, float3 UV, MaterialFloat Mip)
{
return TexDepth.SampleLevel(Sampler, UV, Mip).x;
}
// Biased cube sampling; in forced-MIP mode the bias is ignored and MIP 0 is read.
MaterialFloat4 TextureCubeSampleBias(TextureCube Tex, SamplerState Sampler, float3 UV, MaterialFloat MipBias)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, 0);
#else
return Tex.SampleBias(Sampler, UV, MipBias);
#endif
}
// Explicit-gradient cube sampling wrapper.
MaterialFloat4 TextureCubeSampleGrad(TextureCube Tex, SamplerState Sampler, float3 UV, MaterialFloat3 DDX, MaterialFloat3 DDY)
{
return Tex.SampleGrad(Sampler, UV, DDX, DDY);
}
// Samples an external (e.g. video) texture. On platforms where TextureExternal is a
// real distinct type (GLES), forced-MIP mode reads MIP 0; elsewhere TextureExternal
// aliases Texture2D so the ray-cone LOD path applies.
MaterialFloat4 TextureExternalSample(TextureExternal Tex, SamplerState Sampler, float2 UV)
{
#if USE_FORCE_TEXTURE_MIP
#if SUPPORTS_TEXTURE_EXTERNAL
return Tex.SampleLevel(Sampler, UV, 0);
#else
return Tex.SampleLevel(Sampler, UV, ComputeRayConeLod(Tex) + GlobalTextureMipBias);
#endif
#else
return Tex.Sample(Sampler, UV);
#endif
}
// Explicit-gradient sampling of an external texture.
MaterialFloat4 TextureExternalSampleGrad(TextureExternal Tex, SamplerState Sampler, float2 UV, MaterialFloat2 DDX, MaterialFloat2 DDY)
{
return Tex.SampleGrad(Sampler, UV, DDX, DDY);
}
// Explicit-LOD sampling of an external texture.
MaterialFloat4 TextureExternalSampleLevel(TextureExternal Tex, SamplerState Sampler, float2 UV, MaterialFloat Mip)
{
return Tex.SampleLevel(Sampler, UV, Mip);
}
// Gather4 wrappers: fetch the four texels used for bilinear filtering at UV,
// returning one channel per texel (useful for custom filtering / PCF).
MaterialFloat4 Texture2DGatherRed(Texture2D Tex, SamplerState Sampler, float2 UV)
{
return Tex.GatherRed(Sampler, UV);
}
MaterialFloat4 Texture2DGatherGreen(Texture2D Tex, SamplerState Sampler, float2 UV)
{
return Tex.GatherGreen(Sampler, UV);
}
MaterialFloat4 Texture2DGatherBlue(Texture2D Tex, SamplerState Sampler, float2 UV)
{
return Tex.GatherBlue(Sampler, UV);
}
MaterialFloat4 Texture2DGatherAlpha(Texture2D Tex, SamplerState Sampler, float2 UV)
{
return Tex.GatherAlpha(Sampler, UV);
}
// Cube gathers use the overload with a residency status out-parameter
// (presumably required by some compilers for cube gathers); the status is ignored.
MaterialFloat4 TextureCubeGatherRed(TextureCube Tex, SamplerState Sampler, float3 UV)
{
uint StatusIgnored;
return Tex.GatherRed(Sampler, UV, StatusIgnored);
}
MaterialFloat4 TextureCubeGatherGreen(TextureCube Tex, SamplerState Sampler, float3 UV)
{
uint StatusIgnored;
return Tex.GatherGreen(Sampler, UV, StatusIgnored);
}
MaterialFloat4 TextureCubeGatherBlue(TextureCube Tex, SamplerState Sampler, float3 UV)
{
uint StatusIgnored;
return Tex.GatherBlue(Sampler, UV, StatusIgnored);
}
MaterialFloat4 TextureCubeGatherAlpha(TextureCube Tex, SamplerState Sampler, float3 UV)
{
uint StatusIgnored;
return Tex.GatherAlpha(Sampler, UV, StatusIgnored);
}
// 2D texture array gathers (UV.z is the array slice).
MaterialFloat4 Texture2DArrayGatherRed(Texture2DArray Tex, SamplerState Sampler, float3 UV)
{
return Tex.GatherRed(Sampler, UV);
}
MaterialFloat4 Texture2DArrayGatherGreen(Texture2DArray Tex, SamplerState Sampler, float3 UV)
{
return Tex.GatherGreen(Sampler, UV);
}
MaterialFloat4 Texture2DArrayGatherBlue(Texture2DArray Tex, SamplerState Sampler, float3 UV)
{
return Tex.GatherBlue(Sampler, UV);
}
MaterialFloat4 Texture2DArrayGatherAlpha(Texture2DArray Tex, SamplerState Sampler, float3 UV)
{
return Tex.GatherAlpha(Sampler, UV);
}
// Re-routed texture sampling for decals
// On iOS and Android MALI devices decal UVs has issues with deriavatives on polygon edges resulting in 'wireframe' rendering artifacts (UE-50971)
// to workaround we always sample top mip level
MaterialFloat4 Texture1DSample_Decal(Texture1D Tex, SamplerState Sampler, float UV)
{
#if METAL_ES3_1_PROFILE || COMPILER_GLSL_ES3_1
return Texture1DSampleLevel(Tex, Sampler, UV, 0);
#else
return Texture1DSample(Tex, Sampler, UV);
#endif
}
MaterialFloat4 Texture2DSample_Decal(Texture2D Tex, SamplerState Sampler, float2 UV)
{
#if METAL_ES3_1_PROFILE || COMPILER_GLSL_ES3_1
return Texture2DSampleLevel(Tex, Sampler, UV, 0);
#else
return Texture2DSample(Tex, Sampler, UV);
#endif
}
MaterialFloat4 Texture3DSample_Decal(Texture3D Tex, SamplerState Sampler, float3 UV)
{
#if METAL_ES3_1_PROFILE || COMPILER_GLSL_ES3_1
return Texture3DSampleLevel(Tex, Sampler, UV, 0);
#else
return Texture3DSample(Tex, Sampler, UV);
#endif
}
MaterialFloat4 Texture2DArraySample_Decal(Texture2DArray Tex, SamplerState Sampler, float3 UV)
{
#if METAL_ES3_1_PROFILE || COMPILER_GLSL_ES3_1
return Texture2DArraySampleLevel(Tex, Sampler, UV, 0);
#else
return Texture2DArraySample(Tex, Sampler, UV);
#endif
}
MaterialFloat4 TextureCubeSample_Decal(TextureCube Tex, SamplerState Sampler, float3 UV)
{
#if METAL_ES3_1_PROFILE || COMPILER_GLSL_ES3_1
return TextureCubeSampleLevel(Tex, Sampler, UV, 0);
#else
return TextureCubeSample(Tex, Sampler, UV);
#endif
}
MaterialFloat4 TextureExternalSample_Decal(TextureExternal Tex, SamplerState Sampler, float2 UV)
{
#if METAL_ES3_1_PROFILE || COMPILER_GLSL_ES3_1
return TextureExternalSampleLevel(Tex, Sampler, UV, 0);
#else
return TextureExternalSample(Tex, Sampler, UV);
#endif
}
// Biased 2D-array sampling; in forced-MIP mode the bias is ignored and MIP 0 is read
// (consistent with the other non-2D biased samplers above).
MaterialFloat4 Texture2DArraySampleBias(Texture2DArray Tex, SamplerState Sampler, float3 UV, MaterialFloat MipBias)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, 0);
#else
return Tex.SampleBias(Sampler, UV, MipBias);
#endif
}
// Explicit-gradient 2D-array sampling (gradients are 2D; UV.z selects the slice).
MaterialFloat4 Texture2DArraySampleGrad(Texture2DArray Tex, SamplerState Sampler, float3 UV, MaterialFloat2 DDX, MaterialFloat2 DDY)
{
return Tex.SampleGrad(Sampler, UV, DDX, DDY);
}
// Converts a 1D frame index into a 2D tile position within a grid that is
// 'xsize' tiles wide. Useful for locating Z slices laid out in a 2D flipbook.
float2 Tile1Dto2D(float xsize, float idx)
{
float RowIndex = floor(idx / xsize);
float ColumnIndex = idx - xsize * RowIndex;
return float2(ColumnIndex, RowIndex);
}
// return a pseudovolume texture sample.
// useful for simulating 3D texturing with a 2D texture or as a texture flipbook with lerped transitions
// treats 2d layout of frames a 3d texture and performs bilinear filtering by blending with an offset Z frame.
// Wrap repeat mode along XY is not seamless. This is however enough for current sampling use cases all in [0,1].
// @param Tex = Input Texture Object storing Volume Data
// @param inPos = Input float3 for Position, 0-1
// @param xysize = Input float for num frames in x,y directions
// @param numframes = Input float for num total frames
// @param mipmode = Sampling mode: 0 = use miplevel, 1 = use UV computed gradients, 2 = Use gradients (default=0)
// @param miplevel = MIP level to use in mipmode=0 (default 0)
// @param InDDX, InDDY = Texture gradients in mipmode=2
float4 PseudoVolumeTexture(Texture2D Tex, SamplerState TexSampler, float3 InPos, float2 XYSize, float NumFrames,
uint MipMode = 0, float MipLevel = 0, float2 InDDX = 0, float2 InDDY = 0)
{
float Z = InPos.z - 0.5f / NumFrames; // This offset is needed to have a behavior consistent with hardware sampling (voxel value is at their center)
// Split Z into the integer slice below the sample point and the blend fraction to the next slice.
float ZFrame = floor(Z * NumFrames);
float ZPhase = frac(Z * NumFrames);
// UV within one tile, then offset into the current and next Z-slice tiles of the flipbook grid.
float2 UV = frac(InPos.xy) / XYSize;
float2 CurFrame = Tile1Dto2D(XYSize.x, ZFrame) / XYSize;
float2 NextFrame = Tile1Dto2D(XYSize.x, ZFrame + 1) / XYSize;
float2 UVCurFrame = UV + CurFrame;
float2 UVNextFrame = UV + NextFrame;
float4 SampleA = 0, SampleB = 0;
switch (MipMode)
{
case 0: // Mip level
SampleA = Tex.SampleLevel(TexSampler, UVCurFrame, MipLevel);
SampleB = Tex.SampleLevel(TexSampler, UVNextFrame, MipLevel);
break;
case 1: // Gradients automatic from UV
SampleA = Texture2DSample(Tex, TexSampler, UVCurFrame);
SampleB = Texture2DSample(Tex, TexSampler, UVNextFrame);
break;
case 2: // Deriviatives provided
SampleA = Tex.SampleGrad(TexSampler, UVCurFrame, InDDX, InDDY);
SampleB = Tex.SampleGrad(TexSampler, UVNextFrame, InDDX, InDDY);
break;
default:
break;
}
// Linear blend between the two Z slices emulates trilinear filtering along Z.
return lerp(SampleA, SampleB, ZPhase);
}
// return a pseudovolume texture sample where the input is a 4 channel packed texure that has neighbour slices.
// useful for simulating 3D texturing with a 2D texture or as a texture flipbook with lerped transitions
// treats 2d layout of frames a 3d texture and performs bilinear filtering by blending with an offset Z frame.
// Wrap repeat mode along XY is not seamless. This is however enough for current sampling use cases all in [0,1].
// @param Tex = Input Texture Object storing Volume Data
// @param inPos = Input float3 for Position, 0-1
// @param xysize = Input float for num frames in x,y directions
// @param numframes = Input float for num total frames
// @param mipmode = Sampling mode: 0 = use miplevel, 1 = use UV computed gradients, 2 = Use gradients (default=0)
// @param miplevel = MIP level to use in mipmode=0 (default 0)
// @param InDDX, InDDY = Texture gradients in mipmode=2
float2 PseudoVolumeTexture2ChannelPacked(Texture2D Tex, SamplerState TexSampler, float3 InPos, float2 XYSize, float NumFrames,
uint MipMode = 0, float MipLevel = 0, float2 InDDX = 0, float2 InDDY = 0)
{
float Z = InPos.z - 0.5f / NumFrames; // This offset is needed to have a behavior consistent with hardware sampling (voxel value is at their center)
float ZFrame = floor(Z * NumFrames);
float ZPhase = frac(Z * NumFrames);
// Only one tile is fetched: the neighbouring Z slice is packed into the other channel pair.
float2 UV = frac(InPos.xy) / XYSize;
float2 CurFrame = Tile1Dto2D(XYSize.x, ZFrame) / XYSize;
float2 UVCurFrame = UV + CurFrame;
float4 Sample = 0;
switch (MipMode)
{
case 0: // Mip level
Sample = Tex.SampleLevel(TexSampler, UVCurFrame, MipLevel);
break;
case 1: // Gradients automatic from UV
Sample = Texture2DSample(Tex, TexSampler, UVCurFrame);
break;
case 2: // Deriviatives provided
Sample = Tex.SampleGrad(TexSampler, UVCurFrame, InDDX, InDDY);
break;
default:
break;
}
// Channels r/b hold the current slice's two values, g/a hold the next slice's;
// lerp along Z between the packed pairs.
return lerp(Sample.rb, Sample.ga, ZPhase);
}
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 // Cubemap arrays are not supported in SM4 feature level
// Cube-array sampling wrappers: UV = (direction.xyz, array index).
MaterialFloat4 TextureCubeArraySample(TextureCubeArray Tex, SamplerState Sampler, float4 UV)
{
return Tex.Sample(Sampler, UV);
}
MaterialFloat4 TextureCubeArraySampleLevel(TextureCubeArray Tex, SamplerState Sampler, float4 UV, MaterialFloat Mip)
{
return Tex.SampleLevel(Sampler, UV, Mip);
}
// Biased cube-array sampling; in forced-MIP mode the bias is ignored and MIP 0 is read.
MaterialFloat4 TextureCubeArraySampleBias(TextureCubeArray Tex, SamplerState Sampler, float4 UV, MaterialFloat MipBias)
{
#if USE_FORCE_TEXTURE_MIP
return Tex.SampleLevel(Sampler, UV, 0);
#else
return Tex.SampleBias(Sampler, UV, MipBias);
#endif
}
MaterialFloat4 TextureCubeArraySampleGrad(TextureCubeArray Tex, SamplerState Sampler, float4 UV, MaterialFloat3 DDX, MaterialFloat3 DDY)
{
return Tex.SampleGrad(Sampler, UV, DDX, DDY);
}
// Overload with explicit array parameter, used by some non-material shader code
MaterialFloat4 TextureCubeArraySampleLevel(TextureCubeArray Tex, SamplerState Sampler, float3 UV, float ArrayIndex, MaterialFloat Mip)
{
return TextureCubeArraySampleLevel(Tex, Sampler, float4(UV, ArrayIndex), Mip);
}
// Cube-array Gather4 wrappers; the residency status out-parameter is ignored.
MaterialFloat4 TextureCubeArrayGatherRed(TextureCubeArray Tex, SamplerState Sampler, float4 UV)
{
uint StatusIgnored;
return Tex.GatherRed(Sampler, UV, StatusIgnored);
}
MaterialFloat4 TextureCubeArrayGatherGreen(TextureCubeArray Tex, SamplerState Sampler, float4 UV)
{
uint StatusIgnored;
return Tex.GatherGreen(Sampler, UV, StatusIgnored);
}
MaterialFloat4 TextureCubeArrayGatherBlue(TextureCubeArray Tex, SamplerState Sampler, float4 UV)
{
uint StatusIgnored;
return Tex.GatherBlue(Sampler, UV, StatusIgnored);
}
MaterialFloat4 TextureCubeArrayGatherAlpha(TextureCubeArray Tex, SamplerState Sampler, float4 UV)
{
uint StatusIgnored;
return Tex.GatherAlpha(Sampler, UV, StatusIgnored);
}
#endif // FEATURE_LEVEL >= FEATURE_LEVEL_SM5
// TANGENTTOWORLD0 is the first row of the tangent to world matrix, w might be needed for padding and is not used yet.
// TANGENTTOWORLD2 is the last row of the tangent to world matrix, determinant of tangent basis in w
// Helper macro to determine whether we need to separately interpolate the world vertex normal to the pixel center.
// Currently only curvature-to-roughness needs this interpolation, so disable it when it's not used to save on interpolants.
#define USE_WORLDVERTEXNORMAL_CENTER_INTERPOLATION (FEATURE_LEVEL >= FEATURE_LEVEL_SM5 && MATERIAL_NORMAL_CURVATURE_TO_ROUGHNESS)
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5
#define TANGENTTOWORLD0 TEXCOORD10
#define TANGENTTOWORLD2 TEXCOORD11
// _centroid is needed to get better quality with MSAA
// The D3D shader compiler combines _centroid and non centroid. Using float3 would result in a internal
// shader compiler error. This block is using float4 to prevent that.
#if USE_WORLDVERTEXNORMAL_CENTER_INTERPOLATION
#define TANGENTTOWORLD_INTERPOLATOR_BLOCK float4 TangentToWorld0 : TEXCOORD10_centroid; float4 TangentToWorld2 : TEXCOORD11_centroid; \
float4 TangentToWorld2_Center : TEXCOORD15;
#else
#define TANGENTTOWORLD_INTERPOLATOR_BLOCK float4 TangentToWorld0 : TEXCOORD10_centroid; float4 TangentToWorld2 : TEXCOORD11_centroid;
#endif
#else
// Pre-SM5 targets: no centroid interpolation; mobile uses half precision except under emulation.
#define TANGENTTOWORLD0 TEXCOORD10
#define TANGENTTOWORLD2 TEXCOORD11
#if MOBILE_EMULATION
#define TANGENTTOWORLD_INTERPOLATOR_BLOCK float4 TangentToWorld0 : TANGENTTOWORLD0; float4 TangentToWorld2 : TANGENTTOWORLD2;
#else
#define TANGENTTOWORLD_INTERPOLATOR_BLOCK half4 TangentToWorld0 : TANGENTTOWORLD0; half4 TangentToWorld2 : TANGENTTOWORLD2;
#endif
#endif
// Returns the per-channel weights used to compute luminance from linear RGB.
// Legacy mode keeps the old NTSC-style factors; otherwise the weights are the
// Rec.709/sRGB luminance coefficients, or the Y row of the working color space's
// RGB-to-XYZ matrix when the working space is not sRGB.
float3 LuminanceFactors()
{
#if UE_LEGACY_LUMINANCE_FACTORS
return float3(0.3, 0.59, 0.11);
#else
#if WORKING_COLOR_SPACE_IS_SRGB || MATERIAL_DOMAIN_UI
return float3(0.2126390059, 0.7151686788, 0.0721923154);
#else
return float3(WORKING_COLOR_SPACE_RGB_TO_XYZ_MAT._m10_m11_m12);
#endif
#endif
}
// Relative luminance of a linear color (dot product with the factors above).
MaterialFloat Luminance( MaterialFloat3 LinearColor )
{
return dot( LinearColor, MaterialFloat3(LuminanceFactors()));
}
// Squared Euclidean length — avoids the sqrt of length() when only relative
// magnitudes are needed (e.g. distance comparisons).
float length2(float2 V)
{
return dot(V, V);
}
float length2(float3 V)
{
return dot(V, V);
}
float length2(float4 V)
{
return dot(V, V);
}
// Unsigned integer modulo. Pre-ES3.1 targets lack integer '%', so it is emulated
// via float division (exact only while the quotient fits float precision).
uint Mod(uint a, uint b)
{
#if FEATURE_LEVEL >= FEATURE_LEVEL_ES3_1
return a % b;
#else
return a - (b * (uint)((float)a / (float)b));
#endif
}
uint2 Mod(uint2 a, uint2 b)
{
#if FEATURE_LEVEL >= FEATURE_LEVEL_ES3_1
return a % b;
#else
return a - (b * (uint2)((float2)a / (float2)b));
#endif
}
uint3 Mod(uint3 a, uint3 b)
{
#if FEATURE_LEVEL >= FEATURE_LEVEL_ES3_1
return a % b;
#else
return a - (b * (uint3)((float3)a / (float3)b));
#endif
}
// Smallest base allowed in ClampedPow; keeps pow() away from 0 (INF/NaN territory).
#define POW_CLAMP 0.000001f
// Clamp the base, so it's never <= 0.0f (INF/NaN).
// Note: abs() means negative bases are folded to positive before the pow.
MaterialFloat ClampedPow(MaterialFloat X,MaterialFloat Y)
{
return pow(max(abs(X),POW_CLAMP),Y);
}
MaterialFloat2 ClampedPow(MaterialFloat2 X,MaterialFloat2 Y)
{
return pow(max(abs(X),MaterialFloat2(POW_CLAMP,POW_CLAMP)),Y);
}
MaterialFloat3 ClampedPow(MaterialFloat3 X,MaterialFloat3 Y)
{
return pow(max(abs(X),MaterialFloat3(POW_CLAMP,POW_CLAMP,POW_CLAMP)),Y);
}
MaterialFloat4 ClampedPow(MaterialFloat4 X,MaterialFloat4 Y)
{
return pow(max(abs(X),MaterialFloat4(POW_CLAMP,POW_CLAMP,POW_CLAMP,POW_CLAMP)),Y);
}
// Pow that returns exactly 0 when Base is <= 0 (or small enough to produce a
// floating point special). Prevents NaN/INF when compilers expand pow into
// exp(Exponent * log(Base)).
MaterialFloat PositiveClampedPow(MaterialFloat Base, MaterialFloat Exponent)
{
if (Base <= 2.980233e-8f)
{
return 0.0f;
}
return pow(Base, Exponent);
}
// Vector variants apply the scalar rule per component.
MaterialFloat2 PositiveClampedPow(MaterialFloat2 Base, MaterialFloat2 Exponent)
{
return MaterialFloat2(PositiveClampedPow(Base.x, Exponent.x), PositiveClampedPow(Base.y, Exponent.y));
}
MaterialFloat3 PositiveClampedPow(MaterialFloat3 Base, MaterialFloat3 Exponent)
{
return MaterialFloat3(PositiveClampedPow(Base.xy, Exponent.xy), PositiveClampedPow(Base.z, Exponent.z));
}
MaterialFloat4 PositiveClampedPow(MaterialFloat4 Base, MaterialFloat4 Exponent)
{
return MaterialFloat4(PositiveClampedPow(Base.xy, Exponent.xy), PositiveClampedPow(Base.zw, Exponent.zw));
}
// Safe wrappers around the ddx/ddy hardware derivatives: in forced-MIP shader
// stages (e.g. ray tracing, where derivatives do not exist) they return 0.
float DDX(float Input)
{
#if USE_FORCE_TEXTURE_MIP
return 0;
#else
return ddx(Input);
#endif
}
float2 DDX(float2 Input)
{
#if USE_FORCE_TEXTURE_MIP
return 0;
#else
return ddx(Input);
#endif
}
float3 DDX(float3 Input)
{
#if USE_FORCE_TEXTURE_MIP
return 0;
#else
return ddx(Input);
#endif
}
float4 DDX(float4 Input)
{
#if USE_FORCE_TEXTURE_MIP
return 0;
#else
return ddx(Input);
#endif
}
float DDY(float Input)
{
#if USE_FORCE_TEXTURE_MIP
return 0;
#else
return ddy(Input);
#endif
}
float2 DDY(float2 Input)
{
#if USE_FORCE_TEXTURE_MIP
return 0;
#else
return ddy(Input);
#endif
}
float3 DDY(float3 Input)
{
#if USE_FORCE_TEXTURE_MIP
return 0;
#else
return ddy(Input);
#endif
}
float4 DDY(float4 Input)
{
#if USE_FORCE_TEXTURE_MIP
return 0;
#else
return ddy(Input);
#endif
}
#include "FastMath.ush"
#include "Random.ush" // used by MaterialExpressionNoise
/**
* Use this function to compute the pow() in the specular computation.
* This allows to change the implementation depending on platform or it easily can be replaced by some approxmation.
* @param X specular base (e.g. N.H), @param Y specular power.
*/
MaterialFloat PhongShadingPow(MaterialFloat X, MaterialFloat Y)
{
// The following clamping is done to prevent NaN being the result of the specular power computation.
// Clamping has a minor performance cost.
// In HLSL pow(a, b) is implemented as exp2(log2(a) * b).
// For a=0 this becomes exp2(-inf * 0) = exp2(NaN) = NaN.
// As seen in #TTP 160394 "QA Regression: PS3: Some maps have black pixelated artifacting."
// this can cause severe image artifacts (problem was caused by specular power of 0, lightshafts propagated this to other pixels).
// The problem appeared on PlayStation 3 but can also happen on similar PC NVidia hardware.
// In order to avoid platform differences and rarely occuring image atrifacts we clamp the base.
// Note: Clamping the exponent seemed to fix the issue mentioned TTP but we decided to fix the root and accept the
// minor performance cost.
return ClampedPow(X, Y);
}
#if FEATURE_LEVEL < FEATURE_LEVEL_ES3_1 && !COMPILER_METAL
// DX11 (feature levels >= 10) feature sets natively supports uints in shaders; we just use floats on other platforms.
#define uint4 int4
#endif
// Helper macro used to interpolate the given member
// (expects 'O', 'a', 'b', 'aInterp', 'bInterp' to be in scope at the expansion site).
#define INTERPOLATE_MEMBER(member) O.member = a.member * aInterp + b.member * bInterp
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM4
/**
* Number of MSAA samples supported by deferred passes in D3D11.
* This is hardcoded because it allows deferred passes to optimize for the given value (for example, unrolling a loop).
*/
#define NumMSAASamples 4
#endif
// shadow and light function
Texture2D LightAttenuationTexture;
SamplerState LightAttenuationTextureSampler;
// We don't use an inline function so we can avoid type promotion/ coercion.
#define RETURN_COLOR( Color ) ( Color )
// Convert from unorm to snorm and viceversa
// Unorm8-in-float (0..1 scaled to 0..255) reinterpreted as snorm8 (-1..1):
// values above 127 are sign-extended with the 8-bit mask 0xFFFFFF80 into a
// negative 32-bit int, then normalized by 127.
float ConvertTangentUnormToSnorm8(float Input)
{
int IntVal = int(round(Input * 255.0f));
//negate
IntVal = select(IntVal > 127, IntVal | 0xFFFFFF80, IntVal);
return clamp(IntVal / 127.0f, -1, 1);
}
float2 ConvertTangentUnormToSnorm8(float2 Input)
{
int2 IntVal = int2(round(Input * 255.0f));
//negate
IntVal = select(IntVal > 127, IntVal | 0xFFFFFF80, IntVal);
return clamp(IntVal / 127.0f, -1, 1);
}
float3 ConvertTangentUnormToSnorm8(float3 Input)
{
int3 IntVal = int3(round(Input * 255.0f));
//negate: sign-extend values above 127 as with the other overloads
IntVal = select(IntVal > 127, IntVal | 0xFFFFFF80, IntVal);
return clamp(IntVal / 127.0f, -1, 1);
}
float4 ConvertTangentUnormToSnorm8(float4 Input)
{
int4 IntVal = int4(round(Input * 255.0f));
//negate
IntVal = select(IntVal > 127, IntVal | 0xFFFFFF80, IntVal);
return clamp(IntVal / 127.0f, -1, 1);
}
// Unorm16-in-float (0..1 scaled to 0..65535) reinterpreted as snorm16 (-1..1):
// values above 32767 are sign-extended with the 16-bit mask 0xFFFF8000, then
// normalized by 32767.
float ConvertTangentUnormToSnorm16(float Input)
{
int IntVal = int(round(Input * 65535.0f));
//negate
IntVal = select(IntVal > 32767, IntVal | 0xFFFF8000, IntVal);
return clamp(IntVal / 32767.0f, -1, 1);
}
// Unorm16-in-float2 reinterpreted per-component as snorm16 (-1..1).
float2 ConvertTangentUnormToSnorm16(float2 Input)
{
int2 IntVal = int2(round(Input * 65535.0f));
//negate: sign-extend values above 32767 into negative 32-bit ints.
// BUGFIX: use the 16-bit sign-extension mask 0xFFFF8000 (as in the scalar
// overload); the previous 8-bit mask 0xFFFFFF80 produced wrong negative values
// (e.g. input 0x8000 became -128/32767 instead of -1).
IntVal = select(IntVal > 32767, IntVal | 0xFFFF8000, IntVal);
return clamp(IntVal / 32767.0f, -1, 1);
}
// Unorm16-in-float3 reinterpreted per-component as snorm16 (-1..1).
float3 ConvertTangentUnormToSnorm16(float3 Input)
{
int3 IntVal = int3(round(Input * 65535.0f));
// BUGFIX: sign-extend with the 16-bit mask 0xFFFF8000 (matching the scalar
// overload) instead of the 8-bit mask 0xFFFFFF80, which yielded wrong negatives.
IntVal = select(IntVal > 32767, IntVal | 0xFFFF8000, IntVal);
return clamp(IntVal / 32767.0f, -1, 1);
}
// Unorm16-in-float4 reinterpreted per-component as snorm16 (-1..1).
float4 ConvertTangentUnormToSnorm16(float4 Input)
{
int4 IntVal = int4(round(Input * 65535.0f));
//negate: sign-extend values above 32767 into negative 32-bit ints.
// BUGFIX: use the 16-bit sign-extension mask 0xFFFF8000 (as in the scalar
// overload); the previous 8-bit mask 0xFFFFFF80 produced wrong negative values.
IntVal = select(IntVal > 32767, IntVal | 0xFFFF8000, IntVal);
return clamp(IntVal / 32767.0f, -1, 1);
}
// Snorm (-1..1) re-encoded as an 8-bit two's-complement pattern stored in unorm (0..1):
// non-negative inputs map to 0..127, negative inputs to 128..255.
// NOTE(review): the upper clamp of 0.99f (not 1.0) appears intentional to keep the
// encoded byte below 255 — confirm against the matching decode before changing.
float ConvertTangentSnormToUnorm8(float Input)
{
float Res = select(Input >= 0.0f, Input * 127, ((Input + 1.0) * 127) + 128);
return clamp(Res / 255, 0.0f, 0.99f);
}
float2 ConvertTangentSnormToUnorm8(float2 Input)
{
float2 Res = select(Input >= 0.0f, Input * 127, ((Input + 1.0) * 127) + 128);
return clamp(Res / 255, 0.0f, 0.99f);
}
float3 ConvertTangentSnormToUnorm8(float3 Input)
{
float3 Res = select(Input >= 0.0f, Input * 127, ((Input + 1.0) * 127) + 128);
return clamp(Res / 255, 0.0f, 0.99f);
}
float4 ConvertTangentSnormToUnorm8(float4 Input)
{
float4 Res = select(Input >= 0.0f, Input * 127, ((Input + 1.0) * 127) + 128);
return clamp(Res / 255, 0.0f, 0.99f);
}
// Snorm (-1..1) re-encoded as a 16-bit two's-complement pattern stored in unorm (0..1):
// non-negative inputs map to 0..32767, negative inputs to 32768..65535.
// NOTE(review): the upper clamp of 0.99f mirrors the 8-bit variant — confirm
// against the matching decode before changing.
float ConvertTangentSnormToUnorm16(float Input)
{
float Res = select(Input >= 0.0f, Input * 32767, ((Input + 1.0) * 32767) + 32768);
return clamp(Res / 65535, 0.0f, 0.99f);
}
float2 ConvertTangentSnormToUnorm16(float2 Input)
{
float2 Res = select(Input >= 0.0f, Input * 32767, ((Input + 1.0) * 32767) + 32768);
return clamp(Res / 65535, 0.0f, 0.99f);
}
float3 ConvertTangentSnormToUnorm16(float3 Input)
{
float3 Res = select(Input >= 0.0f, Input * 32767, ((Input + 1.0) * 32767) + 32768);
return clamp(Res / 65535, 0.0f, 0.99f);
}
float4 ConvertTangentSnormToUnorm16(float4 Input)
{
float4 Res = select(Input >= 0.0f, Input * 32767, ((Input + 1.0) * 32767) + 32768);
return clamp(Res / 65535, 0.0f, 0.99f);
}
// Tangent space bias/unbias
// We don't use a function so we can avoid type promotion/ coercion.
// NOTE(review): identity on this path — presumably other platforms remap the encoded
// range here without changing call sites; confirm against platform headers.
#define TangentBias(X) (X)
#define TangentUnbias(X) (X)
// Returns the square of the input.
float Square( float v )
{
	return v * v;
}
// Component-wise square for vector inputs.
float2 Square( float2 v )
{
	return v * v;
}
float3 Square( float3 v )
{
	return v * v;
}
float4 Square( float4 v )
{
	return v * v;
}
// x^2 — alias of Square kept for readability at call sites.
float Pow2( float v )
{
	return v * v;
}
// Component-wise x^2 for vector inputs.
float2 Pow2( float2 v )
{
	return v * v;
}
float3 Pow2( float3 v )
{
	return v * v;
}
float4 Pow2( float4 v )
{
	return v * v;
}
// x^3 with two multiplies (cheaper than pow(x, 3)).
float Pow3( float v )
{
	return v * v * v;
}
// Component-wise x^3 for vector inputs.
float2 Pow3( float2 v )
{
	return v * v * v;
}
float3 Pow3( float3 v )
{
	return v * v * v;
}
float4 Pow3( float4 v )
{
	return v * v * v;
}
// x^4 via repeated squaring: (x^2)^2, two multiplies total.
float Pow4( float x )
{
	const float x2 = x * x;
	return x2 * x2;
}
// Component-wise x^4 via repeated squaring.
float2 Pow4( float2 x )
{
	const float2 x2 = x * x;
	return x2 * x2;
}
float3 Pow4( float3 x )
{
	const float3 x2 = x * x;
	return x2 * x2;
}
float4 Pow4( float4 x )
{
	const float4 x2 = x * x;
	return x2 * x2;
}
// x^5 = (x^2)^2 * x — three multiplies; common in Fresnel terms.
float Pow5( float x )
{
	const float x2 = x * x;
	return x2 * x2 * x;
}
// Component-wise x^5 for vector inputs.
float2 Pow5( float2 x )
{
	const float2 x2 = x * x;
	return x2 * x2 * x;
}
float3 Pow5( float3 x )
{
	const float3 x2 = x * x;
	return x2 * x2 * x;
}
float4 Pow5( float4 x )
{
	const float4 x2 = x * x;
	return x2 * x2 * x;
}
// x^6 = (x^2)^3 — three multiplies total.
float Pow6( float x )
{
	const float x2 = x * x;
	return x2 * x2 * x2;
}
// Component-wise x^6 for vector inputs.
float2 Pow6( float2 x )
{
	const float2 x2 = x * x;
	return x2 * x2 * x2;
}
float3 Pow6( float3 x )
{
	const float3 x2 = x * x;
	return x2 * x2 * x2;
}
float4 Pow6( float4 x )
{
	const float4 x2 = x * x;
	return x2 * x2 * x2;
}
// Only valid for x >= 0
// Fast approximation of atan(x). For x >= 1 it evaluates the polynomial on 1/x and
// uses atan(x) = pi/2 - atan(1/x); A packs (argument, angle offset, polynomial sign).
MaterialFloat AtanFast( MaterialFloat x )
{
// Minimax 3 approximation
MaterialFloat3 A = x < 1 ? MaterialFloat3( x, 0, 1 ) : MaterialFloat3( 1/x, 0.5 * PI, -1 );
return A.y + A.z * ( ( ( -0.130234 * A.x - 0.0954105 ) * A.x + 1.00712 ) * A.x - 0.00001203333 );
}
/** Converts a linear input value into a value to be stored in the light attenuation buffer. */
MaterialFloat EncodeLightAttenuation(MaterialFloat InColor)
{
	// Store sqrt(x): allocates more bits to the darks and prevents banding,
	// at a fraction of the cost of a full pow(x, 1/2.2) gamma encode.
	return sqrt(InColor);
}
/** Converts a linear input value into a value to be stored in the light attenuation buffer. */
MaterialFloat4 EncodeLightAttenuation(MaterialFloat4 InColor)
{
	return sqrt(InColor);
}
/** Converts value stored in the light attenuation buffer into a linear light attenuation value. */
MaterialFloat DecodeLightAttenuation(MaterialFloat InColor)
{
	// Inverse of the sqrt encode above.
	return InColor * InColor;
}
/** Converts value stored in the light attenuation buffer into a linear light attenuation value. */
MaterialFloat4 DecodeLightAttenuation(MaterialFloat4 InColor)
{
	return InColor * InColor;
}
// Like RGBM but this can be interpolated.
// Encodes HDR color as normalized rgb plus a shared multiplier in alpha = Max/(1+Max).
MaterialFloat4 RGBTEncode(MaterialFloat3 Color)
{
MaterialFloat4 RGBT;
// Largest channel, floored at 1e-6 to avoid divide-by-zero.
MaterialFloat Max = max(max(Color.r, Color.g), max(Color.b, 1e-6));
MaterialFloat RcpMax = rcp(Max);
RGBT.rgb = Color.rgb * RcpMax;
// Remap Max into [0,1) so it survives fixed-point storage.
RGBT.a = Max * rcp(1.0 + Max);
return RGBT;
}
// Inverse of RGBTEncode: undo the Max/(1+Max) remap, then rescale the normalized rgb.
MaterialFloat3 RGBTDecode(MaterialFloat4 RGBT)
{
RGBT.a = RGBT.a * rcp(1.0 - RGBT.a);
return RGBT.rgb * RGBT.a;
}
// RGBM encode: rgb divided by a shared multiplier stored in alpha; 1/64 pre-scale
// means values up to 64 are representable.
MaterialFloat4 RGBMEncode( MaterialFloat3 Color )
{
Color *= 1.0 / 64.0;
float4 rgbm;
rgbm.a = saturate( max( max( Color.r, Color.g ), max( Color.b, 1e-6 ) ) );
// Quantize the multiplier up to the next 8-bit step so rgb stays <= 1.
rgbm.a = ceil( rgbm.a * 255.0 ) / 255.0;
rgbm.rgb = Color / rgbm.a;
return rgbm;
}
// Cheaper RGBM encode: the multiplier comes from a channel sum (via dot) instead of a max.
MaterialFloat4 RGBMEncodeFast( MaterialFloat3 Color )
{
// 0/0 result written to fixed point buffer goes to zero
MaterialFloat4 rgbm;
rgbm.a = dot( Color, 255.0 / 64.0 );
rgbm.a = ceil( rgbm.a );
rgbm.rgb = Color / rgbm.a;
rgbm *= MaterialFloat4( 255.0 / 64.0, 255.0 / 64.0, 255.0 / 64.0, 1.0 / 255.0 );
return rgbm;
}
// Decode RGBM with a caller-provided maximum value.
MaterialFloat3 RGBMDecode( MaterialFloat4 rgbm, MaterialFloat MaxValue )
{
return rgbm.rgb * (rgbm.a * MaxValue);
}
// Decode RGBM with the default 64.0 maximum matching RGBMEncode.
MaterialFloat3 RGBMDecode( MaterialFloat4 rgbm )
{
return rgbm.rgb * (rgbm.a * 64.0f);
}
// RGBT variant tuned for 8 bits per channel with an explicit Range cap.
MaterialFloat4 RGBTEncode8BPC(MaterialFloat3 Color, MaterialFloat Range)
{
MaterialFloat Max = max(max(Color.r, Color.g), max(Color.b, 1e-6));
Max = min(Max, Range);
MaterialFloat4 RGBT;
RGBT.a = (Range + 1) / Range * Max / (1 + Max);
// quantise alpha to 8 bit.
RGBT.a = ceil(RGBT.a*255.0) / 255.0;
// Recompute Max from the quantized alpha so encode/decode round-trip exactly.
Max = RGBT.a / (1 + 1 / Range - RGBT.a);
MaterialFloat RcpMax = rcp(Max);
RGBT.rgb = Color.rgb * RcpMax;
return RGBT;
}
// Inverse of RGBTEncode8BPC (must be called with the same Range).
MaterialFloat3 RGBTDecode8BPC(MaterialFloat4 RGBT, MaterialFloat Range)
{
RGBT.a = RGBT.a / (1 + 1 / Range - RGBT.a);
return RGBT.rgb * RGBT.a;
}
/** Get render target write mask value
* This gets a bit from a write mask texture created with FRTWriteMaskDecodeCS. Only supported on some platforms.
*/
#if PLATFORM_SUPPORTS_RENDERTARGET_WRITE_MASK
// One texel of RTWriteMaskTexture covers an 8x8 pixel tile; each 4x4 quad inside the
// tile stores NumEncodedTextures bits at its own shift within the texel.
uint DecodeRTWriteMask(uint2 PixelPos, Texture2D<uint> RTWriteMaskTexture, uint NumEncodedTextures)
{
uint2 TileIndex = PixelPos >> 3;
uint2 TileOffset = (PixelPos >> 2) & 1;
// Quad index (0..3) times the per-quad bit count selects the bit window.
uint Shift = (TileOffset.y * 2 + TileOffset.x) * NumEncodedTextures;
// Low NumEncodedTextures bits set.
uint Mask = ~((~0u) << NumEncodedTextures);
return (RTWriteMaskTexture.Load(uint3(TileIndex, 0)) >> Shift) & Mask;
}
#endif
/** Calculates the ScreenUV given the screen position and an offset fraction. */
float2 CalcScreenUVFromOffsetFraction(float4 ScreenPosition, float2 OffsetFraction)
{
	const float2 Ndc = ScreenPosition.xy / ScreenPosition.w;
	// Apply the offset in NDC space so it is consistent regardless of scene color buffer
	// size, and clamp to the valid area of the screen to avoid reading garbage.
	//@todo - soft clamp
	const float2 OffsetNdc = clamp(OffsetFraction * float2(2, -2) + Ndc, -.999f, .999f);
	return OffsetNdc * ResolvedView.ScreenPositionScaleBias.xy + ResolvedView.ScreenPositionScaleBias.wz;
}
// Reads the per-pixel light attenuation at mip 0 and undoes the sqrt encoding.
float4 GetPerPixelLightAttenuation(float2 UV)
{
	const float4 Encoded = Texture2DSampleLevel(LightAttenuationTexture, LightAttenuationTextureSampler, UV, 0);
	return DecodeLightAttenuation(Encoded);
}
// Returns whether or not the given projection matrix is orthographic.
// Ortho projections store 1 in _44 where perspective projections store 0.
bool IsOrthoProjection(float4x4 ViewToClip)
{
	return ViewToClip._44 >= 1.0f;
}
// Return whether View has ortho or perspective projection
bool IsOrthoProjection(ViewState InView)
{
	return IsOrthoProjection(InView.ViewToClip);
}
// Returns whether or not the default view is orthographic
bool IsOrthoProjection()
{
	return IsOrthoProjection(View.ViewToClip);
}
// Float flavor of the above: 1.0 for ortho views, 0.0 for perspective views.
float IsOrthoProjectionFloat()
{
	return select(IsOrthoProjection(), 1.0f, 0.0f);
}
// inverse operation of ConvertFromDeviceZ()
// @param SceneDepth (linear in world units, W)
// @return DeviceZ (Z/W)
float ConvertToDeviceZ(float SceneDepth)
{
FLATTEN
if (IsOrthoProjection())
{
// Ortho: device Z is a linear remap of scene depth through the projection matrix.
return SceneDepth * View.ViewToClip[2][2] + View.ViewToClip[3][2];
}
else
{
// Perspective: invert the rational mapping used by ConvertFromDeviceZ.
return 1.0f / ((SceneDepth + View.InvDeviceZToWorldZTransform[3]) * View.InvDeviceZToWorldZTransform[2]);
}
}
// also see ConvertToDeviceZ()
// @param DeviceZ value that is stored in the depth buffer (Z/W)
// @return SceneDepth (linear in world units, W)
float ConvertFromDeviceZ(float DeviceZ)
{
// Supports ortho and perspective, see CreateInvDeviceZToWorldZTransform()
// The linear terms cover the ortho case and the rational term the perspective case;
// presumably the transform disables whichever part does not apply — see the C++ side.
return DeviceZ * View.InvDeviceZToWorldZTransform[0] + View.InvDeviceZToWorldZTransform[1] + 1.0f / (DeviceZ * View.InvDeviceZToWorldZTransform[2] - View.InvDeviceZToWorldZTransform[3]);
}
// Scales a clip-to-previous-clip velocity for ortho views, where distant pixels would
// otherwise produce greatly magnified motion vectors.
float2 AdjustClipToPrevClipForProjectionType(float2 Velocity, float DeviceZ)
{
if(IsOrthoProjection())
{
/*
* Motion vectors on far distance planes are greatly influenced by slight rotations in camera angle
* And with Orthographic cameras, this magnifies the velocities over large distances.
* As a result, we need to counter this by reducing the speed with the DeviceZ
*/
#if HAS_INVERTED_Z_BUFFER
/**
* Because this is ClipToPrevClip rather than velocity, we use the non - linear device z which works better than linear depth,
* however if adjusting velocity directly, linear depth should be used instead.
*/
Velocity *= DeviceZ;
#else
Velocity *= 1.0f - DeviceZ;
#endif
}
return Velocity;
}
// Returns linear scene depth for a screen position: ortho reconstructs it from
// ScreenPosition.z, perspective reads it directly from .w.
float GetScreenPositionDepth(float4 ScreenPosition)
{
#if RAYTRACINGSHADER
// Workaround for UE-212146: Accessing global ResolvedView in this case triggers internal compiler error on some platforms.
return select(IsOrthoProjection(ResolveView()), ConvertFromDeviceZ(ScreenPosition.z), ScreenPosition.w);
#else
return select(IsOrthoProjection(ResolvedView), ConvertFromDeviceZ(ScreenPosition.z), ScreenPosition.w);
#endif
}
// Direction from the camera toward Position. Ortho views share the view forward axis
// for every pixel; perspective views use the normalized camera-to-position vector.
float3 GetCameraVector(ViewState InView, float3 Position, float3 CameraPosition)
{
	const float3 PerspectiveDir = normalize(Position - CameraPosition);
	return select(IsOrthoProjection(InView), InView.ViewForward, PerspectiveDir);
}
// Same as above against the primary view.
float3 GetCameraVector(float3 Position, float3 CameraPosition)
{
	return GetCameraVector(PrimaryView, Position, CameraPosition);
}
// Camera vector from an absolute world position (camera origin demoted from DF).
float3 GetCameraVectorFromWorldPosition(float3 WorldPosition)
{
	return GetCameraVector(WorldPosition, DFHackToFloat(PrimaryView.WorldCameraOrigin));
}
// Camera vector from a translated-world position for an explicit view.
float3 GetCameraVectorFromTranslatedWorldPosition(ViewState InView, float3 TranslatedWorldPosition)
{
	return GetCameraVector(TranslatedWorldPosition, InView.TranslatedWorldCameraOrigin);
}
float3 GetCameraVectorFromTranslatedWorldPosition(float3 TranslatedWorldPosition)
{
	return GetCameraVectorFromTranslatedWorldPosition(PrimaryView, TranslatedWorldPosition);
}
// Distance from the camera along the view for a camera-to-position vector.
// For ortho views the plain vector length is wrong: the distance to the view-forward
// "plane" is needed, so the length is rescaled by length/dot(vector, forward).
float GetDistanceToCameraFromViewVector(float3 DistanceVector)
{
float DistanceToCamera = length(DistanceVector);
if (IsOrthoProjection())
{
/**
* Position to camera length is not the correct value here, ortho projection needs the distance
* as the view forward "plane" to the position instead. This length is recalculated here.
*/
DistanceToCamera *= DistanceToCamera / dot(DistanceVector, View.ViewForward);
}
return DistanceToCamera;
}
// Squared version of GetDistanceToCameraFromViewVector; perspective avoids the sqrt.
float GetDistanceToCameraFromViewVectorSqr(float3 DistanceVector)
{
	if (IsOrthoProjection())
	{
		// Ortho needs the plane-projected distance, so square the corrected length.
		const float OrthoDistance = GetDistanceToCameraFromViewVector(DistanceVector);
		return OrthoDistance * OrthoDistance;
	}
	return dot(DistanceVector, DistanceVector);
}
// Perspective divide, then map NDC into buffer UV space.
float2 ScreenPositionToBufferUV(float4 ScreenPosition)
{
	const float2 Ndc = ScreenPosition.xy / ScreenPosition.w;
	return Ndc * ResolvedView.ScreenPositionScaleBias.xy + ResolvedView.ScreenPositionScaleBias.wz;
}
// Screen rect does not include scene depth. Ortho views always look along ViewForward,
// so only perspective needs the ScreenToTranslatedWorld unprojection.
float3 ScreenVectorFromScreenRect(float4 Position)
{
	const float3 PerspectiveVector = mul(Position, View.ScreenToTranslatedWorld).xyz;
	return select(IsOrthoProjection(), View.ViewForward, PerspectiveVector);
}
float3 ScreenVectorFromScreenRect(float4 Position, ViewState InView)
{
	const float3 PerspectiveVector = mul(Position, InView.ScreenToTranslatedWorld).xyz;
	return select(IsOrthoProjection(), InView.ViewForward, PerspectiveVector);
}
// Returns the screen position for projection matrix calculations depending on the type of projection the view is using
float2 GetScreenPositionForProjectionType(float2 ScreenPosition, float SceneDepth)
{
	// Perspective clip-to-view needs scene depth multiplied in; ortho must not use it.
	return select(IsOrthoProjection(), ScreenPosition, SceneDepth * ScreenPosition);
}
// Converts a screen-space radius given at SceneDepth into a depth-independent one for ortho.
float ConvertGivenDepthRadiusForProjectionType(float Radius, float SceneDepth, bool bForceOrthoView = false)
{
	const bool bTreatAsOrtho = IsOrthoProjection() || bForceOrthoView;
	return select(bTreatAsOrtho, Radius, SceneDepth * Radius);
}
// Single MAD: the slope term covers perspective scaling, the constant term covers ortho.
float GetDepthPixelRadiusForProjectionType(float SceneDepth)
{
	return SceneDepth * View.WorldDepthToPixelWorldRadius.x + View.WorldDepthToPixelWorldRadius.y;
}
// In many locations we calculate screen ray length using scene depth, which does not
// work for ortho; the view uniform folds both cases into one MAD against SceneDepth.
float2 GetScreenRayLengthMultiplierForProjectionType(float SceneDepth)
{
	return SceneDepth * View.ScreenRayLengthMultiplier.xy + View.ScreenRayLengthMultiplier.zw;
}
// SvPosition.xy is already pixel-centered, so scaling by the inverse buffer size suffices.
float2 SvPositionToBufferUV(float4 SvPosition)
{
	return SvPosition.xy * View.BufferSizeAndInvSize.zw;
}
// Used for post process shaders which don't need to resolve the view
float3 SvPositionToTranslatedWorld(float4 SvPosition)
{
	const float4 Hom = mul(float4(SvPosition.xyz, 1), View.SVPositionToTranslatedWorld);
	return Hom.xyz / Hom.w;
}
// Used for vertex factory shaders which need to use the resolved view
float3 SvPositionToResolvedTranslatedWorld(float4 SvPosition)
{
	const float4 Hom = mul(float4(SvPosition.xyz, 1), ResolvedView.SVPositionToTranslatedWorld);
	return Hom.xyz / Hom.w;
}
// prefer to use SvPositionToTranslatedWorld() for better quality
FDFVector3 SvPositionToWorld(float4 SvPosition)
{
	return DFFastSubtract(SvPositionToTranslatedWorld(SvPosition), PrimaryView.PreViewTranslation);
}
// investigate: doesn't work for usage with PrimaryView.ScreenToWorld, see SvPositionToScreenPosition2()
float4 SvPositionToScreenPosition(float4 SvPosition)
{
	// todo: is already in .w or needs to be reconstructed like this:
	// SvPosition.w = ConvertFromDeviceZ(SvPosition.z);
	const float2 PixelPos = SvPosition.xy - View.ViewRectMin.xy;
	// NDC (after the perspective divide), with the usual y flip.
	const float2 NdcXY = (PixelPos * View.ViewSizeAndInvSize.zw - 0.5f) * float2(2, -2);
	// .w carries SceneDepth; some mobile code and the DepthFade expression rely on that.
	return float4(NdcXY, SvPosition.z, 1) * SvPosition.w;
}
// Used for vertex factory shaders which need to use the resolved view
float4 SvPositionToResolvedScreenPosition(float4 SvPosition)
{
	const float2 PixelPos = SvPosition.xy - ResolvedView.ViewRectMin.xy;
	const float2 NdcXY = (PixelPos * ResolvedView.ViewSizeAndInvSize.zw - 0.5f) * float2(2, -2);
	// .w carries SceneDepth, as in SvPositionToScreenPosition.
	return float4(NdcXY, SvPosition.z, 1) * SvPosition.w;
}
// Computes the resolved screen position and its analytic screen-space derivatives.
// @param PPZ_DDX_DDY  ddx/ddy of post-projection Z (SvPosition.z)
// @param W_DDX_DDY    ddx/ddy of clip-space W (SvPosition.w)
void SvPositionToResolvedScreenPositionDeriv(float4 SvPosition, float2 PPZ_DDX_DDY, float2 W_DDX_DDY, inout float4 ScreenPosition, inout float4 ScreenPositionDDX, inout float4 ScreenPositionDDY)
{
	float2 PixelPos = SvPosition.xy - ResolvedView.ViewRectMin.xy;
	// NDC (NormalizedDeviceCoordinates, after the perspective divide)
	float4 NDCPos = float4((PixelPos * ResolvedView.ViewSizeAndInvSize.zw - 0.5f) * float2(2, -2), SvPosition.z, 1.0f);
	// NDC.x = 2 * PixelPos.x * InvSize.z - 1, so d(NDC.x)/dx = 2 * InvSize.z.
	float4 NDCPosDDX = float4(ResolvedView.ViewSizeAndInvSize.z * 2.0f, 0.0f, PPZ_DDX_DDY.x, 0.0f);
	// NDC.y = -(2 * PixelPos.y * InvSize.w - 1): the vertical derivative belongs in the
	// y component and carries the NDC y-flip sign (it was previously written to the x
	// component with a positive sign).
	float4 NDCPosDDY = float4(0.0f, ResolvedView.ViewSizeAndInvSize.w * -2.0f, PPZ_DDX_DDY.y, 0.0f);
	// Product rule: ScreenPosition = NDCPos * w => d(SP) = d(NDC) * w + NDC * d(w).
	ScreenPosition = NDCPos * SvPosition.w;
	ScreenPositionDDX = NDCPos * W_DDX_DDY.x + NDCPosDDX * SvPosition.w;
	ScreenPositionDDY = NDCPos * W_DDX_DDY.y + NDCPosDDY * SvPosition.w;
}
// Per-pixel camera position: for ortho views every pixel has its own ray origin on the
// near plane, for perspective views it is simply the camera origin.
float3 GetTranslatedWorldCameraPosFromView(ViewState InView, float2 SvPosition, bool bForceAddOrthoHeight = false)
{
// Get the camera position per pixel in ortho, or just the camera location in perspective.
if (IsOrthoProjection(InView))
{
//When using sky height, for example, the horizon calculation requires bForceAddOrthoHeight to be true to
//avoid negative calculations when comparing the relative sky heights + view camera height
float3 CameraPosition = SvPositionToTranslatedWorld(float4(SvPosition, 1.0f, 1.0f));
CameraPosition.z += select(bForceAddOrthoHeight, View.ClipToView[1][1], 0.0f);
return CameraPosition;
}
else
{
return InView.TranslatedWorldCameraOrigin;
}
}
// Double-float (absolute world) variant of the function above.
FDFVector3 GetWorldCameraPosFromView(ViewState InView, float2 SvPosition, bool bForceAddOrthoHeight = false)
{
if(IsOrthoProjection(InView))
{
//When using sky height, for example, the horizon calculation requires bForceAddOrthoHeight to be true to
//avoid negative calculations when comparing the relative sky heights + view camera height
FDFVector3 CameraPosition = SvPositionToWorld(float4(SvPosition, 1.0f, 1.0f));
CameraPosition = DFFastAdd(select(bForceAddOrthoHeight, float3(0.0f, 0.0f, View.ClipToView[1][1]), float3(0.0f,0.0f,0.0f)), CameraPosition);
return CameraPosition;
}
else
{
return InView.WorldCameraOrigin;
}
}
// Primary-view convenience wrapper for GetTranslatedWorldCameraPosFromView.
float3 GetTranslatedWorldCameraPosFromView(float2 SvPosition, bool bForceAddOrthoHeight = false)
{
return GetTranslatedWorldCameraPosFromView(PrimaryView, SvPosition, bForceAddOrthoHeight);
}
// Builds the view direction through the given pixel by unprojecting it at a large
// fixed depth; only the direction of the resulting camera vector is used.
float3 GetScreenWorldDir(in float4 SVPos)
{
float2 ScreenPosition = SvPositionToScreenPosition(SVPos).xy;
const float Depth = 1000000.0f;
float4 TranslatedWorldPos = mul(float4(GetScreenPositionForProjectionType(ScreenPosition, Depth), Depth, 1), PrimaryView.ScreenToTranslatedWorld);
return GetCameraVectorFromTranslatedWorldPosition(TranslatedWorldPos.xyz);
}
// Pixel position relative to the view rect, normalized to [0,1] viewport UV.
float2 SvPositionToViewportUV(float4 SvPosition)
{
	// can be optimized from 2SUB+2MUL to 2MAD
	return (SvPosition.xy - ResolvedView.ViewRectMin.xy) * ResolvedView.ViewSizeAndInvSize.zw;
}
// Buffer UV -> viewport UV: back to pixels, shift out the view rect origin, renormalize.
float2 BufferUVToViewportUV(float2 BufferUV)
{
	const float2 PixelCoord = BufferUV.xy * View.BufferSizeAndInvSize.xy - View.ViewRectMin.xy;
	return PixelCoord * View.ViewSizeAndInvSize.zw;
}
// Viewport UV -> buffer UV (inverse of the function above).
float2 ViewportUVToBufferUV(float2 ViewportUV)
{
	const float2 PixelCoord = ViewportUV * View.ViewSizeAndInvSize.xy;
	return (PixelCoord + View.ViewRectMin.xy) * View.BufferSizeAndInvSize.zw;
}
// Maps standard viewport UV to screen position ([-1,1], y flipped).
float2 ViewportUVToScreenPos(float2 ViewportUV)
{
	return float2(ViewportUV.x * 2 - 1, 1 - ViewportUV.y * 2);
}
// Inverse of ViewportUVToScreenPos.
float2 ScreenPosToViewportUV(float2 ScreenPos)
{
	return float2(ScreenPos.x * 0.5 + 0.5, 0.5 - ScreenPos.y * 0.5);
}
// Maps standard viewport UV to an unprojected viewpos.
// Viewpos can then be achieved via out.xy / out.z
float3 ScreenToViewPos(float2 ViewportUV, float SceneDepth)
{
	float2 ProjViewPos;
	ProjViewPos.x = ViewportUV.x * View.ScreenToViewSpace.x + View.ScreenToViewSpace.z;
	ProjViewPos.y = ViewportUV.y * View.ScreenToViewSpace.y + View.ScreenToViewSpace.w;
	return float3(GetScreenPositionForProjectionType(ProjViewPos, SceneDepth), SceneDepth);
}
// ----------------------------
/**
 * Aligns the clip space position so that it can be used as a texture coordinate
 * to properly align in screen space.
 */
MaterialFloat2 ScreenAlignedPosition( float4 ScreenPosition )
{
	return MaterialFloat2(ScreenPositionToBufferUV(ScreenPosition));
}
/**
 * Aligns the [0,1] UV to match the view within the backbuffer
 */
MaterialFloat2 ScreenAlignedUV( MaterialFloat2 UV )
{
	// [0,1] UV -> [-1,1] screen pos (y flipped), then into backbuffer UV space.
	const MaterialFloat2 ScreenPos = UV * MaterialFloat2(2, -2) + MaterialFloat2(-1, 1);
	return ScreenPos * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;
}
/**
 * Compute viewport coordinates from the given fragment coordinates.
 */
MaterialFloat2 GetViewportCoordinates(MaterialFloat2 InFragmentCoordinates)
{
	// Identity on this path: fragment coordinates are already viewport coordinates.
	return InFragmentCoordinates;
}
/**
* Unpack a normal stored in a normal map. The X and Y components are rescaled from [0,1] to [-1,1] and Z is reconstructed.
*/
MaterialFloat4 UnpackNormalMap( MaterialFloat4 TextureSample )
{
#if DXT5_NORMALMAPS
// DXT5 packing: X in alpha, Y in green.
MaterialFloat2 NormalXY = TextureSample.ag;
#elif LA_NORMALMAPS
// Luminance/alpha packing: X in red, Y in alpha.
MaterialFloat2 NormalXY = TextureSample.ra;
#else
MaterialFloat2 NormalXY = TextureSample.rg;
#endif
NormalXY = NormalXY * MaterialFloat2(2.0f,2.0f) - MaterialFloat2(1.0f,1.0f);
// Reconstruct Z assuming a unit-length, upward-facing tangent-space normal;
// saturate guards against xy lengths slightly above 1 from quantization.
MaterialFloat NormalZ = sqrt( saturate( 1.0f - dot( NormalXY, NormalXY ) ) );
return MaterialFloat4( NormalXY.xy, NormalZ, 1.0f );
}
// Antialiased version of a binary comparison between ThresholdConst and a texture channel.
// @param Channel  which texture channel (0..3) holds the mask
// @return pixel coverage in [0,1], 0.5 exactly on the threshold line
float AntialiasedTextureMask( Texture2D Tex, SamplerState Sampler, float2 UV, float ThresholdConst, int Channel )
{
	// By setting MaskConst to 0001, 0010, 0100 or 1000 individual channels can be chosen (the compiler should be able to optimize that).
	MaterialFloat4 MaskConst = MaterialFloat4(Channel == 0, Channel == 1, Channel == 2, Channel == 3);
	// border width in pixels, for antialiasing 1 .. 1.5 is good but 1.0 is good for optimizations
	const float WidthConst = 1.0f;
	float InvWidthConst = 1 / WidthConst;
	// Problem:
	// A simple texture lookup with a comparison against some thresold value allows to get a mask useful
	// for many purposes (e.g. text rendering, signs, oil/water/paint). Antialiased masks look much better
	// and mip mapping provides that but only for minification. So when the texture resolution is lower than
	// the rendering size results get blurry.
	// Idea:
	// We compute the distance to the threshold line in pixels (with subpixel precision). We can visualize
	// the problem as a heightmap that intersects a axis aligned plane at the threshold height. Only surface
	// above the threshold plane contributes to the mask. Looking at one pixel the heightmap can be approximated
	// by a plane. We can easily get the plane center value form a texture lookup and get the plane equation from
	// ddx and ddy of that value (only one value per 2x2 block) or some other more precise method. We can reduce the
	// 3d problem to 2d (looking at the steepest angle only) and the resulting value tells us how much the texture value
	// changes for one pixel. This allows us to scale and bias (threshold) the texture value the so it maps to the
	// distance function. We rescaling the distance to 0.5 coverage at the line, >1 MaterialFloat a pixel inside and <0 MaterialFloat
	// a pixel outside. Clamping this value in the range from 0..1 gives us a good approximation of the pixel coverage.
	// We tried multiple possible implementations - this is the cheapest and looks ok is most cases.
	// If quality improvements are needed we can add an option to the node later on.
	float Result;
	{
		// optimized, ddx/ddy only for every 2x2 block (bad for distant stuff)
		float Sample1 = dot(MaskConst, Texture2DSample(Tex, Sampler, UV));
		// compute the derivatives of the texture content
		float2 TexDD = float2(DDX(Sample1), DDY(Sample1));
		float TexDDLength = max(abs(TexDD.x), abs(TexDD.y));
		// signed distance (scaled to pixels) from the threshold line
		float Top = InvWidthConst * (Sample1 - ThresholdConst);
		// Bias by 0.5 so that coverage is exactly 0.5 on the threshold line, as described
		// above (the previous "+ ThresholdConst" shifted coverage by the threshold value).
		Result = Top / TexDDLength + 0.5f;
	}
	Result = saturate(Result); // not always needed (e.g. DX9 framebuffer blending)
	return Result;
}
// While RepeatSize is a float here, the expectation is that it would be largely integer values coming in from the UI. The downstream logic assumes
// floats for all called functions (NoiseTileWrap) and this prevents any float-to-int conversion errors from automatic type conversion.
// Dispatches to one of the 3D noise implementations selected by Function.
float Noise3D_Multiplexer(int Function, float3 Position, int Quality, bool bTiling, float RepeatSize)
{
// verified, HLSL compiled out the switch if Function is a constant
switch(Function)
{
case 0:
return SimplexNoise3D_TEX(Position);
case 1:
return GradientNoise3D_TEX(Position, bTiling, RepeatSize);
case 2:
return FastGradientPerlinNoise3D_TEX(Position);
case 3:
return GradientNoise3D_ALU(Position, bTiling, RepeatSize);
case 4:
return ValueNoise3D_ALU(Position, bTiling, RepeatSize);
default:
// Voronoi: .w holds the result; remap from [0,1] to [-1,1] to match the others.
return VoronoiNoise3D_ALU(Position, Quality, bTiling, RepeatSize, true).w * 2. - 1.;
}
return 0;
}
// @param LevelScale usually 2 but higher values allow efficient use of few levels
// @return in user defined range (OutputMin..OutputMax)
// Fractal sum of Levels octaves of the selected noise function, optionally as
// turbulence (sum of absolute values); FilterWidth fades out octaves that would alias.
MaterialFloat MaterialExpressionNoise(float3 Position, float Scale, int Quality, int Function, bool bTurbulence, uint Levels, float OutputMin, float OutputMax, float LevelScale, float FilterWidth, bool bTiling, float RepeatSize)
{
Position *= Scale;
FilterWidth *= Scale;
float Out = 0.0f;
float OutScale = 1.0f;
float InvLevelScale = 1.0f / LevelScale;
LOOP for(uint i = 0; i < Levels; ++i)
{
// fade out noise level that are too high frequent (not done through dynamic branching as it usually requires gradient instructions)
OutScale *= saturate(1.0 - FilterWidth);
if(bTurbulence)
{
Out += abs(Noise3D_Multiplexer(Function, Position, Quality, bTiling, RepeatSize)) * OutScale;
}
else
{
Out += Noise3D_Multiplexer(Function, Position, Quality, bTiling, RepeatSize) * OutScale;
}
// Next octave: higher frequency, proportionally lower amplitude.
Position *= LevelScale;
RepeatSize *= LevelScale;
OutScale *= InvLevelScale;
FilterWidth *= LevelScale;
}
if(!bTurbulence)
{
// bring -1..1 to 0..1 range
Out = Out * 0.5f + 0.5f;
}
// Out is in 0..1 range
return lerp(OutputMin, OutputMax, Out);
}
// Double-float (large world coordinate) overload: mirror-repeat the position every
// TileSize units so precision stays bounded before sampling the noise.
MaterialFloat MaterialExpressionNoise(FDFVector3 LWCPosition, float Scale, int Quality, int Function, bool bTurbulence, uint Levels, float OutputMin, float OutputMax, float LevelScale, float FilterWidth, bool bTiling, float RepeatSize)
{
const float TileSize = 65536;
float3 Position = abs(TileSize * DFFracDemote(DFDivideByPow2(LWCPosition, TileSize))); // Mirror and repeat every TileSize
return MaterialExpressionNoise(Position, Scale, Quality, Function, bTurbulence, Levels, OutputMin, OutputMax, LevelScale, FilterWidth, bTiling, RepeatSize);
}
// LWC overload: use the tile-local offset as the sampling position.
MaterialFloat MaterialExpressionNoise(FLWCVector3 LWCPosition, float Scale, int Quality, int Function, bool bTurbulence, uint Levels, float OutputMin, float OutputMax, float LevelScale, float FilterWidth, bool bTiling, float RepeatSize)
{
float3 Position = LWCNormalizeTile(LWCPosition).Offset;
return MaterialExpressionNoise(Position, Scale, Quality, Function, bTurbulence, Levels, OutputMin, OutputMax, LevelScale, FilterWidth, bTiling, RepeatSize);
}
// Material node for noise functions returning a vector value
// @param LevelScale usually 2 but higher values allow efficient use of few levels
// @return in user defined range (OutputMin..OutputMax)
MaterialFloat4 MaterialExpressionVectorNoise(MaterialFloat3 Position, int Quality, int Function, bool bTiling, float TileSize)
{
float4 result = float4(0,0,0,1);
float3x4 Jacobian = JacobianSimplex_ALU(Position, bTiling, TileSize); // compiled out if not used
// verified, HLSL compiled out the switch if Function is a constant
switch (Function)
{
case 0: // Cellnoise
// Hash of the integer cell coordinate, normalized from 16-bit to [0,1].
result.xyz = float3(Rand3DPCG16(int3(floor(NoiseTileWrap(Position, bTiling, TileSize))))) / 0xffff;
break;
case 1: // Color noise
// Noise values live in the .w of each Jacobian row.
result.xyz = float3(Jacobian[0].w, Jacobian[1].w, Jacobian[2].w);
break;
case 2: // Gradient
result = Jacobian[0];
break;
case 3: // Curl
// Curl of the noise field from the antisymmetric part of the Jacobian.
result.xyz = float3(Jacobian[2][1] - Jacobian[1][2], Jacobian[0][2] - Jacobian[2][0], Jacobian[1][0] - Jacobian[0][1]);
break;
default: // Voronoi
result = VoronoiNoise3D_ALU(Position, Quality, bTiling, TileSize, false);
break;
}
return result;
}
/*
* Clips a ray to an AABB. Does not handle rays parallel to any of the planes.
*
* @param RayOrigin - The origin of the ray in world space.
* @param RayEnd - The end of the ray in world space.
* @param BoxMin - The minimum extrema of the box.
* @param BoxMax - The maximum extrema of the box.
* @return - Returns the closest intersection along the ray in x, and furthest in y.
* If the ray did not intersect the box, then the furthest intersection <= the closest intersection.
* The intersections will always be in the range [0,1], which corresponds to [RayOrigin, RayEnd] in worldspace.
* To find the world space position of either intersection, simply plug it back into the ray equation:
* WorldPos = RayOrigin + (RayEnd - RayOrigin) * Intersection;
*/
float2 LineBoxIntersect(float3 RayOrigin, float3 RayEnd, float3 BoxMin, float3 BoxMax)
{
// Classic slab test, parameterized over [0,1] along the segment.
float3 InvRayDir = 1.0f / (RayEnd - RayOrigin);
//find the ray intersection with each of the 3 planes defined by the minimum extrema.
float3 FirstPlaneIntersections = (BoxMin - RayOrigin) * InvRayDir;
//find the ray intersection with each of the 3 planes defined by the maximum extrema.
float3 SecondPlaneIntersections = (BoxMax - RayOrigin) * InvRayDir;
//get the closest of these intersections along the ray
float3 ClosestPlaneIntersections = min(FirstPlaneIntersections, SecondPlaneIntersections);
//get the furthest of these intersections along the ray
float3 FurthestPlaneIntersections = max(FirstPlaneIntersections, SecondPlaneIntersections);
float2 BoxIntersections;
//find the furthest near intersection
BoxIntersections.x = max(ClosestPlaneIntersections.x, max(ClosestPlaneIntersections.y, ClosestPlaneIntersections.z));
//find the closest far intersection
BoxIntersections.y = min(FurthestPlaneIntersections.x, min(FurthestPlaneIntersections.y, FurthestPlaneIntersections.z));
//clamp the intersections to be between RayOrigin and RayEnd on the ray
return saturate(BoxIntersections);
}
/** Computes distance from an AABB to a point in space. */
// Sums the per-axis overshoot beyond the min/max extrema.
MaterialFloat ComputeDistanceFromBoxToPoint(MaterialFloat3 Mins, MaterialFloat3 Maxs, MaterialFloat3 InPoint)
{
MaterialFloat3 DistancesToMin = select(InPoint < Mins, abs(InPoint - Mins), MaterialFloat(0.0));
MaterialFloat3 DistancesToMax = select(InPoint > Maxs, abs(InPoint - Maxs), MaterialFloat(0.0));
//@todo - this is actually incorrect, it gives manhattan distance
MaterialFloat Distance = dot(DistancesToMin, 1);
Distance += dot(DistancesToMax, 1);
return Distance;
}
/** Computes squared distance from a point in space to an AABB. */
MaterialFloat ComputeSquaredDistanceFromBoxToPoint(MaterialFloat3 BoxCenter, MaterialFloat3 BoxExtent, MaterialFloat3 InPoint)
{
// Per-axis distance outside the box (zero inside), then true squared Euclidean distance.
MaterialFloat3 AxisDistances = max(abs(InPoint - BoxCenter) - BoxExtent, 0);
return dot(AxisDistances, AxisDistances);
}
/** Computes distance from point inside an AABB to the AABB's surface. */
float ComputeDistanceFromBoxToPointInside(float3 BoxCenter, float3 BoxExtent, float3 InPoint)
{
	// Per-axis distance to the min faces and to the max faces (clamped at zero).
	const float3 ToMinFaces = max(InPoint - BoxCenter + BoxExtent, 0);
	const float3 ToMaxFaces = max(BoxCenter + BoxExtent - InPoint, 0);
	// Nearest face per axis, then the overall nearest face.
	const float3 NearestPerAxis = min(ToMinFaces, ToMaxFaces);
	return min(NearestPerAxis.x, min(NearestPerAxis.y, NearestPerAxis.z));
}
// True if the ray starting at RayOrigin along UnitRayDirection passes within SphereRadius.
bool RayHitSphere(float3 RayOrigin, float3 UnitRayDirection, float3 SphereCenter, float SphereRadius)
{
	// Closest point on the ray to the sphere center, clamped to not go behind the origin.
	const float3 NearestOnRay = max(0, dot(SphereCenter - RayOrigin, UnitRayDirection)) * UnitRayDirection;
	const float3 CenterOffset = RayOrigin + NearestOnRay - SphereCenter;
	// Hit if the perpendicular distance is within the radius.
	return dot(CenterOffset, CenterOffset) <= Square(SphereRadius);
}
// Segment variant: the sphere must also overlap the [0, RayLength] range along the ray.
bool RaySegmentHitSphere(float3 RayOrigin, float3 UnitRayDirection, float RayLength, float3 SphereCenter, float SphereRadius)
{
	const float AlongRay = dot(SphereCenter - RayOrigin, UnitRayDirection);
	const float3 NearestOnRay = AlongRay * UnitRayDirection;
	const float3 CenterOffset = RayOrigin + NearestOnRay - SphereCenter;
	return dot(CenterOffset, CenterOffset) <= Square(SphereRadius) && AlongRay > -SphereRadius && AlongRay - SphereRadius < RayLength;
}
/**
 * Returns near intersection in x, far intersection in y, or both -1 if no intersection.
 * RayDirection does not need to be unit length.
 */
float2 RayIntersectSphere(float3 RayOrigin, float3 RayDirection, float4 Sphere)
{
	const float3 LocalPosition = RayOrigin - Sphere.xyz;
	const float LocalPositionSqr = dot(LocalPosition, LocalPosition);
	// Coefficients of a*t^2 + b*t + c = 0 along the ray, packed as (a, b, c).
	float3 Coef;
	Coef.x = dot(RayDirection, RayDirection);
	Coef.y = 2 * dot(RayDirection, LocalPosition);
	Coef.z = LocalPositionSqr - Sphere.w * Sphere.w;
	const float Discriminant = Coef.y * Coef.y - 4 * Coef.x * Coef.z;
	float2 Intersections = -1;
	// Only continue if the ray intersects the sphere
	FLATTEN
	if (Discriminant >= 0)
	{
		// Standard quadratic formula; (-1, 1) picks near/far roots.
		const float SqrtDiscriminant = sqrt(Discriminant);
		Intersections = (-Coef.y + float2(-1, 1) * SqrtDiscriminant) / (2 * Coef.x);
	}
	return Intersections;
}
/** Transforms a vector from tangent space to world space */
MaterialFloat3 TransformTangentVectorToWorld(MaterialFloat3x3 TangentToWorld, MaterialFloat3 InTangentVector)
{
// Transform directly to world space
// The vector transform is optimized for this case, only one vector-matrix multiply is needed
return mul(InTangentVector, TangentToWorld);
}
/** Transforms a vector from world space to tangent space */
MaterialFloat3 TransformWorldVectorToTangent(MaterialFloat3x3 TangentToWorld, MaterialFloat3 InWorldVector)
{
// Transform from world to tangent space with the transpose of TangentToWorld (achieved by swapping vector / matrix multiply order)
// Note that the transpose is only equal to the inverse for orthonormal matrices - aka only uniform scaling
return mul(TangentToWorld, InWorldVector);
}
/** Transforms a direction vector from (translated) world space to view space. */
float3 TransformWorldVectorToView(float3 InTangentVector)
{
// Transform from world to view space
// Only the rotation part of TranslatedWorldToView is used (3x3 cast), so the view translation does not affect direction vectors.
return mul(InTangentVector, (float3x3)ResolvedView.TranslatedWorldToView);
}
/** Computes the distance from the center to the edge of an AABB with the given extents in the given direction. */
MaterialFloat GetBoxPushout(MaterialFloat3 Normal, MaterialFloat3 Extent)
{
	// Support distance of a box: sum of the per-axis projected extents,
	// |N.x*E.x| + |N.y*E.y| + |N.z*E.z|.
	MaterialFloat3 ProjectedExtents = abs(Normal * Extent);
	return dot(ProjectedExtents, MaterialFloat3(1.0f, 1.0f, 1.0f));
}
/** Generates arbitrary but valid perpendicular unit vectors to ZAxis. ZAxis should be unit length. */
void GenerateCoordinateSystem(float3 ZAxis, out float3 XAxis, out float3 YAxis)
{
	// Build XAxis perpendicular to ZAxis by rotating within the plane spanned by
	// ZAxis's two largest-footprint components; choosing the plane this way keeps
	// the normalization numerically stable when ZAxis is nearly axis-aligned.
	if (abs(ZAxis.y) >= abs(ZAxis.x))
	{
		// Y component dominates (or ties): work in the YZ plane.
		float InverseLength = 1.0f / sqrt(dot(ZAxis.yz, ZAxis.yz));
		XAxis = float3(0.0f, ZAxis.z * InverseLength, -ZAxis.y * InverseLength);
	}
	else
	{
		// X component dominates: work in the XZ plane.
		float InverseLength = 1.0f / sqrt(dot(ZAxis.xz, ZAxis.xz));
		XAxis = float3(-ZAxis.z * InverseLength, 0.0f, ZAxis.x * InverseLength);
	}
	// Complete the right-handed basis.
	YAxis = cross(ZAxis, XAxis);
}
// Define passthrough implementations of EvaluateAttributeAtSample for non-D3D11 platforms.
// On profiles without per-sample attribute evaluation, these return the attribute unchanged and ignore SampleIndex.
#if !(SM6_PROFILE || SM5_PROFILE)
float EvaluateAttributeAtSample(float Attribute,uint SampleIndex) { return Attribute; }
float2 EvaluateAttributeAtSample(float2 Attribute,uint SampleIndex) { return Attribute; }
float3 EvaluateAttributeAtSample(float3 Attribute,uint SampleIndex) { return Attribute; }
float4 EvaluateAttributeAtSample(float4 Attribute,uint SampleIndex) { return Attribute; }
#endif
/** Output of the screen vertex shader. */
struct FScreenVertexOutput
{
// noperspective: screen-quad UVs are planar in screen space, so perspective correction is unnecessary.
noperspective float2 UV : TEXCOORD0;
float4 Position : SV_POSITION;
};
/** Whether HasPixelAnimation is encoded in the velocity texture. Matches VelocityEncodeHasPixelAnimation() */
// The flag shares storage with the encoded depth (LSB of the W channel), so it is only available when depth is encoded.
#define VELOCITY_ENCODE_HAS_PIXEL_ANIMATION VELOCITY_ENCODE_DEPTH
// for velocity rendering, motionblur and temporal AA
// velocity needs to support -2..2 screen space range for x and y
// texture is 16bit 0..1 range per channel
ENCODED_VELOCITY_TYPE EncodeVelocityToTexture(float3 V, bool bHasPixelAnimation)
{
#if VELOCITY_ENCODE_GAMMA
// Sqrt-shaped curve spends more of the 16-bit range on small velocities; inverted in DecodeVelocityFromTexture().
V.xy = sign(V.xy) * sqrt(abs(V.xy)) * (2.0 / sqrt(2.0));
#endif
// 0.499f is a value smaller than 0.5f to avoid using the full range to use the clear color (0,0) as special value
// 0.5f to allow for a range of -2..2 instead of -1..1 for really fast motions for temporal AA
float4 EncodedV;
EncodedV.xy = V.xy * (0.499f * 0.5f) + 32767.0f / 65535.0f;
#if VELOCITY_ENCODE_DEPTH
// Store the raw float bits of V.z split across the two 16-bit ZW channels.
uint Vz = asuint(V.z);
// 0.1/65535 biases toward the intended integer so the value survives the round trip through UNORM storage.
EncodedV.z = saturate(float((Vz >> 16) & 0xFFFF) * rcp(65535.0f) + (0.1 / 65535.0f));
// The least significant mantissa bit of W is repurposed to carry bHasPixelAnimation (masked off again on decode).
EncodedV.w = saturate(float(((Vz >> 0) & 0xFFFE) | uint(bHasPixelAnimation)) * rcp(65535.0f) + (0.1 / 65535.0f));
#else
EncodedV.zw = 0.0;
#endif
#if COMPILER_GLSL_ES3_1
// Integer render target on ES3.1: convert 0..1 to 0..65535 with round-to-nearest.
return uint4(EncodedV * 65535.0 + 0.5f);
#else
return EncodedV;
#endif
}
// Convenience overload for callers with no pixel-animation flag to store.
ENCODED_VELOCITY_TYPE EncodeVelocityToTexture(float3 V)
{
return EncodeVelocityToTexture(V, /* bHasPixelAnimation = */ false);
}
// see EncodeVelocityToTexture()
// Decodes the packed screen-space velocity (and, when enabled, device Z) back to a float3.
float3 DecodeVelocityFromTexture(ENCODED_VELOCITY_TYPE InEncodedV)
{
#if COMPILER_GLSL_ES3_1
// Integer render target on ES3.1: rescale 0..65535 back into 0..1 first.
float4 EncodedV = InEncodedV / 65535.0f;
#else
float4 EncodedV = InEncodedV;
#endif
// Undo the 0.499 * 0.5 scale and the mid-range bias applied by the encoder.
const float InvDiv = 1.0f / (0.499f * 0.5f);
float3 V;
V.xy = EncodedV.xy * InvDiv - 32767.0f / 65535.0f * InvDiv;
#if VELOCITY_ENCODE_DEPTH
// Reassemble the 32-bit float depth from the two 16-bit halves; the LSB carries
// the bHasPixelAnimation flag and is masked off here (0xFFFE).
V.z = asfloat((uint(round(EncodedV.z * 65535.0f)) << 16) | (uint(round(EncodedV.w * 65535.0f)) & 0xFFFE));
#else
V.z = 0.0;
#endif
#if VELOCITY_ENCODE_GAMMA
// Invert the encoder's sqrt curve: v * |v| * 0.5 undoes sign(v) * sqrt(|v|) * sqrt(2).
V.xy = (V.xy * abs(V.xy)) * 0.5;
#endif
return V;
}
/** Returns whether the opaque material drawing velocity had the UMaterial::bHasPixelAnimation, which means the geometric velocity might not accurately represent how the pixel moves. */
#if VELOCITY_ENCODE_HAS_PIXEL_ANIMATION
bool DecodeHasPixelAnimationFromVelocityTexture(ENCODED_VELOCITY_TYPE EncodedV)
{
// The flag lives in the least significant bit of the 16-bit W channel (see EncodeVelocityToTexture).
// NOTE(review): assumes the float (0..1) texture encoding; on COMPILER_GLSL_ES3_1 ENCODED_VELOCITY_TYPE is uint4
// and the *65535 rescale here would be wrong - presumably that path never enables VELOCITY_ENCODE_DEPTH; confirm.
return (uint(round(EncodedV.w * 65535.0f)) & 0x1) != 0x0;
}
#endif
// Used for the Global Illumination in the GIReplace material expression
// Compile-time switch: true only when building the reflective shadow map pass.
bool GetGIReplaceState()
{
#if REFLECTIVE_SHADOW_MAP
return true;
#else
return false;
#endif
}
// Used for the Nanite path in the NaniteReplace material expression
// Compile-time switch: true only when compiling a Nanite pass shader.
bool GetNaniteReplaceState()
{
#if IS_NANITE_PASS
return true;
#else
return false;
#endif
}
// Compile-time switch for the RayTracingQualitySwitch material expression:
// true for ray tracing shaders and Lumen card captures, but not for the path tracer.
bool GetRayTracingQualitySwitch()
{
#if (RAYTRACINGSHADER || LUMEN_CARD_CAPTURE) && !PATH_TRACING
return true;
#else
return false;
#endif
}
#if !PATH_TRACING && !PATH_TRACING_POST_PROCESS_MATERIAL
// Default implementations for non-path traced shaders
// The actual implementations used for the path tracer exist in PathTracingShaderUtils.h to minimize cruft in Common.ush
// Each stub returns false so material graphs compile unchanged outside the path tracer.
bool GetPathTracingQualitySwitch() { return false; }
bool GetPathTracingIsShadow() { return false; }
bool GetPathTracingIsIndirectDiffuse() { return false; }
bool GetPathTracingIsIndirectSpecular() { return false; }
bool GetPathTracingIsIndirectVolume() { return false; }
#endif
// Compile-time switch for the LightmassReplace material expression:
// true only when path tracing with the simplified material shader (Lightmass GPU baking).
bool GetLightmassReplaceState()
{
#if RAYTRACINGSHADER && PATH_TRACING && SIMPLIFIED_MATERIAL_SHADER
return true;
#else
return false;
#endif
}
/** Geometry shader output used when rasterizing into a slice of a texture array (e.g. volume rendering). */
struct FWriteToSliceGeometryOutput
{
FScreenVertexOutput Vertex;
// Selects which array slice / render target layer the primitive rasterizes into.
uint LayerIndex : SV_RenderTargetArrayIndex;
};
/** Used for calculating vertex positions and UVs when drawing with DrawRectangle */
void DrawRectangle(
in float4 InPosition,
in float2 InTexCoord,
out float4 OutPosition,
out float2 OutTexCoord)
{
OutPosition = InPosition;
// Scale/bias the unit quad into target pixel space, then remap to [-1, 1] NDC.
OutPosition.xy = -1.0f + 2.0f * (DrawRectangleParameters.PosScaleBias.zw + (InPosition.xy * DrawRectangleParameters.PosScaleBias.xy)) * DrawRectangleParameters.InvTargetSizeAndTextureSize.xy;
// Flip Y to match the render target's top-down convention.
OutPosition.xy *= float2( 1, -1 );
// Scale/bias UVs from texel space into normalized texture coordinates.
OutTexCoord.xy = (DrawRectangleParameters.UVScaleBias.zw + (InTexCoord.xy * DrawRectangleParameters.UVScaleBias.xy)) * DrawRectangleParameters.InvTargetSizeAndTextureSize.zw;
}
/** Helper variant for vertex shaders which need a separate output for SV_POSITION and packed UV / Screen position */
void DrawRectangle(
in float4 InPosition,
in float2 InTexCoord,
out float4 OutPosition,
out float4 OutUVAndScreenPos)
{
DrawRectangle(InPosition, InTexCoord, OutPosition, OutUVAndScreenPos.xy);
// Pack the NDC xy alongside the UVs so pixel shaders can reconstruct screen position without SV_POSITION.
OutUVAndScreenPos.zw = OutPosition.xy;
}
/** Used for calculating vertex positions when drawing with DrawRectangle */
void DrawRectangle(in float4 InPosition, out float4 OutPosition)
{
OutPosition = InPosition;
// Same position mapping as the UV variant above: pixel-space scale/bias, remap to [-1, 1] NDC, flip Y.
OutPosition.xy = -1.0f + 2.0f * (DrawRectangleParameters.PosScaleBias.zw + (InPosition.xy * DrawRectangleParameters.PosScaleBias.xy)) * DrawRectangleParameters.InvTargetSizeAndTextureSize.xy;
OutPosition.xy *= float2( 1, -1 );
}
//Since some platforms don't remove Nans in saturate calls,
//SafeSaturate function will remove nan/inf.
//Can be expensive, only call when there's a good reason to expect Nans.
//D3D saturate actually turns NaNs -> 0 since it does the max(0.0f, value) first, and D3D NaN rules specify the non-NaN operand wins in such a case.
//See: https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/saturate
#define SafeSaturate_Def(type)\
type SafeSaturate(type In) \
{\
return saturate(In);\
}
// Instantiate SafeSaturate for scalar and vector float types.
SafeSaturate_Def(float)
SafeSaturate_Def(float2)
SafeSaturate_Def(float3)
SafeSaturate_Def(float4)
// By default HLSL compilers make assumptions about the non-NaN nature of most inputs. Otherwise, most of its optimizations would be invalid, e.g. assuming multiply by 0 equals 0.
// Unfortunately, as a consequence of this, the native infinite()/isnan()/isinf() intrinsics frequently get silently optimized away. To avoid this hazard we manually implement the same logic.
// These are expected to generate identical code as native intrinsics would (when working properly). The PositiveFinite versions are faster than the native.
// Also note that FXC has a quirk in which the pattern b? a : 0 is implemented with an AND instead of the more natural MOVC, i.e. optimizing for D3D bytecode instead of real-world HW instructions.
// So drivers detect this pattern and convert back to a conditional move. HLSLcc instead converts to mix(), so NaNs end up propagating through despite our efforts to kill them (see UE-66179).
// So we break this pattern using !b? 0 : a instead, which generates a MOVC.
// 0x7F800000 is the IEEE 754 single-precision exponent mask: an all-ones exponent means Inf or NaN.
// asuint and the comparisons operate componentwise, so one macro body serves scalar and vector types.
#define IsAndMakeFinite_Def(type, booltype)\
booltype IsFinite(type In) \
{\
return (asuint(In) & 0x7F800000) != 0x7F800000; \
}\
booltype IsPositiveFinite(type In) \
{\
return asuint(In) < 0x7F800000; \
}\
type MakeFinite(type In) \
{\
return select(!IsFinite(In), 0.0, In); \
}\
type MakePositiveFinite(type In) \
{\
return select(!IsPositiveFinite(In), 0.0, In); \
}
IsAndMakeFinite_Def(float, bool)
IsAndMakeFinite_Def(float2, bool2)
IsAndMakeFinite_Def(float3, bool3)
IsAndMakeFinite_Def(float4, bool4)
// NOTE: The raytraced implementation of the ShadowPassSwitch node is kept in RayTracingShaderUtils.ush as it needs to access per ray information.
#if RAYHITGROUPSHADER == 0
// Experimental way to allow adjusting the OpacityMask for shadow map rendering of masked materials.
// This is exposed via the ShadowPassSwitch material node. This can also be accessed with a Custom
// material node. If this turns out to be very useful we can expose as MaterialFunction
// and potentially expose other queries as well (e.g. SkeletalMesh, HitProxy, ).
// @return 0:no, 1:yes
bool GetShadowReplaceState()
{
#if SHADOW_DEPTH_SHADER
return true;
#else
return false;
#endif
}
// Float-typed convenience wrapper around GetShadowReplaceState() for material graph use.
float IsShadowDepthShader()
{
return GetShadowReplaceState() ? 1.0f : 0.0f;
}
#endif // RAYHITGROUPSHADER == 0
// Runtime (not compile-time) switch: true while rendering a reflection capture, driven by the View uniform buffer.
bool GetReflectionCapturePassSwitchState()
{
return View.RenderingReflectionCaptureMask > 0.0f;
}
// Converts the biased 16-bit integer terrain height back to world units.
#define TERRAIN_ZSCALE (1.0f/128.0f)

// Decodes a value which was packed into two 8 bit channels (x = high byte, y = low byte), each stored as UNORM 0..1.
float DecodePackedTwoChannelValue(float2 PackedHeight)
{
	float HighByte = PackedHeight.x * 255.0;
	float LowByte = PackedHeight.y * 255.0;
	return HighByte * 256.0 + LowByte;
}

// Removes the 32768 mid-range bias and rescales the integer height to world space.
float DecodeHeightValue(float InValue)
{
	float UnbiasedValue = InValue - 32768.0;
	return UnbiasedValue * TERRAIN_ZSCALE;
}

// Convenience wrapper: unpack the two 8-bit channels, then convert to world-space height.
float DecodePackedHeight(float2 PackedHeight)
{
	float PackedValue = DecodePackedTwoChannelValue(PackedHeight);
	return DecodeHeightValue(PackedValue);
}
/** Reverses all the <BitCount> lowest significant bits. BitCount must be in [1, 32]; a shift by 32 (BitCount == 0) is undefined. */
uint ReverseBitsN(uint Bitfield, const uint BitCount)
{
// Reverse the full 32 bits, then shift the result down so only the low BitCount bits remain.
return reversebits(Bitfield) >> (32 - BitCount);
}
// Remap 2D array index to flattened 1D array index (row-major: y selects the row of width Dim.x).
inline uint Flatten2D(uint2 Coord, uint2 Dim)
{
	uint RowOffset = Coord.y * Dim.x;
	return RowOffset + Coord.x;
}
// Remap flattened (row-major) 1D array index back to a 2D array index.
inline uint2 Unflatten2D(uint Index, uint2 Dim)
{
	uint Row = Index / Dim.x;
	uint Column = Index - Row * Dim.x;
	return uint2(Column, Row);
}
// Decodes a Morton (Z-order) index into 2D coordinates. SizeLog2 is the bit count
// per axis: x occupies the even bits of Index, y the odd bits.
uint2 ZOrder2D(uint Index, const uint SizeLog2)
{
	uint2 Coord = 0;

	UNROLL
	for (uint BitIndex = 0; BitIndex < SizeLog2; BitIndex++)
	{
		// Pull out the interleaved (x, y) bit pair for this level.
		uint BitPair = (Index >> (2 * BitIndex)) & 0x3;
		Coord.x |= (BitPair & 0x1) << BitIndex;
		Coord.y |= (BitPair >> 1) << BitIndex;
	}
	return Coord;
}
// Decodes a 3D Morton (Z-order) index into 3D coordinates. SizeLog2 is the bit count
// per axis: bits are interleaved as (..., z1, y1, x1, z0, y0, x0).
uint3 ZOrder3D(uint Index, const uint SizeLog2)
{
	uint3 Coord = 0;

	UNROLL
	for (uint BitIndex = 0; BitIndex < SizeLog2; BitIndex++)
	{
		// Pull out the interleaved (x, y, z) bit triplet for this level.
		uint BitTriplet = (Index >> (3 * BitIndex)) & 0x7;
		Coord.x |= ((BitTriplet >> 0) & 0x1) << BitIndex;
		Coord.y |= ((BitTriplet >> 1) & 0x1) << BitIndex;
		Coord.z |= ((BitTriplet >> 2) & 0x1) << BitIndex;
	}
	return Coord;
}
// Encodes 3D coordinates into a Morton (Z-order) index; inverse of ZOrder3D.
// SizeLog2 is the bit count per axis, so the result uses 3 * SizeLog2 bits.
uint ZOrder3DEncode(uint3 Coord, const uint SizeLog2)
{
	uint Index = 0;

	UNROLL
	for (uint BitIndex = 0; BitIndex < SizeLog2; BitIndex++)
	{
		// Take bit BitIndex of each axis and interleave the three into the output.
		uint3 AxisBits = (Coord >> BitIndex) & 0x1;
		Index |= (AxisBits.x << (3 * BitIndex + 0)) | (AxisBits.y << (3 * BitIndex + 1)) | (AxisBits.z << (3 * BitIndex + 2));
	}
	return Index;
}
// Integer ceil(Dividend / Divisor). Divisor must be non-zero; Dividend + Divisor - 1 must not overflow 32 bits.
uint DivideAndRoundUp(uint Dividend, uint Divisor)
{
	uint BiasedDividend = Dividend + (Divisor - 1);
	return BiasedDividend / Divisor;
}
// Optimised for power of two because it relies on a division done using bit shift
// Caller contract: Divisor must equal (1u << DivisorAsBitShift).
uint DivideAndRoundUp(uint Dividend, uint Divisor, uint DivisorAsBitShift)
{
return (Dividend + Divisor - 1) >> DivisorAsBitShift;
}
// Power-of-two convenience wrappers for the shift-based DivideAndRoundUp overload above.
#define DivideAndRoundUp4(Dividend) DivideAndRoundUp((Dividend), 4u, 2u)
#define DivideAndRoundUp8(Dividend) DivideAndRoundUp((Dividend), 8u, 3u)
#define DivideAndRoundUp16(Dividend) DivideAndRoundUp((Dividend), 16u, 4u)
#define DivideAndRoundUp32(Dividend) DivideAndRoundUp((Dividend), 32u, 5u)
#define DivideAndRoundUp64(Dividend) DivideAndRoundUp((Dividend), 64u, 6u)
#define DivideAndRoundUp128(Dividend) DivideAndRoundUp((Dividend), 128u, 7u)
// Fixed copy-paste bug: this macro was previously (re)defined as DivideAndRoundUp128, silently
// redefining the 128 variant to divide by 256 and leaving no 256 variant at all.
#define DivideAndRoundUp256(Dividend) DivideAndRoundUp((Dividend), 256u, 8u)
// Interprets the low 16 bits of UNorm as a UNorm16 value and converts it to a float in [0, 1].
float UNorm16ToF32(uint UNorm)
{
	uint Low16Bits = UNorm & 0xFFFFu;
	return Low16Bits / 65535.0f;
}
// Interprets the low 16 bits of SNorm as a biased SNorm16 value and converts it to a float in [-1, 1].
float SNorm16ToF32(uint SNorm)
{
	float Unorm = UNorm16ToF32(SNorm);
	return Unorm * 2.0f - 1.0f;
}
// Maps a scene depth to a (fractional) Z slice of an exponentially distributed froxel grid.
// GridZParams packs the distribution as (depth scale, depth bias, slices per log2 step).
float ComputeZSliceFromDepth(float3 GridZParams, float SceneDepth)
{
	float BiasedDepth = SceneDepth * GridZParams.x + GridZParams.y;
	return log2(BiasedDepth) * GridZParams.z;
}
// Inverse of ComputeZSliceFromDepth: maps a froxel grid Z slice back to scene depth.
float ComputeDepthFromZSlice(float3 GridZParams, float ZSlice)
{
	// Undo the log2 mapping, then remove the bias and scale.
	return (exp2(ZSlice / GridZParams.z) - GridZParams.y) / GridZParams.x;
}
// see PixelShaderOutputCommon
/** Inputs handed to the pixel shader body by the PixelShaderOutputCommon wrapper. */
struct FPixelShaderIn
{
// read only
float4 SvPosition;
// Pixel Shader InCoverage, only usable if PIXELSHADEROUTPUT_COVERAGE is 1
uint Coverage;
// Primitive facing; NOTE(review): presumably sourced from SV_IsFrontFace by the wrapper - confirm in PixelShaderOutputCommon.
bool bIsFrontFace;
};
// see PixelShaderOutputCommon
/** Outputs written by the pixel shader body; the PixelShaderOutputCommon wrapper routes them to the actual render targets. */
struct FPixelShaderOut
{
// [0..7], only usable if PIXELSHADEROUTPUT_MRT0, PIXELSHADEROUTPUT_MRT1, ... is 1
float4 MRT[8];
// Explicit uint output specific to Substrate.
uint SubstrateOutput[3];
SUBSTRATE_TOP_LAYER_TYPE SubstrateTopLayerData;
// Pixel Shader OutCoverage, only usable if PIXELSHADEROUTPUT_COVERAGE is 1
uint Coverage;
// Pixel Shader OutDepth
float Depth;
};
// Defines Swap(inout A, inout B) overloads that exchange the two values through a temporary.
#define SwapGeneric(T) void Swap(inout T A, inout T B) { T Temp = A; A = B; B = Temp; }
// Instantiate Swap for the common scalar and vector types.
SwapGeneric(uint)
SwapGeneric(uint2)
SwapGeneric(uint3)
SwapGeneric(uint4)
SwapGeneric(float)
SwapGeneric(float2)
SwapGeneric(float3)
SwapGeneric(float4)
// ---------------------------------------------------- Global samplers.
// If GetGlobalSampler() was not implemented in Platform.ush, provide a default one from View uniform buffer.
#if !defined(GetGlobalSampler) && SUPPORTS_INDEPENDENT_SAMPLERS
// Implements UE4's Get global sampler.
// Filter={Point,Bilinear,Trilinear}
// WrapMode={Wrapped,Clamped}
// Token-pastes into e.g. View.SharedBilinearClampedSampler.
#define GetGlobalSampler(Filter,WrapMode) \
View.Shared##Filter##WrapMode##Sampler
#endif // GetGlobalSampler
#if SUPPORTS_INDEPENDENT_SAMPLERS
// Shortcuts for global samplers.
#define GlobalPointClampedSampler GetGlobalSampler(Point, Clamped)
#define GlobalPointWrappedSampler GetGlobalSampler(Point, Wrapped)
#define GlobalBilinearClampedSampler GetGlobalSampler(Bilinear, Clamped)
#define GlobalBilinearWrappedSampler GetGlobalSampler(Bilinear, Wrapped)
#define GlobalTrilinearClampedSampler GetGlobalSampler(Trilinear, Clamped)
#define GlobalTrilinearWrappedSampler GetGlobalSampler(Trilinear, Wrapped)
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5
/** Gathers the 2x2 device-Z footprint at UV and converts each sample to linear scene depth via ConvertFromDeviceZ. */
float4 GatherDepth(Texture2D Texture, float2 UV)
{
// using Gather: xyzw in counter clockwise order starting with the sample to the lower left of the queried location
float4 DeviceZ = Texture.GatherRed(GlobalBilinearClampedSampler, UV);
return float4(
ConvertFromDeviceZ(DeviceZ.x),
ConvertFromDeviceZ(DeviceZ.y),
ConvertFromDeviceZ(DeviceZ.z),
ConvertFromDeviceZ(DeviceZ.w));
}
#endif
#endif // SUPPORTS_INDEPENDENT_SAMPLERS