// Screen-space ambient occlusion (ps_main) followed by a depth-aware
// separable blur (ps_blur_hor / ps_blur_ver).

// Shader constant c0. The code uses res.xy to scale texture coordinates for
// the dither lookup and res.zw as per-pixel step sizes.
// NOTE(review): presumably xy = render target size in pixels and
// zw = 1 / size -- confirm against the host code that sets c0.
float4 res : register(c0);

sampler2D colorSampler  : register(s0); // input read by the blur passes (.x channel)
sampler2D depthSampler  : register(s1); // per-pixel depth (.x channel)
sampler2D ditherSampler : register(s2); // per-pixel 2x2 matrix packed into four channels

// Tweakables.
static float g_fRadiusBase      = 0.005f;  // constant part of the sampling radius
static float g_fRadiusScale     = 0.25f;   // depth-dependent part of the sampling radius
static float g_fFallOff         = 0.7f;    // attenuation applied to positive depth deltas
static float g_fIntensity       = 1.25f;   // multiplier on the accumulated occlusion
static float g_fGrain           = 0.01f;   // constant bias added before saturation
static float g_fRange           = 1024.0f; // pixels with depth above this get no occlusion
static float g_fBlurSensitivity = 250.f;   // how strongly depth differences reduce blur weights

// Sample kernel: xy is the 2D offset direction/length, z grows linearly from
// 1/12 to 1 and scales the per-sample reference radius.
static const float3 vSamplePoints[12] =
{
    float3( 0.083333f,  0.000000f, 0.083333f),
    float3(-0.144338f, -0.083333f, 0.166667f),
    float3( 0.125000f,  0.216506f, 0.250000f),
    float3( 0.000000f, -0.333333f, 0.333333f),
    float3(-0.208333f,  0.360844f, 0.416667f),
    float3( 0.433013f, -0.250000f, 0.500000f),
    float3(-0.583333f, -0.000000f, 0.583333f),
    float3( 0.577350f,  0.333333f, 0.666667f),
    float3(-0.375000f, -0.649519f, 0.750000f),
    float3(-0.000000f,  0.833333f, 0.833333f),
    float3( 0.458333f, -0.793857f, 0.916667f),
    float3(-0.866025f,  0.500000f, 1.000000f)
};

// Ambient occlusion pass.
//
// TexCoord: texture coordinate of the pixel being shaded.
// Returns:  the occlusion term replicated into rgb, with alpha = 0.
float4 ps_main(float2 TexCoord : TEXCOORD0) : COLOR0
{
    // Fetch a per-pixel 2x2 matrix from the dither texture and remap it from
    // [0, 1] to [-1, 1]. Dividing by 32 repeats the texture once every
    // 32 units of TexCoord * res.xy.
    float4 vRotation = tex2D(ditherSampler, TexCoord * res.xy / 32.0f) * 2.0f - 1.0f;
    float2x2 mRotation = float2x2(vRotation.xy, vRotation.zw);

    // Depth of the pixel being shaded.
    float fDepth = tex2D(depthSampler, TexCoord).x;

    // Sampling radius shrinks as depth grows; the scalar is replicated to xyz.
    float3 fScaledRadius = g_fRadiusBase + g_fRadiusScale / (8.0f + fDepth);

    // Keep the kernel within [4 * res.z, 64 * res.w].
    fScaledRadius = clamp(fScaledRadius, 4.0f * res.z, 64.0f * res.w);

    // The z component becomes the depth-scaled reference radius that depth
    // differences are divided by below.
    // NOTE(review): this is zero when fDepth is exactly 0, which makes the
    // division below non-finite -- confirm the depth buffer never holds 0.
    fScaledRadius.z *= fDepth;

    float fOcclusion = 0.0f;

    // 18.849556 = 12 * (pi / 2): normalises the sum of twelve atan() results.
    // A cheaper alternative to atan(x) is x / (1 + |x|), in which case the
    // weight would be 1 / 12.
    float fSampleWeight = 1.f / 18.849556f;

    // Process the 12 samples in three batches of four so that the occlusion
    // math runs on float4 vectors. The inner loop advances i.
    for (int i = 0; i < 12; )
    {
        float4 fSampleDepth, fSampleRadius;

        [unroll]
        for (int j = 0; j < 4; j++, i++)
        {
            // Scale the kernel point, then transform its xy by the
            // per-pixel matrix.
            float3 vSampleOffset = fScaledRadius * vSamplePoints[i];
            vSampleOffset.xy = mul(vSampleOffset.xy, mRotation);

            // Depth at the offset position and the reference radius for it.
            fSampleDepth[j]  = tex2D(depthSampler, TexCoord + vSampleOffset.xy).x;
            fSampleRadius[j] = vSampleOffset.z;
        }

        // Depth difference relative to the sample radius; positive when the
        // sample is closer than the shaded pixel.
        float4 fDeltaDepth = (fDepth - fSampleDepth) / fSampleRadius;

        // Only positive deltas are attenuated (max with 0), so far-away
        // occluders contribute less.
        float4 fAttenuation = g_fFallOff * fDeltaDepth;
        float4 fBlocking = atan(fDeltaDepth) / (1.0f + max(0.0f, fAttenuation));

        // dot() with a replicated scalar sums the four lanes times the weight.
        fOcclusion += dot(fBlocking, fSampleWeight);
    }

    // Fade the effect out towards the screen borders. fPixelPos is in
    // [-1, 1], so the squared length reaches 2 in the corners and the raw
    // expression would drop below zero there; saturate() keeps the factor in
    // [0, 1] so the pass never emits negative occlusion.
    float2 fPixelPos = TexCoord * 2.0f - 1.0f;
    float fBorderAttenuation = saturate(1.0f - 0.7071f * dot(fPixelPos, fPixelPos));

    // step() zeroes the result for pixels whose depth exceeds g_fRange.
    float fAO = saturate(g_fIntensity * fOcclusion + g_fGrain)
              * fBorderAttenuation
              * step(fDepth, g_fRange);

    return float4((float3)fAO, 0.f);
}

// Depth-aware 5-tap blur along one direction.
//
// TexCoord:  texture coordinate of the pixel being shaded.
// bVertical: selects the step direction. The step is (res.z, res.w) when
//            false and (res.z, -res.w) when true, i.e. the x component is
//            non-zero in both cases.
// Returns:   the blurred .x channel of colorSampler replicated into rgb,
//            with alpha = 0.
float4 ps_blur(float2 TexCoord, uniform bool bVertical)
{
    float2 fStepSize = float2(1.f, bVertical ? -1.f : 1.f) * res.zw;

    // Depths at offsets -1, 0 and +1 steps.
    float3 fCenterDepths;
    fCenterDepths.x = tex2D(depthSampler, TexCoord - fStepSize).x;
    fCenterDepths.y = tex2D(depthSampler, TexCoord).x;
    fCenterDepths.z = tex2D(depthSampler, TexCoord + fStepSize).x;

    // Start from the depths at offsets -2, -1 and +2 steps.
    float3 fDepthContinuities, fDepthDeltas;
    fDepthContinuities.x = tex2D(depthSampler, TexCoord - 2.0f * fStepSize).x;
    fDepthContinuities.y = fCenterDepths.x;
    fDepthContinuities.z = tex2D(depthSampler, TexCoord + 2.0f * fStepSize).x;

    // Deltas:       (d[-2] - d[0], d[-1] - d[+1], d[+2] - d[0]).
    // Continuities: second differences centred on offsets -1, 0 and +1,
    //               e.g. d[-2] + d[0] - 2 * d[-1] for the first component.
    fDepthDeltas = fDepthContinuities - fCenterDepths.yzy;
    fDepthContinuities += fCenterDepths.yzy - 2.0f * fCenterDepths;

    // Use magnitudes, and subtract the smallest delta from all three so that
    // a difference common to every window does not reduce the weights.
    fDepthContinuities = abs(fDepthContinuities);
    fDepthDeltas = abs(fDepthDeltas);
    fDepthDeltas -= min(min(fDepthDeltas.x, fDepthDeltas.y), fDepthDeltas.z);

    // One weight per 3-tap window; larger depth variation gives a smaller weight.
    float3 fWeights = 1.0f / (1.0f + g_fBlurSensitivity * (fDepthContinuities + 16.f * fDepthDeltas));

    float fOutput = 0.0f;
    float3 fSamples;

    // Accumulate three overlapping 3-tap windows built from five colour
    // samples. dot() with a scalar weight sums the window and scales it.
    // Window 1: offsets -2, -1, 0.
    fSamples.x = tex2D(colorSampler, TexCoord - 2.0f * fStepSize).x;
    fSamples.y = tex2D(colorSampler, TexCoord - fStepSize).x;
    fSamples.z = tex2D(colorSampler, TexCoord).x;
    fOutput += dot(fSamples, fWeights.x);

    // Window 2: offsets +1, -1, 0.
    fSamples.x = tex2D(colorSampler, TexCoord + fStepSize).x;
    fOutput += dot(fSamples, fWeights.y);

    // Window 3: offsets +1, +2, 0.
    fSamples.y = tex2D(colorSampler, TexCoord + 2.0f * fStepSize).x;
    fOutput += dot(fSamples, fWeights.z);

    // Sum of the three window weights.
    float fOutputWeight = dot(fWeights, 1.0f);

    // Normalise: three taps per window, weighted by the window weights.
    fOutput /= 3.0f * fOutputWeight;

    // When the total weight is below 1, blend back towards the unblurred
    // centre sample (fSamples.z) by the missing amount.
    float fCorrectionWeight = saturate(1.0f - fOutputWeight);
    fOutput = (1.0f - fCorrectionWeight) * fOutput + fCorrectionWeight * fSamples.z;

    return float4((float3)fOutput, 0.f);
}

// Blur entry point using the (res.z, res.w) step direction.
float4 ps_blur_hor(float2 TexCoord : TEXCOORD0) : COLOR0
{
    return ps_blur(TexCoord, false);
}

// Blur entry point using the (res.z, -res.w) step direction.
float4 ps_blur_ver(float2 TexCoord : TEXCOORD0) : COLOR0
{
    return ps_blur(TexCoord, true);
}