Compute Shader
In this tutorial, you'll learn how to use compute shaders with Zenith.NET. We'll create a simple image processing effect that converts a color image to grayscale on the GPU.
Overview
We'll create a ComputeShaderRenderer class that:
- Loads an image as an input texture
- Creates an output texture with read/write access
- Builds a compute pipeline
- Dispatches compute work to process the image
- Copies the result to the swap chain for display
The Renderer Class
Create a new file Renderers/ComputeShaderRenderer.cs:
namespace ZenithTutorials.Renderers;
/// <summary>
/// Tutorial renderer that converts an image to grayscale with a compute shader and
/// then copies the processed texture into the swap chain's color target.
/// </summary>
internal unsafe class ComputeShaderRenderer : IRenderer
{
    // Threads per group along X and Y. Must match [numthreads(16, 16, 1)] in the shader source.
    private const uint ThreadGroupSize = 16;

    // Compute shader source, compiled in the constructor with the "CSMain" entry point.
    private const string ComputeShaderSource = """
        Texture2D inputTexture;
        RWTexture2D outputTexture;
        [numthreads(16, 16, 1)]
        void CSMain(uint3 dispatchThreadID : SV_DispatchThreadID)
        {
        uint width, height;
        outputTexture.GetDimensions(width, height);
        // Bounds check
        if (dispatchThreadID.x >= width || dispatchThreadID.y >= height)
        {
        return;
        }
        // Read input pixel
        float4 color = inputTexture[dispatchThreadID.xy];
        // Convert to grayscale using luminance weights
        float gray = dot(color.rgb, float3(0.299, 0.587, 0.114));
        // Write to output
        outputTexture[dispatchThreadID.xy] = float4(gray, gray, gray, color.a);
        }
        """;

    // Source image loaded from disk.
    private readonly Texture inputTexture;

    // Grayscale result written by the compute shader; same size as the input.
    private readonly Texture outputTexture;

    private readonly ResourceLayout resourceLayout;
    private readonly ResourceSet resourceSet;
    private readonly ComputePipeline pipeline;

    // Set once the compute dispatch has been recorded, so it is not repeated on later frames.
    private bool processed;

    /// <summary>
    /// Loads the source image and creates the output texture, resource bindings and compute pipeline.
    /// </summary>
    public ComputeShaderRenderer()
    {
        string imagePath = Path.Combine(AppContext.BaseDirectory, "Assets", "shoko.png");

        inputTexture = App.Context.LoadTextureFromFile(imagePath, generateMipMaps: false);

        // UnorderedAccess is what lets the compute shader write to this texture.
        outputTexture = App.Context.CreateTexture(new()
        {
            Type = TextureType.Texture2D,
            Format = PixelFormat.R8G8B8A8UNorm,
            Width = inputTexture.Desc.Width,
            Height = inputTexture.Desc.Height,
            Depth = 1,
            MipLevels = 1,
            ArrayLayers = 1,
            SampleCount = SampleCount.Count1,
            Flags = TextureUsageFlags.ShaderResource | TextureUsageFlags.UnorderedAccess
        });

        // One read-only texture and one read/write texture, both visible to the compute stage.
        resourceLayout = App.Context.CreateResourceLayout(new()
        {
            Bindings = BindingHelper.Bindings
            (
                new() { Type = ResourceType.Texture, Count = 1, StageFlags = ShaderStageFlags.Compute },
                new() { Type = ResourceType.TextureReadWrite, Count = 1, StageFlags = ShaderStageFlags.Compute }
            )
        });

        // Resources are listed in the same order as the layout bindings above.
        resourceSet = App.Context.CreateResourceSet(new()
        {
            Layout = resourceLayout,
            Resources = [inputTexture, outputTexture]
        });

        // The shader object is only needed while the pipeline is created; the using declaration disposes it at the end of the constructor.
        using Shader computeShader = App.Context.LoadShaderFromSource(ComputeShaderSource, "CSMain", ShaderStageFlags.Compute);

        pipeline = App.Context.CreateComputePipeline(new()
        {
            Compute = computeShader,
            ResourceLayouts = [resourceLayout],
            ThreadGroupSizeX = ThreadGroupSize,
            ThreadGroupSizeY = ThreadGroupSize,
            ThreadGroupSizeZ = 1
        });
    }

    /// <summary>
    /// No per-frame state to advance.
    /// </summary>
    public void Update(double deltaTime)
    {
    }

    /// <summary>
    /// Records the grayscale dispatch on the first call, copies the output texture to the
    /// swap chain's color target on every call, then submits and waits for completion.
    /// </summary>
    public void Render()
    {
        CommandBuffer commandBuffer = App.Context.Graphics.CommandBuffer();

        if (!processed)
        {
            DispatchGrayscale();

            processed = true;
        }

        CopyToSwapChain();

        commandBuffer.Submit(waitForCompletion: true);

        // Binds the compute pipeline and resources, then dispatches enough groups to cover the image.
        void DispatchGrayscale()
        {
            // Round up so a partially filled group at the right/bottom edge is still dispatched.
            uint dispatchX = (inputTexture.Desc.Width + ThreadGroupSize - 1) / ThreadGroupSize;
            uint dispatchY = (inputTexture.Desc.Height + ThreadGroupSize - 1) / ThreadGroupSize;

            commandBuffer.SetPipeline(pipeline);
            commandBuffer.SetResourceSet(resourceSet, 0);
            commandBuffer.Dispatch(dispatchX, dispatchY, 1);
        }

        // Copy the processed texture to the swap chain's color target (centered)
        void CopyToSwapChain()
        {
            Texture colorTarget = App.SwapChain.FrameBuffer.Desc.ColorAttachments[0].Target;

            // Clamp copy region to fit within both textures
            uint copyWidth = Math.Min(outputTexture.Desc.Width, App.Width);
            uint copyHeight = Math.Min(outputTexture.Desc.Height, App.Height);

            // Center the copy region. The clamp above keeps these unsigned subtractions from wrapping.
            uint srcX = (outputTexture.Desc.Width - copyWidth) / 2;
            uint srcY = (outputTexture.Desc.Height - copyHeight) / 2;
            uint destX = (App.Width - copyWidth) / 2;
            uint destY = (App.Height - copyHeight) / 2;

            commandBuffer.CopyTexture(outputTexture,
                                      default,
                                      new() { X = srcX, Y = srcY, Z = 0 },
                                      colorTarget,
                                      default,
                                      new() { X = destX, Y = destY, Z = 0 },
                                      new() { Width = copyWidth, Height = copyHeight, Depth = 1 });
        }
    }

    /// <summary>
    /// Nothing to recreate: the copy region is recomputed from App.Width/App.Height on every Render call.
    /// </summary>
    public void Resize(uint width, uint height)
    {
    }

    /// <summary>
    /// Disposes the GPU objects in the reverse order of their creation.
    /// </summary>
    public void Dispose()
    {
        pipeline.Dispose();
        resourceSet.Dispose();
        resourceLayout.Dispose();
        outputTexture.Dispose();
        inputTexture.Dispose();
    }
}
Running the Tutorial
Update your Program.cs to run the ComputeShaderRenderer:
using ZenithTutorials;
using ZenithTutorials.Renderers;
// Run the tutorial application with ComputeShaderRenderer as its renderer.
App.Run<ComputeShaderRenderer>();
// Called after Run returns.
App.Cleanup();
Run the application:
dotnet run
Result

Code Breakdown
Compute Shader
// Source image; this shader only reads from it.
Texture2D inputTexture;
// Destination image; receives the grayscale result.
RWTexture2D outputTexture;
// Grayscale conversion entry point: each thread handles the pixel at dispatchThreadID.xy.
[numthreads(16, 16, 1)]
void CSMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
uint width, height;
outputTexture.GetDimensions(width, height);
// Bounds check: threads that land outside the output texture do nothing
if (dispatchThreadID.x >= width || dispatchThreadID.y >= height)
{
return;
}
// Read, process, write
float4 color = inputTexture[dispatchThreadID.xy];
// Weighted sum of the color channels gives the luminance; alpha is passed through unchanged
float gray = dot(color.rgb, float3(0.299, 0.587, 0.114));
outputTexture[dispatchThreadID.xy] = float4(gray, gray, gray, color.a);
}
Key elements:
| Element | Description |
|---|---|
| `Texture2D` | Read-only input texture |
| `RWTexture2D` | Read/write output texture |
| `[numthreads(16, 16, 1)]` | Thread group size (16×16 threads) |
| `SV_DispatchThreadID` | Global thread index across all groups |
Output Texture Creation
// Output texture for the compute shader: a 2D texture with the same width and height as the input image.
outputTexture = App.Context.CreateTexture(new()
{
Type = TextureType.Texture2D,
Format = PixelFormat.R8G8B8A8UNorm,
Width = inputTexture.Desc.Width,
Height = inputTexture.Desc.Height,
Depth = 1,
MipLevels = 1,
ArrayLayers = 1,
SampleCount = SampleCount.Count1,
// UnorderedAccess allows compute shader writes; ShaderResource is requested as well
Flags = TextureUsageFlags.ShaderResource | TextureUsageFlags.UnorderedAccess
});
TextureUsageFlags.UnorderedAccess is required for textures that will be written to in compute shaders.
Compute Resource Layout
// Layout with two compute-stage bindings: a read-only texture followed by a read/write texture.
// The resource set created from this layout lists inputTexture and outputTexture in that same order.
resourceLayout = App.Context.CreateResourceLayout(new()
{
Bindings = BindingHelper.Bindings
(
new() { Type = ResourceType.Texture, Count = 1, StageFlags = ShaderStageFlags.Compute },
new() { Type = ResourceType.TextureReadWrite, Count = 1, StageFlags = ShaderStageFlags.Compute }
)
});
Note the differences from graphics shaders:
- `ShaderStageFlags.Compute` instead of `Vertex` or `Pixel`
- `ResourceType.TextureReadWrite` for writable textures
Compute Pipeline Creation
// Compute pipeline built from the shader compiled with the "CSMain" entry point.
pipeline = App.Context.CreateComputePipeline(new()
{
Compute = computeShader,
ResourceLayouts = [resourceLayout],
// ThreadGroupSize is 16, the same value used by [numthreads(16, 16, 1)] in the shader
ThreadGroupSizeX = ThreadGroupSize,
ThreadGroupSizeY = ThreadGroupSize,
ThreadGroupSizeZ = 1
});
The ComputePipelineDesc requires:
- `Compute` - The compiled compute shader
- `ResourceLayouts` - Resource bindings (same as graphics pipelines)
- `ThreadGroupSizeX/Y/Z` - Must match `[numthreads()]` in the shader
Dispatching Compute Work
// Number of thread groups per axis, rounded up so a partially filled group at the edge is still dispatched
uint dispatchX = (inputTexture.Desc.Width + ThreadGroupSize - 1) / ThreadGroupSize;
uint dispatchY = (inputTexture.Desc.Height + ThreadGroupSize - 1) / ThreadGroupSize;
// Bind the compute pipeline and its resource set (index 0), then launch the groups
commandBuffer.SetPipeline(pipeline);
commandBuffer.SetResourceSet(resourceSet, 0);
commandBuffer.Dispatch(dispatchX, dispatchY, 1);
The Dispatch call executes the compute shader:
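- `dispatchX` × `dispatchY` × `dispatchZ` = total thread groups (`dispatchZ` is 1 here)
- Each group runs `ThreadGroupSize` × `ThreadGroupSize` × 1 threads
- The formula `(size + groupSize - 1) / groupSize` rounds up, which ensures full coverage even when the image size is not a multiple of the group size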
Copying to the Swap Chain
// Destination of the copy: the texture behind the swap chain frame buffer's first color attachment
Texture colorTarget = App.SwapChain.FrameBuffer.Desc.ColorAttachments[0].Target;
// Clamp copy region to fit within both textures
uint copyWidth = Math.Min(outputTexture.Desc.Width, App.Width);
uint copyHeight = Math.Min(outputTexture.Desc.Height, App.Height);
// Center the copy region (the clamp above keeps these unsigned subtractions from wrapping)
uint srcX = (outputTexture.Desc.Width - copyWidth) / 2;
uint srcY = (outputTexture.Desc.Height - copyHeight) / 2;
uint destX = (App.Width - copyWidth) / 2;
uint destY = (App.Height - copyHeight) / 2;
// Arguments: source texture, source origin, destination texture, destination origin, extent.
// NOTE(review): the two `default` arguments presumably select the first mip level / array layer - confirm against the CopyTexture signature.
commandBuffer.CopyTexture(outputTexture,
default,
new() { X = srcX, Y = srcY, Z = 0 },
colorTarget,
default,
new() { X = destX, Y = destY, Z = 0 },
new() { Width = copyWidth, Height = copyHeight, Depth = 1 });
Instead of using a full-screen quad with a graphics pipeline, we directly copy the processed texture to the swap chain's color target:
- `App.SwapChain.FrameBuffer.Desc.ColorAttachments[0].Target` - Gets the swap chain's render target texture
- `CopyTexture` - Efficiently copies texture data on the GPU without needing shaders or render passes
- `copyWidth`/`copyHeight` - Clamps the copy region to fit within both source and destination textures
- `srcX`/`srcY` - Centers the source region when the texture is larger than the window
- `destX`/`destY` - Centers the destination region when the texture is smaller than the window
This approach is simpler and more efficient when you just need to display a texture without additional processing.
Next Steps
- Indirect Drawing - Let the GPU control draw parameters for efficient multi-instance rendering
Source Code
Tip
View the complete source code on GitHub: ComputeShaderRenderer.cs