07 - The Particle Swarm

The first GL-idiom-substitution rung. The dasVulkan particle tutorial runs its simulation in a compute shader: a storage buffer of particles is rewritten every frame (a gravity pull toward the origin, explicit-Euler integrated), then the same buffer is drawn as point sprites. WebGL2 has no compute shaders and no storage buffers – so this rung shows the canonical GL substitute: transform feedback. A vertex shader reads each particle’s state as vertex attributes, integrates it, and its outputs are captured straight back into a buffer instead of being rasterised. Two buffers ping-pong, and a second program draws the freshly-written buffer as glowing point sprites.

// ===== shared state attributes (read by BOTH the sim and the render vertex shader) =====
// Declared in the same order as the fields of the Particle struct (pos, vel, life).
var @in @location = 0 a_pos : float3    // particle position
var @in @location = 1 a_vel : float3    // particle velocity
var @in @location = 2 a_life : float    // brightness factor; sim_vs copies it through unchanged

// ===== simulation pass: a vertex shader used as a compute step =====
// Its outputs are captured by transform feedback into the destination state buffer.
// create_gl_objects registers them for capture in the order tf_pos, tf_vel, tf_life.
var @out tf_pos : float3        // integrated position
var @out tf_vel : float3        // integrated velocity
var @out tf_life : float        // a_life, forwarded as-is
var @uniform u_dt : float       // integration step; update() sets it to 1/60 every frame

[vertex_program(name = "sim_src")]
def sim_vs {
    // Central attractor at the origin. Adding 0.05 to |pos|^2 softens the 1/r^2 pull, so a
    // particle that passes close to the centre does not receive an unbounded kick.
    let soft_dist = sqrt(dot(a_pos, a_pos) + 0.05)
    let inv_cube = 1.0 / (soft_dist * soft_dist * soft_dist)
    let pull = a_pos * (-inv_cube * 1.8)
    // Semi-implicit (symplectic) Euler step: the velocity is advanced first, and the
    // position then moves with the *updated* velocity.
    let vel_next = a_vel + pull * u_dt
    tf_vel = vel_next
    tf_pos = a_pos + vel_next * u_dt
    tf_life = a_life            // passed through untouched: life only drives the colour
}

// The sim program is given a fragment stage so that it links. With RASTERIZER_DISCARD
// enabled around the sim draw that stage never runs, so a constant output is enough.
// f_FragColor is also the output written by render_fs.
var @out f_FragColor : float4

[fragment_program(name = "sim_dummy_src")]
def sim_dummy_fs {
    // opaque black placeholder
    let placeholder = float4(0.0, 0.0, 0.0, 1.0)
    f_FragColor = placeholder
}

// ===== render pass: one glowing point sprite per particle =====
let PARTICLE_PX = 9.0           // screen-space splat width in pixels
var @uniform u_view : float4x4  // camera view matrix, rebuilt every frame in update()
var @uniform u_proj : float4x4  // perspective projection, rebuilt every frame in update()
var @inout r_speed : float      // velocity magnitude: written by render_vs, read by render_fs
var @inout r_life : float       // a_life forwarded from render_vs to render_fs

[vertex_program(name = "render_vs_src")]
def render_vs {
    // hand the per-particle values the fragment stage colours by across to it
    r_life = a_life
    r_speed = sqrt(dot(a_vel, a_vel))
    // project the particle and give the rasteriser its GL_POINTS splat width
    // (gl_PointSize is a vertex-stage builtin)
    let world_pos = float4(a_pos, 1.0)
    gl_Position = u_proj * u_view * world_pos
    gl_PointSize = PARTICLE_PX
}

[fragment_program(name = "render_fs_src")]
def render_fs {
    // gl_PointCoord spans [0,1]^2 across the rasterised point. Re-centre it and scale it so
    // that rim == 1.0 on the inscribed circle -- a point sprite with no texture upload.
    let centred = gl_PointCoord - float2(0.5, 0.5)
    let rim = sqrt(dot(centred, centred)) * 2.0
    if (rim > 1.0) {
        discard()               // corners of the square outside the disc are dropped
    }
    // soft cubic falloff from the centre: hot core fading into a halo
    let falloff = 1.0 - clamp(rim, 0.0, 1.0)
    let glow = falloff * falloff * falloff
    // colour ramp driven by speed: the cool tint for slow particles, the hot tint for fast ones
    let heat = clamp(r_speed * 0.4, 0.0, 1.0)
    let slow_tint = float3(0.20, 0.70, 1.00)
    let fast_tint = float3(1.00, 0.30, 0.55)
    let tint = lerp(slow_tint, fast_tint, float3(heat, heat, heat))
    // colour and alpha are both scaled by glow * life; under the ONE/ONE blend that
    // update() sets up, every splat simply adds its colour to the framebuffer
    let strength = glow * r_life
    f_FragColor = float4(tint * strength, strength)
}

// ===== GL objects =====

var sim_prog : uint             // simulation program (sim_vs + dummy fragment shader), linked with TF varyings
var render_prog : uint          // point-sprite render program (render_vs + render_fs)
var buf : uint[2]               // two state buffers, ping-ponged
var vao : uint[2]               // one VAO per buffer (reads it as a_pos/a_vel/a_life)
var cur = 0                     // which buffer holds the current state
var window : GLFWwindow?        // GLFW window handle, created in init()
var time : float = 0.0          // animation clock; update() advances it by 1/60 per call

let N_PARTICLES = 1024          // particle count: size of the seed array and of every draw call

// One particle as stored in the state buffers. The field order mirrors the a_pos / a_vel /
// a_life attribute declarations and the tf_pos / tf_vel / tf_life capture order, so it
// must not be rearranged.
[vertex_buffer]
struct Particle {
    pos : float3    // position
    vel : float3    // velocity
    life : float    // brightness factor; seeded in [0.4, 1.0) by gen_initial_particles
}

// Seed n particles around a rippled ring lying in the XZ plane: the radius is 1.2 +/- 0.15
// (7 lobes around the ring) and the height is +/- 0.3 (5 lobes). Every particle starts with
// a tangential velocity of magnitude 1.4 plus a small vertical component, and a life value
// that ramps from 0.4 towards 1.0 around the ring. The ring is a visually clear starting
// shape: its distortion over time shows the integrator at work.
def gen_initial_particles(n : int) : array<Particle> {
    let tangential = 1.4
    var swarm : array<Particle>
    swarm |> resize(n)
    for (idx in range(n)) {
        let frac = float(idx) / float(n)            // position around the ring, in [0, 1)
        let angle = frac * 2.0 * PI
        let radius = 1.2 + 0.15 * sin(angle * 7.0)
        let spawn_pos = float3(radius * cos(angle), sin(angle * 5.0) * 0.3, radius * sin(angle))
        let spawn_vel = float3(-tangential * sin(angle), cos(angle * 3.0) * 0.35, tangential * cos(angle))
        swarm[idx] = Particle(pos = spawn_pos, vel = spawn_vel, life = 0.4 + 0.6 * frac)
    }
    return <- swarm
}

// Build one ping-pong slot: a buffer filled with `seed`, plus a VAO that describes it as
// the Particle attribute stream. Returns (buffer, vao). Both slots are seeded with the
// same data so frame 0 reads valid particles from either one. On return the new VAO and
// the new buffer are still bound.
def make_state_buffer(seed : array<Particle>) : tuple<uint; uint> {
    var state_buf : uint
    var state_vao : uint
    glGenBuffers(1, safe_addr(state_buf))
    glGenVertexArrays(1, safe_addr(state_vao))
    // upload the initial particle state
    glBindBuffer(GL_ARRAY_BUFFER, state_buf)
    glBufferData(GL_ARRAY_BUFFER, seed, GL_DYNAMIC_COPY)
    // GL_ARRAY_BUFFER is not part of VAO state, so state_buf is still the bound array
    // buffer when the attribute layout is recorded into the new VAO
    glBindVertexArray(state_vao)
    bind_vertex_buffer(null, type<Particle>)
    return (state_buf, state_vao)
}

def create_gl_objects {
    // --- simulation program ---
    // The stages are compiled and attached by hand so the transform-feedback varyings can
    // be declared BEFORE the link. The GPU then captures tf_pos / tf_vel / tf_life
    // interleaved, which matches the Particle layout.
    let sim_vert = create_shader(sim_src, GL_VERTEX_SHADER)
    let sim_frag = create_shader(sim_dummy_src, GL_FRAGMENT_SHADER)
    sim_prog = create_shader_program(sim_vert, sim_frag)
    var captured <- ["tf_pos", "tf_vel", "tf_life"]
    glTransformFeedbackVaryings(sim_prog, 3, unsafe(addr(captured[0])), GL_INTERLEAVED_ATTRIBS)
    let linked = link_shader(sim_prog)
    if (!linked) {
        panic("sim program link failed")
    }
    delete captured
    // the shader objects are no longer needed once the program is linked
    glDeleteShader(sim_vert)
    glDeleteShader(sim_frag)

    // --- render program ---
    render_prog = create_shader_program(render_vs_src, render_fs_src)

    // --- state buffers: both ping-pong slots start from the same seed ---
    var seed <- gen_initial_particles(N_PARTICLES)
    for (slot in range(2)) {
        let made = make_state_buffer(seed)
        buf[slot] = made._0
        vao[slot] = made._1
    }
    delete seed
}

[export]
def init {
    // Bring up GLFW, open the window, make its context current, then build every GL
    // object the frame loop uses. Any failure is fatal.
    let glfw_ok = glfwInit() != 0
    if (!glfw_ok) {
        panic("can't init glfw")
    }
    glfwInitOpenGL(3, 3)
    window = glfwCreateWindow(640, 480, "OpenGL - 07 particle swarm", null, null)
    if (window == null) {
        panic("can't create window")
    }
    glfwMakeContextCurrent(window)
    create_gl_objects()
}

[export]
def update : bool {
    // One frame: advance the fixed-step clock, simulate src -> dst through transform
    // feedback, draw dst, then swap roles. Returns false once the window should close.
    time += 1.0 / 60.0
    let src = cur
    let dst = 1 - src
    var fb_w, fb_h : int
    glfwGetFramebufferSize(window, fb_w, fb_h)
    let aspect = float(fb_w) / float(max(fb_h, 1))     // height clamped so a 0-high framebuffer cannot divide by zero

    // ===== Pass 1: simulate -- a vertex shader run as a compute step, captured by TF =====
    glEnable(GL_RASTERIZER_DISCARD)
    glUseProgram(sim_prog)
    u_dt = 1.0 / 60.0
    sim_vs_bind_uniform(sim_prog)
    glBindVertexArray(vao[src])
    // WebGL2 does not allow a buffer to be bound to TRANSFORM_FEEDBACK_BUFFER and to
    // GL_ARRAY_BUFFER at the same time. Setup leaves the generic binding on a state
    // buffer, so drop it before attaching the capture target.
    glBindBuffer(GL_ARRAY_BUFFER, 0u)
    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0u, buf[dst])
    glBeginTransformFeedback(GL_POINTS)
    glDrawArrays(GL_POINTS, 0, N_PARTICLES)
    glEndTransformFeedback()
    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0u, 0u)
    glDisable(GL_RASTERIZER_DISCARD)

    // ===== Pass 2: render the freshly-written buffer as additive point sprites =====
    // the camera orbits the origin at radius 3 while bobbing gently up and down
    let orbit = time * 0.3
    let orbit_r = 3.0
    let eye = float3(orbit_r * cos(orbit), sin(orbit * 2.0) * 0.5, orbit_r * sin(orbit))
    u_view = look_at_rh(eye, float3(0, 0, 0), float3(0, 1, 0))
    u_proj = perspective_rh_opengl(60.0 * PI / 180.0, aspect, 0.1, 30.0)

    glViewport(0, 0, fb_w, fb_h)
    glClearColor(0.02, 0.02, 0.05, 1.0)
    glClear(GL_COLOR_BUFFER_BIT)
    // additive glow: depth testing off so every splat contributes, blending ONE/ONE
    glDisable(GL_DEPTH_TEST)
    glEnable(GL_BLEND)
    glBlendFunc(uint(GL_ONE), uint(GL_ONE))
    glUseProgram(render_prog)
    render_vs_bind_uniform(render_prog)
    render_fs_bind_uniform(render_prog)
    glBindVertexArray(vao[dst])
    glDrawArrays(GL_POINTS, 0, N_PARTICLES)
    glDisable(GL_BLEND)

    // the buffer just written becomes the next frame's input
    cur = dst
    glfwPollEvents()
    glfwSwapBuffers(window)
    let keep_running = glfwWindowShouldClose(window) == 0
    return keep_running
}

[export]
def shutdown {
    // Destroy the window created in init(), then shut GLFW down. The programs, buffers and
    // VAOs made in create_gl_objects are not deleted explicitly here.
    glfwDestroyWindow(window)
    glfwTerminate()
}

// Desktop entry point: init once, call update() until it reports the window should
// close, then shut down. On the web this is never called -- the run path drives the
// three lifecycle functions directly and persists the Context across frames.
[export]
def main {
    init()
    var alive = update()
    while (alive) {
        alive = update()
    }
    shutdown()
}

A vertex shader as a compute step

sim_vs reads a_pos / a_vel / a_life (the particle state, as vertex attributes) and writes tf_pos / tf_vel / tf_life. Three rails turn that draw into a compute pass:

  • Transform-feedback varyings. Before the sim program is linked, glTransformFeedbackVaryings(prog, 3, ["tf_pos","tf_vel","tf_life"], GL_INTERLEAVED_ATTRIBS) registers those outputs for capture. The low-level create_shader / create_shader_program(vs, fs) / link_shader API exists precisely so this call can be inserted between attach and link – the one-shot create_shader_program(vsrc, fsrc) helper links immediately, leaving no point at which the varyings could be registered.

  • Rasterizer discard. glEnable(GL_RASTERIZER_DISCARD) around the sim draw means the vertices are processed and captured but nothing is drawn – a pure compute step.

  • Ping-pong. A buffer can’t be read and written in the same draw, so two state buffers alternate roles: frame N reads A and writes B, frame N+1 reads B and writes A.

One [vertex_buffer] struct Particle (pos, vel, life) defines the state layout – attribute offsets 0 / 12 / 24, stride 28. The transform-feedback capture order matches that interleave exactly, so the same VAO reads the captured bytes back as the next frame’s simulation input and as the render input.

A WebGL2 transform-feedback rule

WebGL2 forbids a buffer bound to GL_TRANSFORM_FEEDBACK_BUFFER from also being bound to GL_ARRAY_BUFFER. Buffer setup leaves the generic GL_ARRAY_BUFFER binding pointing at a state buffer, so the sim pass clears it (glBindBuffer(GL_ARRAY_BUFFER, 0)) before binding the feedback target – otherwise the capture draw raises GL_INVALID_OPERATION and silently writes nothing.

The point sprites

render_vs projects each particle and sets gl_PointSize (the vertex-stage builtin the rasteriser reads for GL_POINTS); render_fs uses gl_PointCoord – the [0,1]^2 UV inside the rasterised point – to carve a circular disc with a soft cubic falloff and discard() outside it, colours by speed (cool teal to hot magenta), and premultiplies by the glow so a GL_ONE/GL_ONE additive blend gives a pure additive glow. (This rung is the first to use discard() on the GL rail – it surfaced an emitter hole: discard() was emitted as an undefined _discard() call in GLSL versions below 400, including the ES 3.00 / 330 the tutorials use; dasGlsl now emits the void _discard() { discard; } bridge for every fragment shader.)

Run it

Locally, in a window:

daslang tutorials/opengl/07_particles/07_particles.das

In the browser, it runs live in the daslang playground – the same .das, lowered to WebGL2: a thousand particles seeded on a rippled ring, the swarm simulated entirely on the GPU by transform feedback and flowing under gravity as the camera orbits.